//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca-instrbuilder"

namespace llvm {
namespace mca {

char RecycledInstErr::ID = 0;

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia,
                           const mca::InstrumentManager &im)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
      FirstReturnInst(true) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
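  // Illustrative example (hypothetical resources): suppose P0 is the "Super"
  // of sub-resource P0b, and group P01 contains P0. A write that consumes P0b
  // for 2cy implicitly consumes 2cy of P0, but ExpandProcResource() does not
  // propagate those cycles to P01; recording them here (keyed by P0's mask)
  // lets the group-cycle adjustment below match that behavior.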
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->ReleaseAtCycle) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->ReleaseAtCycle, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->ReleaseAtCycle;
    }
  }

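  // Resources with BufferSize == 0 describe a dispatch hazard: they cannot
  // buffer instructions. If all consumed buffered resources are in-order
  // (BufferSize <= 1) and at least one of them has BufferSize == 0, then the
  // instruction cannot wait in a scheduler queue and must issue immediately.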
  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = llvm::popcount(A.first);
    unsigned popcntB = llvm::popcount(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });
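  // For example (hypothetical masks): a unit with mask 0x1 (popcount 1) sorts
  // before a group with mask 0x7 (popcount 3), and group 0x3 sorts before
  // group 0x7.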

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;
  uint64_t UnitsFromResourceGroups = 0;

  // Remove cycles contributed by smaller resources, and check if there
  // are partially overlapping resource groups.
  ID.HasPartiallyOverlappingGroups = false;

  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(llvm::popcount(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= llvm::bit_floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;

    if (llvm::popcount(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= llvm::bit_floor(NormalizedMask);
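      // Worked example (hypothetical): for a group mask 0b1011, bit_floor()
      // returns the leading bit 0b1000 (the group identifier), leaving
      // NormalizedMask = 0b0011, i.e. the mask of the units in the group.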
      if (UnitsFromResourceGroups & NormalizedMask)
        ID.HasPartiallyOverlappingGroups = true;

      UnitsFromResourceGroups |= NormalizedMask;
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (llvm::popcount(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }
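  // Worked example (hypothetical): if unit P0 is consumed for 2cy and group
  // P01 = {P0, P1} for 3cy, the inner loop above subtracts P0's 2cy from P01,
  // leaving 1cy of residual usage for the group.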

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //   SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //     let ReleaseAtCycles = [2, 2, 3];
  //   }
  //
  // This means:
  //  Resource units HWPort0 and HWPort1 are both used for 2cy.
  //  Resource group HWPort01 is the union of HWPort0 and HWPort1.
  //  Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  //  will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ llvm::bit_floor(RPC.first);
      uint64_t MaxResourceUnits = llvm::popcount(Mask);
      if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }

  // Identify extra buffers that are consumed through super resources.
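  // Example (hypothetical): if a write consumes resource P0b, whose "Super"
  // is P0, then any buffered resource whose mask includes P0 (e.g. a group
  // P01) implicitly consumes a buffer entry too, so its bit is set as well.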
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
    dbgs() << "\t\tHasPartiallyOverlappingGroups="
           << ID.HasPartiallyOverlappingGroups << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency (100cy).
    ID.MaxLatency = 100U;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in a MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     either the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes) or one of the explicit register
  //     definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  // vld1.32  {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                             @  <MCOperand Reg:59>
  //                             @  <MCOperand Imm:0>     (!!)
  //                             @  <MCOperand Reg:67>
  //                             @  <MCOperand Imm:0>
  //                             @  <MCOperand Imm:14>
  //                             @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
  // That means register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.implicit_defs().size();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.operands()[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.implicit_uses().size();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
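  // For example (hypothetical): with two explicit register uses and one
  // implicit use of EFLAGS, the explicit reads get UseIndex 0 and 1, and the
  // EFLAGS read gets UseIndex 2; ReadAdvance entries are resolved against
  // this combined index space.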
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.implicit_uses()[I];
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // in CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "micro opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI,
                                  const SmallVector<Instrument *> &IVec) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  // Allow InstrumentManager to override and use a different SchedClassID.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (IsVariant) {
    unsigned CPUID = SM.getProcessorID();
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID =
          STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

    if (!SchedClassID) {
      return make_error<InstructionError<MCInst>>(
          "unable to resolve scheduling class for write variant.", MCI);
    }
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence.",
        MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of 100cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
  bool IsVariadic = MCDesc.isVariadic();
  if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
    auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
    Descriptors[DKey] = std::move(ID);
    return *Descriptors[DKey];
  }

  auto VDKey = std::make_pair(&MCI, SchedClassID);
  VariantDescriptors[VDKey] = std::move(ID);
  return *VariantDescriptors[VDKey];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
                                   const SmallVector<Instrument *> &IVec) {
  // Cache lookup using the SchedClassID from instrumentation.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);

  auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
  if (Descriptors.find_as(DKey) != Descriptors.end())
    return *Descriptors[DKey];

  unsigned CPUID = STI.getSchedModel().getProcessorID();
  SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);
  auto VDKey = std::make_pair(&MCI, SchedClassID);
  if (VariantDescriptors.contains(VDKey))
    return *VariantDescriptors[VDKey];

  return createInstrDescImpl(MCI, IVec);
}

STATISTIC(NumVariantInst, "Number of MCInsts that don't have a static Desc");

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI,
                                const SmallVector<Instrument *> &IVec) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  Instruction *NewIS = nullptr;
  std::unique_ptr<Instruction> CreatedIS;
  bool IsInstRecycled = false;

  if (!D.IsRecyclable)
    ++NumVariantInst;

  if (D.IsRecyclable && InstRecycleCB) {
    if (auto *I = InstRecycleCB(D)) {
      NewIS = I;
      NewIS->reset();
      IsInstRecycled = true;
    }
  }
  if (!IsInstRecycled) {
    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
    NewIS = CreatedIS.get();
  }

  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedClassDesc &SCDesc =
      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);

  NewIS->setMayLoad(MCDesc.mayLoad());
  NewIS->setMayStore(MCDesc.mayStore());
  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
  NewIS->setBeginGroup(SCDesc.BeginGroup);
  NewIS->setEndGroup(SCDesc.EndGroup);
  NewIS->setRetireOOO(SCDesc.RetireOOO);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  size_t Idx = 0U;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    ReadState *RS = nullptr;
    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
      NewIS->getUses()[Idx] = ReadState(RD, RegID);
      RS = &NewIS->getUses()[Idx++];
    } else {
      NewIS->getUses().emplace_back(RD, RegID);
      RS = &NewIS->getUses().back();
      ++Idx;
    }

    if (IsDepBreaking) {
      // A mask of all zeroes means: dependencies are broken for all explicit
      // register input operands.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS->setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
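        // Example (hypothetical): for Mask == 0b10, only the operand with
        // UseIndex 1 has its dependency broken.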
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS->setIndependentFromDef();
        }
      }
    }
  }
  if (IsInstRecycled && Idx < NewIS->getUses().size())
    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);

  // Early exit if there are no writes.
  if (D.Writes.empty()) {
    if (IsInstRecycled)
      return llvm::make_error<RecycledInstErr>(NewIS);
    else
      return std::move(CreatedIS);
  }

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
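  // For example, on X86-64 a write to EAX implicitly zeroes the upper half of
  // RAX; bit WriteIndex of WriteMask is expected to be set for such a write.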
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  Idx = 0U;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg.
    if (WD.IsOptionalDef && !RegID) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
      NewIS->getDefs()[Idx++] =
          WriteState(WD, RegID,
                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
                     /* WritesZero */ IsZeroIdiom);
    } else {
      NewIS->getDefs().emplace_back(WD, RegID,
                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                    /* WritesZero */ IsZeroIdiom);
      ++Idx;
    }
    ++WriteIndex;
  }
  if (IsInstRecycled && Idx < NewIS->getDefs().size())
    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);

  if (IsInstRecycled)
    return llvm::make_error<RecycledInstErr>(NewIS);
  else
    return std::move(CreatedIS);
}
} // namespace mca
} // namespace llvm