1 | //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This implements the ScheduleDAG class, which is a base class used by |
10 | // scheduling implementation classes. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "ScheduleDAGSDNodes.h" |
15 | #include "InstrEmitter.h" |
16 | #include "SDNodeDbgValue.h" |
17 | #include "llvm/ADT/DenseMap.h" |
18 | #include "llvm/ADT/SmallPtrSet.h" |
19 | #include "llvm/ADT/SmallSet.h" |
20 | #include "llvm/ADT/SmallVector.h" |
21 | #include "llvm/ADT/Statistic.h" |
22 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
23 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
24 | #include "llvm/CodeGen/SelectionDAG.h" |
25 | #include "llvm/CodeGen/TargetInstrInfo.h" |
26 | #include "llvm/CodeGen/TargetLowering.h" |
27 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
28 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
29 | #include "llvm/Config/llvm-config.h" |
30 | #include "llvm/MC/MCInstrItineraries.h" |
31 | #include "llvm/Support/CommandLine.h" |
32 | #include "llvm/Support/Debug.h" |
33 | #include "llvm/Support/raw_ostream.h" |
34 | #include "llvm/Target/TargetMachine.h" |
35 | using namespace llvm; |
36 | |
37 | #define DEBUG_TYPE "pre-RA-sched" |
38 | |
39 | STATISTIC(LoadsClustered, "Number of loads clustered together" ); |
40 | |
41 | // This allows the latency-based scheduler to notice high latency instructions |
42 | // without a target itinerary. The choice of number here has more to do with |
43 | // balancing scheduler heuristics than with the actual machine latency. |
44 | static cl::opt<int> HighLatencyCycles( |
45 | "sched-high-latency-cycles" , cl::Hidden, cl::init(Val: 10), |
46 | cl::desc("Roughly estimate the number of cycles that 'long latency'" |
47 | "instructions take for targets with no itinerary" )); |
48 | |
49 | ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) |
50 | : ScheduleDAG(mf), InstrItins(mf.getSubtarget().getInstrItineraryData()) {} |
51 | |
52 | /// Run - perform scheduling. |
53 | /// |
54 | void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) { |
55 | BB = bb; |
56 | DAG = dag; |
57 | |
58 | // Clear the scheduler's SUnit DAG. |
59 | ScheduleDAG::clearDAG(); |
60 | Sequence.clear(); |
61 | |
62 | // Invoke the target's selection of scheduler. |
63 | Schedule(); |
64 | } |
65 | |
66 | /// NewSUnit - Creates a new SUnit and return a ptr to it. |
67 | /// |
68 | SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { |
69 | #ifndef NDEBUG |
70 | const SUnit *Addr = nullptr; |
71 | if (!SUnits.empty()) |
72 | Addr = &SUnits[0]; |
73 | #endif |
74 | SUnits.emplace_back(args&: N, args: (unsigned)SUnits.size()); |
75 | assert((Addr == nullptr || Addr == &SUnits[0]) && |
76 | "SUnits std::vector reallocated on the fly!" ); |
77 | SUnits.back().OrigNode = &SUnits.back(); |
78 | SUnit *SU = &SUnits.back(); |
79 | const TargetLowering &TLI = DAG->getTargetLoweringInfo(); |
80 | if (!N || |
81 | (N->isMachineOpcode() && |
82 | N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)) |
83 | SU->SchedulingPref = Sched::None; |
84 | else |
85 | SU->SchedulingPref = TLI.getSchedulingPreference(N); |
86 | return SU; |
87 | } |
88 | |
89 | SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { |
90 | SUnit *SU = newSUnit(N: Old->getNode()); |
91 | SU->OrigNode = Old->OrigNode; |
92 | SU->Latency = Old->Latency; |
93 | SU->isVRegCycle = Old->isVRegCycle; |
94 | SU->isCall = Old->isCall; |
95 | SU->isCallOp = Old->isCallOp; |
96 | SU->isTwoAddress = Old->isTwoAddress; |
97 | SU->isCommutable = Old->isCommutable; |
98 | SU->hasPhysRegDefs = Old->hasPhysRegDefs; |
99 | SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; |
100 | SU->isScheduleHigh = Old->isScheduleHigh; |
101 | SU->isScheduleLow = Old->isScheduleLow; |
102 | SU->SchedulingPref = Old->SchedulingPref; |
103 | Old->isCloned = true; |
104 | return SU; |
105 | } |
106 | |
107 | /// CheckForPhysRegDependency - Check if the dependency between def and use of |
108 | /// a specified operand is a physical register dependency. If so, returns the |
109 | /// register and the cost of copying the register. |
110 | static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, |
111 | const TargetRegisterInfo *TRI, |
112 | const TargetInstrInfo *TII, |
113 | const TargetLowering &TLI, |
114 | unsigned &PhysReg, int &Cost) { |
115 | if (Op != 2 || User->getOpcode() != ISD::CopyToReg) |
116 | return; |
117 | |
118 | unsigned Reg = cast<RegisterSDNode>(Val: User->getOperand(Num: 1))->getReg(); |
119 | if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost)) |
120 | return; |
121 | |
122 | if (Register::isVirtualRegister(Reg)) |
123 | return; |
124 | |
125 | unsigned ResNo = User->getOperand(Num: 2).getResNo(); |
126 | if (Def->getOpcode() == ISD::CopyFromReg && |
127 | cast<RegisterSDNode>(Val: Def->getOperand(Num: 1))->getReg() == Reg) { |
128 | PhysReg = Reg; |
129 | } else if (Def->isMachineOpcode()) { |
130 | const MCInstrDesc &II = TII->get(Opcode: Def->getMachineOpcode()); |
131 | if (ResNo >= II.getNumDefs() && II.hasImplicitDefOfPhysReg(Reg)) |
132 | PhysReg = Reg; |
133 | } |
134 | |
135 | if (PhysReg != 0) { |
136 | const TargetRegisterClass *RC = |
137 | TRI->getMinimalPhysRegClass(Reg, VT: Def->getSimpleValueType(ResNo)); |
138 | Cost = RC->getCopyCost(); |
139 | } |
140 | } |
141 | |
142 | // Helper for AddGlue to clone node operands. |
143 | static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs, |
144 | SDValue = SDValue()) { |
145 | SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end()); |
146 | if (ExtraOper.getNode()) |
147 | Ops.push_back(Elt: ExtraOper); |
148 | |
149 | SDVTList VTList = DAG->getVTList(VTs); |
150 | MachineSDNode *MN = dyn_cast<MachineSDNode>(Val: N); |
151 | |
152 | // Store memory references. |
153 | SmallVector<MachineMemOperand *, 2> MMOs; |
154 | if (MN) |
155 | MMOs.assign(in_start: MN->memoperands_begin(), in_end: MN->memoperands_end()); |
156 | |
157 | DAG->MorphNodeTo(N, Opc: N->getOpcode(), VTs: VTList, Ops); |
158 | |
159 | // Reset the memory references |
160 | if (MN) |
161 | DAG->setNodeMemRefs(N: MN, NewMemRefs: MMOs); |
162 | } |
163 | |
164 | static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { |
165 | SDNode *GlueDestNode = Glue.getNode(); |
166 | |
167 | // Don't add glue from a node to itself. |
168 | if (GlueDestNode == N) return false; |
169 | |
170 | // Don't add a glue operand to something that already uses glue. |
171 | if (GlueDestNode && |
172 | N->getOperand(Num: N->getNumOperands()-1).getValueType() == MVT::Glue) { |
173 | return false; |
174 | } |
175 | // Don't add glue to something that already has a glue value. |
176 | if (N->getValueType(ResNo: N->getNumValues() - 1) == MVT::Glue) return false; |
177 | |
178 | SmallVector<EVT, 4> VTs(N->values()); |
179 | if (AddGlue) |
180 | VTs.push_back(MVT::Elt: Glue); |
181 | |
182 | CloneNodeWithValues(N, DAG, VTs, ExtraOper: Glue); |
183 | |
184 | return true; |
185 | } |
186 | |
187 | // Cleanup after unsuccessful AddGlue. Use the standard method of morphing the |
188 | // node even though simply shrinking the value list is sufficient. |
189 | static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { |
190 | assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue && |
191 | !N->hasAnyUseOfValue(N->getNumValues() - 1)) && |
192 | "expected an unused glue value" ); |
193 | |
194 | CloneNodeWithValues(N, DAG, |
195 | VTs: ArrayRef(N->value_begin(), N->getNumValues() - 1)); |
196 | } |
197 | |
198 | /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them. |
199 | /// This function finds loads of the same base and different offsets. If the |
200 | /// offsets are not far apart (target specific), it add MVT::Glue inputs and |
201 | /// outputs to ensure they are scheduled together and in order. This |
202 | /// optimization may benefit some targets by improving cache locality. |
203 | void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { |
204 | SDValue Chain; |
205 | unsigned NumOps = Node->getNumOperands(); |
206 | if (Node->getOperand(Num: NumOps-1).getValueType() == MVT::Other) |
207 | Chain = Node->getOperand(Num: NumOps-1); |
208 | if (!Chain) |
209 | return; |
210 | |
211 | // Skip any load instruction that has a tied input. There may be an additional |
212 | // dependency requiring a different order than by increasing offsets, and the |
213 | // added glue may introduce a cycle. |
214 | auto hasTiedInput = [this](const SDNode *N) { |
215 | const MCInstrDesc &MCID = TII->get(Opcode: N->getMachineOpcode()); |
216 | for (unsigned I = 0; I != MCID.getNumOperands(); ++I) { |
217 | if (MCID.getOperandConstraint(OpNum: I, Constraint: MCOI::TIED_TO) != -1) |
218 | return true; |
219 | } |
220 | |
221 | return false; |
222 | }; |
223 | |
224 | // Look for other loads of the same chain. Find loads that are loading from |
225 | // the same base pointer and different offsets. |
226 | SmallPtrSet<SDNode*, 16> Visited; |
227 | SmallVector<int64_t, 4> Offsets; |
228 | DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode. |
229 | bool Cluster = false; |
230 | SDNode *Base = Node; |
231 | |
232 | if (hasTiedInput(Base)) |
233 | return; |
234 | |
235 | // This algorithm requires a reasonably low use count before finding a match |
236 | // to avoid uselessly blowing up compile time in large blocks. |
237 | unsigned UseCount = 0; |
238 | for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); |
239 | I != E && UseCount < 100; ++I, ++UseCount) { |
240 | if (I.getUse().getResNo() != Chain.getResNo()) |
241 | continue; |
242 | |
243 | SDNode *User = *I; |
244 | if (User == Node || !Visited.insert(Ptr: User).second) |
245 | continue; |
246 | int64_t Offset1, Offset2; |
247 | if (!TII->areLoadsFromSameBasePtr(Load1: Base, Load2: User, Offset1, Offset2) || |
248 | Offset1 == Offset2 || |
249 | hasTiedInput(User)) { |
250 | // FIXME: Should be ok if they addresses are identical. But earlier |
251 | // optimizations really should have eliminated one of the loads. |
252 | continue; |
253 | } |
254 | if (O2SMap.insert(KV: std::make_pair(x&: Offset1, y&: Base)).second) |
255 | Offsets.push_back(Elt: Offset1); |
256 | O2SMap.insert(KV: std::make_pair(x&: Offset2, y&: User)); |
257 | Offsets.push_back(Elt: Offset2); |
258 | if (Offset2 < Offset1) |
259 | Base = User; |
260 | Cluster = true; |
261 | // Reset UseCount to allow more matches. |
262 | UseCount = 0; |
263 | } |
264 | |
265 | if (!Cluster) |
266 | return; |
267 | |
268 | // Sort them in increasing order. |
269 | llvm::sort(C&: Offsets); |
270 | |
271 | // Check if the loads are close enough. |
272 | SmallVector<SDNode*, 4> Loads; |
273 | unsigned NumLoads = 0; |
274 | int64_t BaseOff = Offsets[0]; |
275 | SDNode *BaseLoad = O2SMap[BaseOff]; |
276 | Loads.push_back(Elt: BaseLoad); |
277 | for (unsigned i = 1, e = Offsets.size(); i != e; ++i) { |
278 | int64_t Offset = Offsets[i]; |
279 | SDNode *Load = O2SMap[Offset]; |
280 | if (!TII->shouldScheduleLoadsNear(Load1: BaseLoad, Load2: Load, Offset1: BaseOff, Offset2: Offset,NumLoads)) |
281 | break; // Stop right here. Ignore loads that are further away. |
282 | Loads.push_back(Elt: Load); |
283 | ++NumLoads; |
284 | } |
285 | |
286 | if (NumLoads == 0) |
287 | return; |
288 | |
289 | // Cluster loads by adding MVT::Glue outputs and inputs. This also |
290 | // ensure they are scheduled in order of increasing addresses. |
291 | SDNode *Lead = Loads[0]; |
292 | SDValue InGlue; |
293 | if (AddGlue(N: Lead, Glue: InGlue, AddGlue: true, DAG)) |
294 | InGlue = SDValue(Lead, Lead->getNumValues() - 1); |
295 | for (unsigned I = 1, E = Loads.size(); I != E; ++I) { |
296 | bool OutGlue = I < E - 1; |
297 | SDNode *Load = Loads[I]; |
298 | |
299 | // If AddGlue fails, we could leave an unsused glue value. This should not |
300 | // cause any |
301 | if (AddGlue(N: Load, Glue: InGlue, AddGlue: OutGlue, DAG)) { |
302 | if (OutGlue) |
303 | InGlue = SDValue(Load, Load->getNumValues() - 1); |
304 | |
305 | ++LoadsClustered; |
306 | } |
307 | else if (!OutGlue && InGlue.getNode()) |
308 | RemoveUnusedGlue(N: InGlue.getNode(), DAG); |
309 | } |
310 | } |
311 | |
312 | /// ClusterNodes - Cluster certain nodes which should be scheduled together. |
313 | /// |
314 | void ScheduleDAGSDNodes::ClusterNodes() { |
315 | for (SDNode &NI : DAG->allnodes()) { |
316 | SDNode *Node = &NI; |
317 | if (!Node || !Node->isMachineOpcode()) |
318 | continue; |
319 | |
320 | unsigned Opc = Node->getMachineOpcode(); |
321 | const MCInstrDesc &MCID = TII->get(Opcode: Opc); |
322 | if (MCID.mayLoad()) |
323 | // Cluster loads from "near" addresses into combined SUnits. |
324 | ClusterNeighboringLoads(Node); |
325 | } |
326 | } |
327 | |
328 | void ScheduleDAGSDNodes::BuildSchedUnits() { |
329 | // During scheduling, the NodeId field of SDNode is used to map SDNodes |
330 | // to their associated SUnits by holding SUnits table indices. A value |
331 | // of -1 means the SDNode does not yet have an associated SUnit. |
332 | unsigned NumNodes = 0; |
333 | for (SDNode &NI : DAG->allnodes()) { |
334 | NI.setNodeId(-1); |
335 | ++NumNodes; |
336 | } |
337 | |
338 | // Reserve entries in the vector for each of the SUnits we are creating. This |
339 | // ensure that reallocation of the vector won't happen, so SUnit*'s won't get |
340 | // invalidated. |
341 | // FIXME: Multiply by 2 because we may clone nodes during scheduling. |
342 | // This is a temporary workaround. |
343 | SUnits.reserve(n: NumNodes * 2); |
344 | |
345 | // Add all nodes in depth first order. |
346 | SmallVector<SDNode*, 64> Worklist; |
347 | SmallPtrSet<SDNode*, 32> Visited; |
348 | Worklist.push_back(Elt: DAG->getRoot().getNode()); |
349 | Visited.insert(Ptr: DAG->getRoot().getNode()); |
350 | |
351 | SmallVector<SUnit*, 8> CallSUnits; |
352 | while (!Worklist.empty()) { |
353 | SDNode *NI = Worklist.pop_back_val(); |
354 | |
355 | // Add all operands to the worklist unless they've already been added. |
356 | for (const SDValue &Op : NI->op_values()) |
357 | if (Visited.insert(Ptr: Op.getNode()).second) |
358 | Worklist.push_back(Elt: Op.getNode()); |
359 | |
360 | if (isPassiveNode(Node: NI)) // Leaf node, e.g. a TargetImmediate. |
361 | continue; |
362 | |
363 | // If this node has already been processed, stop now. |
364 | if (NI->getNodeId() != -1) continue; |
365 | |
366 | SUnit *NodeSUnit = newSUnit(N: NI); |
367 | |
368 | // See if anything is glued to this node, if so, add them to glued |
369 | // nodes. Nodes can have at most one glue input and one glue output. Glue |
370 | // is required to be the last operand and result of a node. |
371 | |
372 | // Scan up to find glued preds. |
373 | SDNode *N = NI; |
374 | while (N->getNumOperands() && |
375 | N->getOperand(Num: N->getNumOperands()-1).getValueType() == MVT::Glue) { |
376 | N = N->getOperand(Num: N->getNumOperands()-1).getNode(); |
377 | assert(N->getNodeId() == -1 && "Node already inserted!" ); |
378 | N->setNodeId(NodeSUnit->NodeNum); |
379 | if (N->isMachineOpcode() && TII->get(Opcode: N->getMachineOpcode()).isCall()) |
380 | NodeSUnit->isCall = true; |
381 | } |
382 | |
383 | // Scan down to find any glued succs. |
384 | N = NI; |
385 | while (N->getValueType(ResNo: N->getNumValues()-1) == MVT::Glue) { |
386 | SDValue GlueVal(N, N->getNumValues()-1); |
387 | |
388 | // There are either zero or one users of the Glue result. |
389 | bool HasGlueUse = false; |
390 | for (SDNode *U : N->uses()) |
391 | if (GlueVal.isOperandOf(N: U)) { |
392 | HasGlueUse = true; |
393 | assert(N->getNodeId() == -1 && "Node already inserted!" ); |
394 | N->setNodeId(NodeSUnit->NodeNum); |
395 | N = U; |
396 | if (N->isMachineOpcode() && TII->get(Opcode: N->getMachineOpcode()).isCall()) |
397 | NodeSUnit->isCall = true; |
398 | break; |
399 | } |
400 | if (!HasGlueUse) break; |
401 | } |
402 | |
403 | if (NodeSUnit->isCall) |
404 | CallSUnits.push_back(Elt: NodeSUnit); |
405 | |
406 | // Schedule zero-latency TokenFactor below any nodes that may increase the |
407 | // schedule height. Otherwise, ancestors of the TokenFactor may appear to |
408 | // have false stalls. |
409 | if (NI->getOpcode() == ISD::TokenFactor) |
410 | NodeSUnit->isScheduleLow = true; |
411 | |
412 | // If there are glue operands involved, N is now the bottom-most node |
413 | // of the sequence of nodes that are glued together. |
414 | // Update the SUnit. |
415 | NodeSUnit->setNode(N); |
416 | assert(N->getNodeId() == -1 && "Node already inserted!" ); |
417 | N->setNodeId(NodeSUnit->NodeNum); |
418 | |
419 | // Compute NumRegDefsLeft. This must be done before AddSchedEdges. |
420 | InitNumRegDefsLeft(SU: NodeSUnit); |
421 | |
422 | // Assign the Latency field of NodeSUnit using target-provided information. |
423 | computeLatency(SU: NodeSUnit); |
424 | } |
425 | |
426 | // Find all call operands. |
427 | while (!CallSUnits.empty()) { |
428 | SUnit *SU = CallSUnits.pop_back_val(); |
429 | for (const SDNode *SUNode = SU->getNode(); SUNode; |
430 | SUNode = SUNode->getGluedNode()) { |
431 | if (SUNode->getOpcode() != ISD::CopyToReg) |
432 | continue; |
433 | SDNode *SrcN = SUNode->getOperand(Num: 2).getNode(); |
434 | if (isPassiveNode(Node: SrcN)) continue; // Not scheduled. |
435 | SUnit *SrcSU = &SUnits[SrcN->getNodeId()]; |
436 | SrcSU->isCallOp = true; |
437 | } |
438 | } |
439 | } |
440 | |
441 | void ScheduleDAGSDNodes::AddSchedEdges() { |
442 | const TargetSubtargetInfo &ST = MF.getSubtarget(); |
443 | |
444 | // Check to see if the scheduler cares about latencies. |
445 | bool UnitLatencies = forceUnitLatencies(); |
446 | |
447 | // Pass 2: add the preds, succs, etc. |
448 | for (SUnit &SU : SUnits) { |
449 | SDNode *MainNode = SU.getNode(); |
450 | |
451 | if (MainNode->isMachineOpcode()) { |
452 | unsigned Opc = MainNode->getMachineOpcode(); |
453 | const MCInstrDesc &MCID = TII->get(Opcode: Opc); |
454 | for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { |
455 | if (MCID.getOperandConstraint(OpNum: i, Constraint: MCOI::TIED_TO) != -1) { |
456 | SU.isTwoAddress = true; |
457 | break; |
458 | } |
459 | } |
460 | if (MCID.isCommutable()) |
461 | SU.isCommutable = true; |
462 | } |
463 | |
464 | // Find all predecessors and successors of the group. |
465 | for (SDNode *N = SU.getNode(); N; N = N->getGluedNode()) { |
466 | if (N->isMachineOpcode() && |
467 | !TII->get(Opcode: N->getMachineOpcode()).implicit_defs().empty()) { |
468 | SU.hasPhysRegClobbers = true; |
469 | unsigned NumUsed = InstrEmitter::CountResults(Node: N); |
470 | while (NumUsed != 0 && !N->hasAnyUseOfValue(Value: NumUsed - 1)) |
471 | --NumUsed; // Skip over unused values at the end. |
472 | if (NumUsed > TII->get(Opcode: N->getMachineOpcode()).getNumDefs()) |
473 | SU.hasPhysRegDefs = true; |
474 | } |
475 | |
476 | for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { |
477 | SDNode *OpN = N->getOperand(Num: i).getNode(); |
478 | unsigned DefIdx = N->getOperand(Num: i).getResNo(); |
479 | if (isPassiveNode(Node: OpN)) continue; // Not scheduled. |
480 | SUnit *OpSU = &SUnits[OpN->getNodeId()]; |
481 | assert(OpSU && "Node has no SUnit!" ); |
482 | if (OpSU == &SU) |
483 | continue; // In the same group. |
484 | |
485 | EVT OpVT = N->getOperand(Num: i).getValueType(); |
486 | assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!" ); |
487 | bool isChain = OpVT == MVT::Other; |
488 | |
489 | unsigned PhysReg = 0; |
490 | int Cost = 1; |
491 | // Determine if this is a physical register dependency. |
492 | const TargetLowering &TLI = DAG->getTargetLoweringInfo(); |
493 | CheckForPhysRegDependency(Def: OpN, User: N, Op: i, TRI, TII, TLI, PhysReg, Cost); |
494 | assert((PhysReg == 0 || !isChain) && |
495 | "Chain dependence via physreg data?" ); |
496 | // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler |
497 | // emits a copy from the physical register to a virtual register unless |
498 | // it requires a cross class copy (cost < 0). That means we are only |
499 | // treating "expensive to copy" register dependency as physical register |
500 | // dependency. This may change in the future though. |
501 | if (Cost >= 0 && !StressSched) |
502 | PhysReg = 0; |
503 | |
504 | // If this is a ctrl dep, latency is 1. |
505 | unsigned OpLatency = isChain ? 1 : OpSU->Latency; |
506 | // Special-case TokenFactor chains as zero-latency. |
507 | if(isChain && OpN->getOpcode() == ISD::TokenFactor) |
508 | OpLatency = 0; |
509 | |
510 | SDep Dep = isChain ? SDep(OpSU, SDep::Barrier) |
511 | : SDep(OpSU, SDep::Data, PhysReg); |
512 | Dep.setLatency(OpLatency); |
513 | if (!isChain && !UnitLatencies) { |
514 | computeOperandLatency(Def: OpN, Use: N, OpIdx: i, dep&: Dep); |
515 | ST.adjustSchedDependency(Def: OpSU, DefOpIdx: DefIdx, Use: &SU, UseOpIdx: i, Dep); |
516 | } |
517 | |
518 | if (!SU.addPred(D: Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { |
519 | // Multiple register uses are combined in the same SUnit. For example, |
520 | // we could have a set of glued nodes with all their defs consumed by |
521 | // another set of glued nodes. Register pressure tracking sees this as |
522 | // a single use, so to keep pressure balanced we reduce the defs. |
523 | // |
524 | // We can't tell (without more book-keeping) if this results from |
525 | // glued nodes or duplicate operands. As long as we don't reduce |
526 | // NumRegDefsLeft to zero, we handle the common cases well. |
527 | --OpSU->NumRegDefsLeft; |
528 | } |
529 | } |
530 | } |
531 | } |
532 | } |
533 | |
534 | /// BuildSchedGraph - Build the SUnit graph from the selection dag that we |
535 | /// are input. This SUnit graph is similar to the SelectionDAG, but |
536 | /// excludes nodes that aren't interesting to scheduling, and represents |
537 | /// glued together nodes with a single SUnit. |
538 | void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) { |
539 | // Cluster certain nodes which should be scheduled together. |
540 | ClusterNodes(); |
541 | // Populate the SUnits array. |
542 | BuildSchedUnits(); |
543 | // Compute all the scheduling dependencies between nodes. |
544 | AddSchedEdges(); |
545 | } |
546 | |
547 | // Initialize NumNodeDefs for the current Node's opcode. |
548 | void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { |
549 | // Check for phys reg copy. |
550 | if (!Node) |
551 | return; |
552 | |
553 | if (!Node->isMachineOpcode()) { |
554 | if (Node->getOpcode() == ISD::CopyFromReg) |
555 | NodeNumDefs = 1; |
556 | else |
557 | NodeNumDefs = 0; |
558 | return; |
559 | } |
560 | unsigned POpc = Node->getMachineOpcode(); |
561 | if (POpc == TargetOpcode::IMPLICIT_DEF) { |
562 | // No register need be allocated for this. |
563 | NodeNumDefs = 0; |
564 | return; |
565 | } |
566 | if (POpc == TargetOpcode::PATCHPOINT && |
567 | Node->getValueType(ResNo: 0) == MVT::Other) { |
568 | // PATCHPOINT is defined to have one result, but it might really have none |
569 | // if we're not using CallingConv::AnyReg. Don't mistake the chain for a |
570 | // real definition. |
571 | NodeNumDefs = 0; |
572 | return; |
573 | } |
574 | unsigned NRegDefs = SchedDAG->TII->get(Opcode: Node->getMachineOpcode()).getNumDefs(); |
575 | // Some instructions define regs that are not represented in the selection DAG |
576 | // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues. |
577 | NodeNumDefs = std::min(a: Node->getNumValues(), b: NRegDefs); |
578 | DefIdx = 0; |
579 | } |
580 | |
581 | // Construct a RegDefIter for this SUnit and find the first valid value. |
582 | ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU, |
583 | const ScheduleDAGSDNodes *SD) |
584 | : SchedDAG(SD), Node(SU->getNode()) { |
585 | InitNodeNumDefs(); |
586 | Advance(); |
587 | } |
588 | |
589 | // Advance to the next valid value defined by the SUnit. |
590 | void ScheduleDAGSDNodes::RegDefIter::Advance() { |
591 | for (;Node;) { // Visit all glued nodes. |
592 | for (;DefIdx < NodeNumDefs; ++DefIdx) { |
593 | if (!Node->hasAnyUseOfValue(Value: DefIdx)) |
594 | continue; |
595 | ValueType = Node->getSimpleValueType(ResNo: DefIdx); |
596 | ++DefIdx; |
597 | return; // Found a normal regdef. |
598 | } |
599 | Node = Node->getGluedNode(); |
600 | if (!Node) { |
601 | return; // No values left to visit. |
602 | } |
603 | InitNodeNumDefs(); |
604 | } |
605 | } |
606 | |
607 | void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { |
608 | assert(SU->NumRegDefsLeft == 0 && "expect a new node" ); |
609 | for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) { |
610 | assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected" ); |
611 | ++SU->NumRegDefsLeft; |
612 | } |
613 | } |
614 | |
615 | void ScheduleDAGSDNodes::computeLatency(SUnit *SU) { |
616 | SDNode *N = SU->getNode(); |
617 | |
618 | // TokenFactor operands are considered zero latency, and some schedulers |
619 | // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero |
620 | // whenever node latency is nonzero. |
621 | if (N && N->getOpcode() == ISD::TokenFactor) { |
622 | SU->Latency = 0; |
623 | return; |
624 | } |
625 | |
626 | // Check to see if the scheduler cares about latencies. |
627 | if (forceUnitLatencies()) { |
628 | SU->Latency = 1; |
629 | return; |
630 | } |
631 | |
632 | if (!InstrItins || InstrItins->isEmpty()) { |
633 | if (N && N->isMachineOpcode() && |
634 | TII->isHighLatencyDef(opc: N->getMachineOpcode())) |
635 | SU->Latency = HighLatencyCycles; |
636 | else |
637 | SU->Latency = 1; |
638 | return; |
639 | } |
640 | |
641 | // Compute the latency for the node. We use the sum of the latencies for |
642 | // all nodes glued together into this SUnit. |
643 | SU->Latency = 0; |
644 | for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) |
645 | if (N->isMachineOpcode()) |
646 | SU->Latency += TII->getInstrLatency(ItinData: InstrItins, Node: N); |
647 | } |
648 | |
649 | void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, |
650 | unsigned OpIdx, SDep& dep) const{ |
651 | // Check to see if the scheduler cares about latencies. |
652 | if (forceUnitLatencies()) |
653 | return; |
654 | |
655 | if (dep.getKind() != SDep::Data) |
656 | return; |
657 | |
658 | unsigned DefIdx = Use->getOperand(Num: OpIdx).getResNo(); |
659 | if (Use->isMachineOpcode()) |
660 | // Adjust the use operand index by num of defs. |
661 | OpIdx += TII->get(Opcode: Use->getMachineOpcode()).getNumDefs(); |
662 | std::optional<unsigned> Latency = |
663 | TII->getOperandLatency(ItinData: InstrItins, DefNode: Def, DefIdx, UseNode: Use, UseIdx: OpIdx); |
664 | if (Latency > 1U && Use->getOpcode() == ISD::CopyToReg && |
665 | !BB->succ_empty()) { |
666 | unsigned Reg = cast<RegisterSDNode>(Val: Use->getOperand(Num: 1))->getReg(); |
667 | if (Register::isVirtualRegister(Reg)) |
668 | // This copy is a liveout value. It is likely coalesced, so reduce the |
669 | // latency so not to penalize the def. |
670 | // FIXME: need target specific adjustment here? |
671 | Latency = *Latency - 1; |
672 | } |
673 | if (Latency) |
674 | dep.setLatency(*Latency); |
675 | } |
676 | |
677 | void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const { |
678 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
679 | dumpNodeName(SU); |
680 | dbgs() << ": " ; |
681 | |
682 | if (!SU.getNode()) { |
683 | dbgs() << "PHYS REG COPY\n" ; |
684 | return; |
685 | } |
686 | |
687 | SU.getNode()->dump(G: DAG); |
688 | dbgs() << "\n" ; |
689 | SmallVector<SDNode *, 4> GluedNodes; |
690 | for (SDNode *N = SU.getNode()->getGluedNode(); N; N = N->getGluedNode()) |
691 | GluedNodes.push_back(Elt: N); |
692 | while (!GluedNodes.empty()) { |
693 | dbgs() << " " ; |
694 | GluedNodes.back()->dump(G: DAG); |
695 | dbgs() << "\n" ; |
696 | GluedNodes.pop_back(); |
697 | } |
698 | #endif |
699 | } |
700 | |
701 | void ScheduleDAGSDNodes::dump() const { |
702 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
703 | if (EntrySU.getNode() != nullptr) |
704 | dumpNodeAll(SU: EntrySU); |
705 | for (const SUnit &SU : SUnits) |
706 | dumpNodeAll(SU); |
707 | if (ExitSU.getNode() != nullptr) |
708 | dumpNodeAll(SU: ExitSU); |
709 | #endif |
710 | } |
711 | |
712 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
713 | void ScheduleDAGSDNodes::dumpSchedule() const { |
714 | for (const SUnit *SU : Sequence) { |
715 | if (SU) |
716 | dumpNode(SU: *SU); |
717 | else |
718 | dbgs() << "**** NOOP ****\n" ; |
719 | } |
720 | } |
721 | #endif |
722 | |
723 | #ifndef NDEBUG |
724 | /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that |
725 | /// their state is consistent with the nodes listed in Sequence. |
726 | /// |
727 | void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { |
728 | unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp); |
729 | unsigned Noops = llvm::count(Range&: Sequence, Element: nullptr); |
730 | assert(Sequence.size() - Noops == ScheduledNodes && |
731 | "The number of nodes scheduled doesn't match the expected number!" ); |
732 | } |
733 | #endif // NDEBUG |
734 | |
735 | /// ProcessSDDbgValues - Process SDDbgValues associated with this node. |
736 | static void |
737 | ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, |
738 | SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, |
739 | DenseMap<SDValue, Register> &VRBaseMap, unsigned Order) { |
740 | if (!N->getHasDebugValue()) |
741 | return; |
742 | |
743 | /// Returns true if \p DV has any VReg operand locations which don't exist in |
744 | /// VRBaseMap. |
745 | auto HasUnknownVReg = [&VRBaseMap](SDDbgValue *DV) { |
746 | for (const SDDbgOperand &L : DV->getLocationOps()) { |
747 | if (L.getKind() == SDDbgOperand::SDNODE && |
748 | VRBaseMap.count(Val: {L.getSDNode(), L.getResNo()}) == 0) |
749 | return true; |
750 | } |
751 | return false; |
752 | }; |
753 | |
754 | // Opportunistically insert immediate dbg_value uses, i.e. those with the same |
755 | // source order number as N. |
756 | MachineBasicBlock *BB = Emitter.getBlock(); |
757 | MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); |
758 | for (auto *DV : DAG->GetDbgValues(SD: N)) { |
759 | if (DV->isEmitted()) |
760 | continue; |
761 | unsigned DVOrder = DV->getOrder(); |
762 | if (Order != 0 && DVOrder != Order) |
763 | continue; |
764 | // If DV has any VReg location operands which haven't been mapped then |
765 | // either that node is no longer available or we just haven't visited the |
766 | // node yet. In the former case we should emit an undef dbg_value, but we |
767 | // can do it later. And for the latter we'll want to wait until all |
768 | // dependent nodes have been visited. |
769 | if (!DV->isInvalidated() && HasUnknownVReg(DV)) |
770 | continue; |
771 | MachineInstr *DbgMI = Emitter.EmitDbgValue(SD: DV, VRBaseMap); |
772 | if (!DbgMI) |
773 | continue; |
774 | Orders.push_back(Elt: {DVOrder, DbgMI}); |
775 | BB->insert(I: InsertPos, MI: DbgMI); |
776 | } |
777 | } |
778 | |
779 | // ProcessSourceNode - Process nodes with source order numbers. These are added |
780 | // to a vector which EmitSchedule uses to determine how to insert dbg_value |
781 | // instructions in the right order. |
782 | static void |
783 | ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, |
784 | DenseMap<SDValue, Register> &VRBaseMap, |
785 | SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders, |
786 | SmallSet<Register, 8> &Seen, MachineInstr *NewInsn) { |
787 | unsigned Order = N->getIROrder(); |
788 | if (!Order || Seen.count(V: Order)) { |
789 | // Process any valid SDDbgValues even if node does not have any order |
790 | // assigned. |
791 | ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order: 0); |
792 | return; |
793 | } |
794 | |
795 | // If a new instruction was generated for this Order number, record it. |
796 | // Otherwise, leave this order number unseen: we will either find later |
797 | // instructions for it, or leave it unseen if there were no instructions at |
798 | // all. |
799 | if (NewInsn) { |
800 | Seen.insert(V: Order); |
801 | Orders.push_back(Elt: {Order, NewInsn}); |
802 | } |
803 | |
804 | // Even if no instruction was generated, a Value may have become defined via |
805 | // earlier nodes. Try to process them now. |
806 | ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); |
807 | } |
808 | |
809 | void ScheduleDAGSDNodes:: |
810 | EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap, |
811 | MachineBasicBlock::iterator InsertPos) { |
812 | for (const SDep &Pred : SU->Preds) { |
813 | if (Pred.isCtrl()) |
814 | continue; // ignore chain preds |
815 | if (Pred.getSUnit()->CopyDstRC) { |
816 | // Copy to physical register. |
817 | DenseMap<SUnit *, Register>::iterator VRI = |
818 | VRBaseMap.find(Val: Pred.getSUnit()); |
819 | assert(VRI != VRBaseMap.end() && "Node emitted out of order - late" ); |
820 | // Find the destination physical register. |
821 | Register Reg; |
822 | for (const SDep &Succ : SU->Succs) { |
823 | if (Succ.isCtrl()) |
824 | continue; // ignore chain preds |
825 | if (Succ.getReg()) { |
826 | Reg = Succ.getReg(); |
827 | break; |
828 | } |
829 | } |
830 | BuildMI(BB&: *BB, I: InsertPos, MIMD: DebugLoc(), MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: Reg) |
831 | .addReg(RegNo: VRI->second); |
832 | } else { |
833 | // Copy from physical register. |
834 | assert(Pred.getReg() && "Unknown physical register!" ); |
835 | Register VRBase = MRI.createVirtualRegister(RegClass: SU->CopyDstRC); |
836 | bool isNew = VRBaseMap.insert(KV: std::make_pair(x&: SU, y&: VRBase)).second; |
837 | (void)isNew; // Silence compiler warning. |
838 | assert(isNew && "Node emitted out of order - early" ); |
839 | BuildMI(BB&: *BB, I: InsertPos, MIMD: DebugLoc(), MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: VRBase) |
840 | .addReg(RegNo: Pred.getReg()); |
841 | } |
842 | break; |
843 | } |
844 | } |
845 | |
846 | /// EmitSchedule - Emit the machine code in scheduled order. Return the new |
847 | /// InsertPos and MachineBasicBlock that contains this insertion |
848 | /// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does |
849 | /// not necessarily refer to returned BB. The emitter may split blocks. |
850 | MachineBasicBlock *ScheduleDAGSDNodes:: |
851 | EmitSchedule(MachineBasicBlock::iterator &InsertPos) { |
852 | InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); |
853 | DenseMap<SDValue, Register> VRBaseMap; |
854 | DenseMap<SUnit*, Register> CopyVRBaseMap; |
855 | SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders; |
856 | SmallSet<Register, 8> Seen; |
857 | bool HasDbg = DAG->hasDebugValues(); |
858 | |
859 | // Emit a node, and determine where its first instruction is for debuginfo. |
860 | // Zero, one, or multiple instructions can be created when emitting a node. |
861 | auto EmitNode = |
862 | [&](SDNode *Node, bool IsClone, bool IsCloned, |
863 | DenseMap<SDValue, Register> &VRBaseMap) -> MachineInstr * { |
864 | // Fetch instruction prior to this, or end() if nonexistant. |
865 | auto GetPrevInsn = [&](MachineBasicBlock::iterator I) { |
866 | if (I == BB->begin()) |
867 | return BB->end(); |
868 | else |
869 | return std::prev(x: Emitter.getInsertPos()); |
870 | }; |
871 | |
872 | MachineBasicBlock::iterator Before = GetPrevInsn(Emitter.getInsertPos()); |
873 | Emitter.EmitNode(Node, IsClone, IsCloned, VRBaseMap); |
874 | MachineBasicBlock::iterator After = GetPrevInsn(Emitter.getInsertPos()); |
875 | |
876 | // If the iterator did not change, no instructions were inserted. |
877 | if (Before == After) |
878 | return nullptr; |
879 | |
880 | MachineInstr *MI; |
881 | if (Before == BB->end()) { |
882 | // There were no prior instructions; the new ones must start at the |
883 | // beginning of the block. |
884 | MI = &Emitter.getBlock()->instr_front(); |
885 | } else { |
886 | // Return first instruction after the pre-existing instructions. |
887 | MI = &*std::next(x: Before); |
888 | } |
889 | |
890 | if (MI->isCandidateForCallSiteEntry() && |
891 | DAG->getTarget().Options.EmitCallSiteInfo) |
892 | MF.addCallArgsForwardingRegs(CallI: MI, CallInfo: DAG->getCallSiteInfo(Node)); |
893 | |
894 | if (DAG->getNoMergeSiteInfo(Node)) { |
895 | MI->setFlag(MachineInstr::MIFlag::NoMerge); |
896 | } |
897 | |
898 | if (MDNode *MD = DAG->getPCSections(Node)) |
899 | MI->setPCSections(MF, MD); |
900 | |
901 | return MI; |
902 | }; |
903 | |
904 | // If this is the first BB, emit byval parameter dbg_value's. |
905 | if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) { |
906 | SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin(); |
907 | SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd(); |
908 | for (; PDI != PDE; ++PDI) { |
909 | MachineInstr *DbgMI= Emitter.EmitDbgValue(SD: *PDI, VRBaseMap); |
910 | if (DbgMI) { |
911 | BB->insert(I: InsertPos, MI: DbgMI); |
912 | // We re-emit the dbg_value closer to its use, too, after instructions |
913 | // are emitted to the BB. |
914 | (*PDI)->clearIsEmitted(); |
915 | } |
916 | } |
917 | } |
918 | |
919 | for (SUnit *SU : Sequence) { |
920 | if (!SU) { |
921 | // Null SUnit* is a noop. |
922 | TII->insertNoop(MBB&: *Emitter.getBlock(), MI: InsertPos); |
923 | continue; |
924 | } |
925 | |
926 | // For pre-regalloc scheduling, create instructions corresponding to the |
927 | // SDNode and any glued SDNodes and append them to the block. |
928 | if (!SU->getNode()) { |
929 | // Emit a copy. |
930 | EmitPhysRegCopy(SU, VRBaseMap&: CopyVRBaseMap, InsertPos); |
931 | continue; |
932 | } |
933 | |
934 | SmallVector<SDNode *, 4> GluedNodes; |
935 | for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) |
936 | GluedNodes.push_back(Elt: N); |
937 | while (!GluedNodes.empty()) { |
938 | SDNode *N = GluedNodes.back(); |
939 | auto NewInsn = EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap); |
940 | // Remember the source order of the inserted instruction. |
941 | if (HasDbg) |
942 | ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); |
943 | |
944 | if (MDNode *MD = DAG->getHeapAllocSite(Node: N)) |
945 | if (NewInsn && NewInsn->isCall()) |
946 | NewInsn->setHeapAllocMarker(MF, MD); |
947 | |
948 | GluedNodes.pop_back(); |
949 | } |
950 | auto NewInsn = |
951 | EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); |
952 | // Remember the source order of the inserted instruction. |
953 | if (HasDbg) |
954 | ProcessSourceNode(N: SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen, |
955 | NewInsn); |
956 | |
957 | if (MDNode *MD = DAG->getHeapAllocSite(Node: SU->getNode())) { |
958 | if (NewInsn && NewInsn->isCall()) |
959 | NewInsn->setHeapAllocMarker(MF, MD); |
960 | } |
961 | } |
962 | |
963 | // Insert all the dbg_values which have not already been inserted in source |
964 | // order sequence. |
965 | if (HasDbg) { |
966 | MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI(); |
967 | |
968 | // Sort the source order instructions and use the order to insert debug |
969 | // values. Use stable_sort so that DBG_VALUEs are inserted in the same order |
970 | // regardless of the host's implementation fo std::sort. |
971 | llvm::stable_sort(Range&: Orders, C: less_first()); |
972 | std::stable_sort(first: DAG->DbgBegin(), last: DAG->DbgEnd(), |
973 | comp: [](const SDDbgValue *LHS, const SDDbgValue *RHS) { |
974 | return LHS->getOrder() < RHS->getOrder(); |
975 | }); |
976 | |
977 | SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); |
978 | SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); |
979 | // Now emit the rest according to source order. |
980 | unsigned LastOrder = 0; |
981 | for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) { |
982 | unsigned Order = Orders[i].first; |
983 | MachineInstr *MI = Orders[i].second; |
984 | // Insert all SDDbgValue's whose order(s) are before "Order". |
985 | assert(MI); |
986 | for (; DI != DE; ++DI) { |
987 | if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order) |
988 | break; |
989 | if ((*DI)->isEmitted()) |
990 | continue; |
991 | |
992 | MachineInstr *DbgMI = Emitter.EmitDbgValue(SD: *DI, VRBaseMap); |
993 | if (DbgMI) { |
994 | if (!LastOrder) |
995 | // Insert to start of the BB (after PHIs). |
996 | BB->insert(I: BBBegin, MI: DbgMI); |
997 | else { |
998 | // Insert at the instruction, which may be in a different |
999 | // block, if the block was split by a custom inserter. |
1000 | MachineBasicBlock::iterator Pos = MI; |
1001 | MI->getParent()->insert(I: Pos, MI: DbgMI); |
1002 | } |
1003 | } |
1004 | } |
1005 | LastOrder = Order; |
1006 | } |
1007 | // Add trailing DbgValue's before the terminator. FIXME: May want to add |
1008 | // some of them before one or more conditional branches? |
1009 | SmallVector<MachineInstr*, 8> DbgMIs; |
1010 | for (; DI != DE; ++DI) { |
1011 | if ((*DI)->isEmitted()) |
1012 | continue; |
1013 | assert((*DI)->getOrder() >= LastOrder && |
1014 | "emitting DBG_VALUE out of order" ); |
1015 | if (MachineInstr *DbgMI = Emitter.EmitDbgValue(SD: *DI, VRBaseMap)) |
1016 | DbgMIs.push_back(Elt: DbgMI); |
1017 | } |
1018 | |
1019 | MachineBasicBlock *InsertBB = Emitter.getBlock(); |
1020 | MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); |
1021 | InsertBB->insert(I: Pos, S: DbgMIs.begin(), E: DbgMIs.end()); |
1022 | |
1023 | SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin(); |
1024 | SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd(); |
1025 | // Now emit the rest according to source order. |
1026 | LastOrder = 0; |
1027 | for (const auto &InstrOrder : Orders) { |
1028 | unsigned Order = InstrOrder.first; |
1029 | MachineInstr *MI = InstrOrder.second; |
1030 | if (!MI) |
1031 | continue; |
1032 | |
1033 | // Insert all SDDbgLabel's whose order(s) are before "Order". |
1034 | for (; DLI != DLE && |
1035 | (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order; |
1036 | ++DLI) { |
1037 | MachineInstr *DbgMI = Emitter.EmitDbgLabel(SD: *DLI); |
1038 | if (DbgMI) { |
1039 | if (!LastOrder) |
1040 | // Insert to start of the BB (after PHIs). |
1041 | BB->insert(I: BBBegin, MI: DbgMI); |
1042 | else { |
1043 | // Insert at the instruction, which may be in a different |
1044 | // block, if the block was split by a custom inserter. |
1045 | MachineBasicBlock::iterator Pos = MI; |
1046 | MI->getParent()->insert(I: Pos, MI: DbgMI); |
1047 | } |
1048 | } |
1049 | } |
1050 | if (DLI == DLE) |
1051 | break; |
1052 | |
1053 | LastOrder = Order; |
1054 | } |
1055 | } |
1056 | |
1057 | InsertPos = Emitter.getInsertPos(); |
1058 | // In some cases, DBG_VALUEs might be inserted after the first terminator, |
1059 | // which results in an invalid MBB. If that happens, move the DBG_VALUEs |
1060 | // before the first terminator. |
1061 | MachineBasicBlock *InsertBB = Emitter.getBlock(); |
1062 | auto FirstTerm = InsertBB->getFirstTerminator(); |
1063 | if (FirstTerm != InsertBB->end()) { |
1064 | assert(!FirstTerm->isDebugValue() && |
1065 | "first terminator cannot be a debug value" ); |
1066 | for (MachineInstr &MI : make_early_inc_range( |
1067 | Range: make_range(x: std::next(x: FirstTerm), y: InsertBB->end()))) { |
1068 | // Only scan up to insertion point. |
1069 | if (&MI == InsertPos) |
1070 | break; |
1071 | |
1072 | if (!MI.isDebugValue()) |
1073 | continue; |
1074 | |
1075 | // The DBG_VALUE was referencing a value produced by a terminator. By |
1076 | // moving the DBG_VALUE, the referenced value also needs invalidating. |
1077 | MI.getOperand(i: 0).ChangeToRegister(Reg: 0, isDef: false); |
1078 | MI.moveBefore(MovePos: &*FirstTerm); |
1079 | } |
1080 | } |
1081 | return InsertBB; |
1082 | } |
1083 | |
1084 | /// Return the basic block label. |
1085 | std::string ScheduleDAGSDNodes::getDAGName() const { |
1086 | return "sunit-dag." + BB->getFullName(); |
1087 | } |
1088 | |