MachinePipeliner.cpp source code [llvm/lib/CodeGen/MachinePipeliner.cpp]

1	//===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10	//
11	// This SMS implementation is a target-independent back-end pass. When enabled,
12	// the pass runs just prior to the register allocation pass, while the machine
13	// IR is in SSA form. If software pipelining is successful, then the original
14	// loop is replaced by the optimized loop. The optimized loop contains one or
15	// more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
16	// the instructions cannot be scheduled in a given MII, we increase the MII by
17	// one and try again.
18	//
19	// The SMS implementation is an extension of the ScheduleDAGInstrs class. We
20	// represent loop carried dependences in the DAG as order edges to the Phi
21	// nodes. We also perform several passes over the DAG to eliminate unnecessary
22	// edges that inhibit the ability to pipeline. The implementation uses the
23	// DFAPacketizer class to compute the minimum initiation interval and to check
24	// where an instruction may be inserted in the pipelined schedule.
25	//
26	// In order for the SMS pass to work, several target specific hooks need to be
27	// implemented to get information about the loop structure and to rewrite
28	// instructions.
29	//
30	//===----------------------------------------------------------------------===//
31
32	#include "llvm/CodeGen/MachinePipeliner.h"
33	#include "llvm/ADT/ArrayRef.h"
34	#include "llvm/ADT/BitVector.h"
35	#include "llvm/ADT/DenseMap.h"
36	#include "llvm/ADT/MapVector.h"
37	#include "llvm/ADT/PriorityQueue.h"
38	#include "llvm/ADT/STLExtras.h"
39	#include "llvm/ADT/SetOperations.h"
40	#include "llvm/ADT/SetVector.h"
41	#include "llvm/ADT/SmallPtrSet.h"
42	#include "llvm/ADT/SmallSet.h"
43	#include "llvm/ADT/SmallVector.h"
44	#include "llvm/ADT/Statistic.h"
45	#include "llvm/ADT/iterator_range.h"
46	#include "llvm/Analysis/AliasAnalysis.h"
47	#include "llvm/Analysis/CycleAnalysis.h"
48	#include "llvm/Analysis/MemoryLocation.h"
49	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
50	#include "llvm/Analysis/ValueTracking.h"
51	#include "llvm/CodeGen/DFAPacketizer.h"
52	#include "llvm/CodeGen/LiveIntervals.h"
53	#include "llvm/CodeGen/MachineBasicBlock.h"
54	#include "llvm/CodeGen/MachineDominators.h"
55	#include "llvm/CodeGen/MachineFunction.h"
56	#include "llvm/CodeGen/MachineFunctionPass.h"
57	#include "llvm/CodeGen/MachineInstr.h"
58	#include "llvm/CodeGen/MachineInstrBuilder.h"
59	#include "llvm/CodeGen/MachineLoopInfo.h"
60	#include "llvm/CodeGen/MachineMemOperand.h"
61	#include "llvm/CodeGen/MachineOperand.h"
62	#include "llvm/CodeGen/MachineRegisterInfo.h"
63	#include "llvm/CodeGen/ModuloSchedule.h"
64	#include "llvm/CodeGen/Register.h"
65	#include "llvm/CodeGen/RegisterClassInfo.h"
66	#include "llvm/CodeGen/RegisterPressure.h"
67	#include "llvm/CodeGen/ScheduleDAG.h"
68	#include "llvm/CodeGen/ScheduleDAGMutation.h"
69	#include "llvm/CodeGen/TargetInstrInfo.h"
70	#include "llvm/CodeGen/TargetOpcodes.h"
71	#include "llvm/CodeGen/TargetRegisterInfo.h"
72	#include "llvm/CodeGen/TargetSubtargetInfo.h"
73	#include "llvm/Config/llvm-config.h"
74	#include "llvm/IR/Attributes.h"
75	#include "llvm/IR/Function.h"
76	#include "llvm/MC/LaneBitmask.h"
77	#include "llvm/MC/MCInstrDesc.h"
78	#include "llvm/MC/MCInstrItineraries.h"
79	#include "llvm/MC/MCRegisterInfo.h"
80	#include "llvm/Pass.h"
81	#include "llvm/Support/CommandLine.h"
82	#include "llvm/Support/Compiler.h"
83	#include "llvm/Support/Debug.h"
84	#include "llvm/Support/MathExtras.h"
85	#include "llvm/Support/raw_ostream.h"
86	#include <algorithm>
87	#include <cassert>
88	#include <climits>
89	#include <cstdint>
90	#include <deque>
91	#include <functional>
92	#include <iomanip>
93	#include <iterator>
94	#include <map>
95	#include <memory>
96	#include <sstream>
97	#include <tuple>
98	#include <utility>
99	#include <vector>
100
101	using namespace llvm;
102
103	#define DEBUG_TYPE "pipeliner"
104
105	STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
106	STATISTIC(NumPipelined, "Number of loops software pipelined");
107	STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
108	STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
109	STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
110	STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
111	STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
112	STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
113	STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
114	STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
115	STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
116
117	/// A command line option to turn software pipelining on or off.
118	static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(Val: true),
119	cl::desc ("Enable Software Pipelining"));
120
121	/// A command line option to enable SWP at -Os.
122	static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
123	cl::desc ("Enable SWP at Os."), cl::Hidden,
124	cl::init(Val: false));
125
126	/// A command line argument to limit minimum initial interval for pipelining.
127	static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
128	cl::desc ("Size limit for the MII."),
129	cl::Hidden, cl::init(Val: `27`));
130
131	/// A command line argument to force pipeliner to use specified initial
132	/// interval.
133	static cl::opt<int> SwpForceII("pipeliner-force-ii",
134	cl::desc ("Force pipeliner to use specified II."),
135	cl::Hidden, cl::init(Val: -`1`));
136
137	/// A command line argument to limit the number of stages in the pipeline.
138	static cl::opt<int>
139	SwpMaxStages("pipeliner-max-stages",
140	cl::desc ("Maximum stages allowed in the generated scheduled."),
141	cl::Hidden, cl::init(Val: `3`));
142
143	/// A command line option to disable the pruning of chain dependences due to
144	/// an unrelated Phi.
145	static cl::opt<bool>
146	SwpPruneDeps("pipeliner-prune-deps",
147	cl::desc ("Prune dependences between unrelated Phi nodes."),
148	cl::Hidden, cl::init(Val: true));
149
150	/// A command line option to disable the pruning of loop carried order
151	/// dependences.
152	static cl::opt<bool>
153	SwpPruneLoopCarried("pipeliner-prune-loop-carried",
154	cl::desc ("Prune loop carried order dependences."),
155	cl::Hidden, cl::init(Val: true));
156
157	#ifndef NDEBUG
158	static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(Val: -`1`));
159	#endif
160
161	static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
162	cl::ReallyHidden,
163	cl::desc ("Ignore RecMII"));
164
165	static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
166	cl::init(Val: false));
167	static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
168	cl::init(Val: false));
169
170	static cl::opt<bool> EmitTestAnnotations(
171	"pipeliner-annotate-for-testing", cl::Hidden, cl::init(Val: false),
172	cl::desc ("Instead of emitting the pipelined code, annotate instructions "
173	"with the generated schedule for feeding into the "
174	"-modulo-schedule-test pass"));
175
176	static cl::opt<bool> ExperimentalCodeGen(
177	"pipeliner-experimental-cg", cl::Hidden, cl::init(Val: false),
178	cl::desc (
179	"Use the experimental peeling code generator for software pipelining"));
180
181	static cl::opt<int> SwpIISearchRange("pipeliner-ii-search-range",
182	cl::desc ("Range to search for II"),
183	cl::Hidden, cl::init(Val: `10`));
184
185	static cl::opt<bool>
186	LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(Val: false),
187	cl::desc ("Limit register pressure of scheduled loop"));
188
189	static cl::opt<int>
190	RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden,
191	cl::init(Val: `5`),
192	cl::desc ("Margin representing the unused percentage of "
193	"the register pressure limit"));
194
195	namespace llvm {
196
197	// A command line option to enable the CopyToPhi DAG mutation.
198	cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
199	cl::init(Val: true),
200	cl::desc ("Enable CopyToPhi DAG Mutation"));
201
202	/// A command line argument to force pipeliner to use specified issue
203	/// width.
204	cl::opt<int> SwpForceIssueWidth(
205	"pipeliner-force-issue-width",
206	cl::desc ("Force pipeliner to use specified issue width."), cl::Hidden,
207	cl::init(Val: -`1`));
208
209	} // end namespace llvm
210
211	unsigned SwingSchedulerDAG::Circuits::MaxPaths = `5`;
212	char MachinePipeliner::ID = `0`;
213	#ifndef NDEBUG
214	int MachinePipeliner::NumTries = `0`;
215	#endif
216	char &llvm::MachinePipelinerID = MachinePipeliner::ID;
217
218	INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
219	"Modulo Software Pipelining", false, false)
220	INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
221	INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
222	INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
223	INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
224	INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
225	"Modulo Software Pipelining", false, false)
226
227	/// The "main" function for implementing Swing Modulo Scheduling.
228	bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
229	if (skipFunction(F: mf.getFunction()))
230	return false;
231
232	if (!EnableSWP)
233	return false;
234
235	if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
236	!EnableSWPOptSize.getPosition())
237	return false;
238
239	if (!mf.getSubtarget().enableMachinePipeliner())
240	return false;
241
242	// Cannot pipeline loops without instruction itineraries if we are using
243	// DFA for the pipeliner.
244	if (mf.getSubtarget().useDFAforSMS() &&
245	(!mf.getSubtarget().getInstrItineraryData() \|\|
246	mf.getSubtarget().getInstrItineraryData()->isEmpty()))
247	return false;
248
249	MF = &mf;
250	MLI = &getAnalysis<MachineLoopInfo>();
251	MDT = &getAnalysis<MachineDominatorTree>();
252	ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
253	TII = MF->getSubtarget().getInstrInfo();
254	RegClassInfo.runOnMachineFunction(MF: *MF);
255
256	for (const auto &L : *MLI)
257	scheduleLoop(L&: *L);
258
259	return false;
260	}
261
262	/// Attempt to perform the SMS algorithm on the specified loop. This function is
263	/// the main entry point for the algorithm. The function identifies candidate
264	/// loops, calculates the minimum initiation interval, and attempts to schedule
265	/// the loop.
266	bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
267	bool Changed = false;
268	for (const auto &InnerLoop : L)
269	Changed \|= scheduleLoop(L&: *InnerLoop);
270
271	#ifndef NDEBUG
272	// Stop trying after reaching the limit (if any).
273	int Limit = SwpLoopLimit;
274	if (Limit >= `0`) {
275	if (NumTries >= SwpLoopLimit)
276	return Changed;
277	NumTries++;
278	}
279	#endif
280
281	setPragmaPipelineOptions(L);
282	if (!canPipelineLoop(L)) {
283	LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
284	ORE->emit(RemarkBuilder: [&]() {
285	return MachineOptimizationRemarkMissed (DEBUG_TYPE, "canPipelineLoop",
286	L.getStartLoc(), L.getHeader())
287	<< "Failed to pipeline loop";
288	});
289
290	LI.LoopPipelinerInfo.reset();
291	return Changed;
292	}
293
294	++NumTrytoPipeline;
295
296	Changed = swingModuloScheduler(L);
297
298	LI.LoopPipelinerInfo.reset();
299	return Changed;
300	}
301
302	void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
303	// Reset the pragma for the next loop in iteration.
304	disabledByPragma = false;
305	II_setByPragma = `0`;
306
307	MachineBasicBlock *LBLK = L.getTopBlock();
308
309	if (LBLK == nullptr)
310	return;
311
312	const BasicBlock *BBLK = LBLK->getBasicBlock();
313	if (BBLK == nullptr)
314	return;
315
316	const Instruction *TI = BBLK->getTerminator();
317	if (TI == nullptr)
318	return;
319
320	MDNode *LoopID = TI->getMetadata(KindID: LLVMContext::MD_loop);
321	if (LoopID == nullptr)
322	return;
323
324	assert(LoopID->getNumOperands() > `0` && "requires atleast one operand");
325	assert(LoopID->getOperand(`0`) == LoopID && "invalid loop");
326
327	for (const MDOperand &MDO : llvm::drop_begin(RangeOrContainer: LoopID->operands())) {
328	MDNode *MD = dyn_cast<MDNode>(Val: MDO);
329
330	if (MD == nullptr)
331	continue;
332
333	MDString *S = dyn_cast<MDString>(Val: MD->getOperand(I: `0`));
334
335	if (S == nullptr)
336	continue;
337
338	if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
339	assert(MD->getNumOperands() == `2` &&
340	"Pipeline initiation interval hint metadata should have two operands.");
341	II_setByPragma =
342	mdconst::extract<ConstantInt>(MD: MD->getOperand(I: `1`))->getZExtValue();
343	assert(II_setByPragma >= `1` && "Pipeline initiation interval must be positive.");
344	} else if (S->getString() == "llvm.loop.pipeline.disable") {
345	disabledByPragma = true;
346	}
347	}
348	}
349
350	/// Return true if the loop can be software pipelined. The algorithm is
351	/// restricted to loops with a single basic block. Make sure that the
352	/// branch in the loop can be analyzed.
353	bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
354	if (L.getNumBlocks() != `1`) {
355	ORE->emit(RemarkBuilder: [&]() {
356	return MachineOptimizationRemarkAnalysis (DEBUG_TYPE, "canPipelineLoop",
357	L.getStartLoc(), L.getHeader())
358	<< "Not a single basic block: "
359	<< ore::NV ("NumBlocks", L.getNumBlocks());
360	});
361	return false;
362	}
363
364	if (disabledByPragma) {
365	ORE->emit(RemarkBuilder: [&]() {
366	return MachineOptimizationRemarkAnalysis (DEBUG_TYPE, "canPipelineLoop",
367	L.getStartLoc(), L.getHeader())
368	<< "Disabled by Pragma.";
369	});
370	return false;
371	}
372
373	// Check if the branch can't be understood because we can't do pipelining
374	// if that's the case.
375	LI.TBB = nullptr;
376	LI.FBB = nullptr;
377	LI.BrCond.clear();
378	if (TII->analyzeBranch(MBB&: *L.getHeader(), TBB&: LI.TBB, FBB&: LI.FBB, Cond&: LI.BrCond)) {
379	LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
380	NumFailBranch ++;
381	ORE->emit(RemarkBuilder: [&]() {
382	return MachineOptimizationRemarkAnalysis (DEBUG_TYPE, "canPipelineLoop",
383	L.getStartLoc(), L.getHeader())
384	<< "The branch can't be understood";
385	});
386	return false;
387	}
388
389	LI.LoopInductionVar = nullptr;
390	LI.LoopCompare = nullptr;
391	LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(LoopBB: L.getTopBlock());
392	if (!LI.LoopPipelinerInfo) {
393	LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
394	NumFailLoop ++;
395	ORE->emit(RemarkBuilder: [&]() {
396	return MachineOptimizationRemarkAnalysis (DEBUG_TYPE, "canPipelineLoop",
397	L.getStartLoc(), L.getHeader())
398	<< "The loop structure is not supported";
399	});
400	return false;
401	}
402
403	if (!L.getLoopPreheader()) {
404	LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
405	NumFailPreheader ++;
406	ORE->emit(RemarkBuilder: [&]() {
407	return MachineOptimizationRemarkAnalysis (DEBUG_TYPE, "canPipelineLoop",
408	L.getStartLoc(), L.getHeader())
409	<< "No loop preheader found";
410	});
411	return false;
412	}
413
414	// Remove any subregisters from inputs to phi nodes.
415	preprocessPhiNodes(B&: *L.getHeader());
416	return true;
417	}
418
419	void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
420	MachineRegisterInfo &MRI = MF->getRegInfo();
421	SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
422
423	for (MachineInstr &PI : B.phis()) {
424	MachineOperand &DefOp = PI.getOperand(i: `0`);
425	assert(DefOp.getSubReg() == `0`);
426	auto *RC = MRI.getRegClass(Reg: DefOp.getReg());
427
428	for (unsigned i = `1`, n = PI.getNumOperands(); i != n; i += `2`) {
429	MachineOperand &RegOp = PI.getOperand(i);
430	if (RegOp.getSubReg() == `0`)
431	continue;
432
433	// If the operand uses a subregister, replace it with a new register
434	// without subregisters, and generate a copy to the new register.
435	Register NewReg = MRI.createVirtualRegister(RegClass: RC);
436	MachineBasicBlock &PredB = *PI.getOperand(i: i+`1`).getMBB();
437	MachineBasicBlock::iterator At = PredB.getFirstTerminator();
438	const DebugLoc &DL = PredB.findDebugLoc(MBBI: At);
439	auto Copy = BuildMI(BB&: PredB, I: At, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: NewReg)
440	.addReg(RegNo: RegOp.getReg(), flags: getRegState(RegOp),
441	SubReg: RegOp.getSubReg());
442	Slots.insertMachineInstrInMaps(MI&: *Copy);
443	RegOp.setReg(NewReg);
444	RegOp.setSubReg(`0`);
445	}
446	}
447	}
448
449	/// The SMS algorithm consists of the following main steps:
450	/// 1. Computation and analysis of the dependence graph.
451	/// 2. Ordering of the nodes (instructions).
452	/// 3. Attempt to Schedule the loop.
453	bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
454	assert(L.getBlocks().size() == `1` && "SMS works on single blocks only.");
455
456	SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
457	II_setByPragma, LI.LoopPipelinerInfo.get());
458
459	MachineBasicBlock *MBB = L.getHeader();
460	// The kernel should not include any terminator instructions. These
461	// will be added back later.
462	SMS.startBlock(BB: MBB);
463
464	// Compute the number of 'real' instructions in the basic block by
465	// ignoring terminators.
466	unsigned size = MBB->size();
467	for (MachineBasicBlock::iterator I = MBB->getFirstTerminator(),
468	E = MBB->instr_end();
469	I != E; ++I, --size)
470	;
471
472	SMS.enterRegion(bb: MBB, begin: MBB->begin(), end: MBB->getFirstTerminator(), regioninstrs: size);
473	SMS.schedule();
474	SMS.exitRegion();
475
476	SMS.finishBlock();
477	return SMS.hasNewSchedule();
478	}
479
480	void MachinePipeliner::getAnalysisUsage(AnalysisUsage &AU) const {
481	AU.addRequired<AAResultsWrapperPass>();
482	AU.addPreserved<AAResultsWrapperPass>();
483	AU.addRequired<MachineLoopInfo>();
484	AU.addRequired<MachineDominatorTree>();
485	AU.addRequired<LiveIntervals>();
486	AU.addRequired<MachineOptimizationRemarkEmitterPass>();
487	MachineFunctionPass::getAnalysisUsage(AU);
488	}
489
490	void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
491	if (SwpForceII > `0`)
492	MII = SwpForceII;
493	else if (II_setByPragma > `0`)
494	MII = II_setByPragma;
495	else
496	MII = std::max(a: ResMII, b: RecMII);
497	}
498
499	void SwingSchedulerDAG::setMAX_II() {
500	if (SwpForceII > `0`)
501	MAX_II = SwpForceII;
502	else if (II_setByPragma > `0`)
503	MAX_II = II_setByPragma;
504	else
505	MAX_II = MII + SwpIISearchRange;
506	}
507
508	/// We override the schedule function in ScheduleDAGInstrs to implement the
509	/// scheduling part of the Swing Modulo Scheduling algorithm.
510	void SwingSchedulerDAG::schedule() {
511	AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
512	buildSchedGraph(AA);
513	addLoopCarriedDependences(AA);
514	updatePhiDependences();
515	Topo.InitDAGTopologicalSorting();
516	changeDependences();
517	postProcessDAG();
518	LLVM_DEBUG(dump());
519
520	NodeSetType NodeSets;
521	findCircuits(NodeSets);
522	NodeSetType Circuits = NodeSets;
523
524	// Calculate the MII.
525	unsigned ResMII = calculateResMII();
526	unsigned RecMII = calculateRecMII(RecNodeSets&: NodeSets);
527
528	fuseRecs(NodeSets);
529
530	// This flag is used for testing and can cause correctness problems.
531	if (SwpIgnoreRecMII)
532	RecMII = `0`;
533
534	setMII(ResMII, RecMII);
535	setMAX_II();
536
537	LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
538	<< " (rec=" << RecMII << ", res=" << ResMII << ")\n");
539
540	// Can't schedule a loop without a valid MII.
541	if (MII == `0`) {
542	LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");
543	NumFailZeroMII ++;
544	Pass.ORE->emit(RemarkBuilder: [&]() {
545	return MachineOptimizationRemarkAnalysis (
546	DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
547	<< "Invalid Minimal Initiation Interval: 0";
548	});
549	return;
550	}
551
552	// Don't pipeline large loops.
553	if (SwpMaxMii != -`1` && (int)MII > SwpMaxMii) {
554	LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
555	<< ", we don't pipeline large loops\n");
556	NumFailLargeMaxMII ++;
557	Pass.ORE->emit(RemarkBuilder: [&]() {
558	return MachineOptimizationRemarkAnalysis (
559	DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
560	<< "Minimal Initiation Interval too large: "
561	<< ore::NV ("MII", (int)MII) << " > "
562	<< ore::NV ("SwpMaxMii", SwpMaxMii) << "."
563	<< "Refer to -pipeliner-max-mii.";
564	});
565	return;
566	}
567
568	computeNodeFunctions(NodeSets);
569
570	registerPressureFilter(NodeSets);
571
572	colocateNodeSets(NodeSets);
573
574	checkNodeSets(NodeSets);
575
576	LLVM_DEBUG({
577	for (auto &I : NodeSets) {
578	dbgs() << " Rec NodeSet ";
579	I.dump();
580	}
581	});
582
583	llvm::stable_sort(Range&: NodeSets, C: std::greater<NodeSet>());
584
585	groupRemainingNodes(NodeSets);
586
587	removeDuplicateNodes(NodeSets);
588
589	LLVM_DEBUG({
590	for (auto &I : NodeSets) {
591	dbgs() << " NodeSet ";
592	I.dump();
593	}
594	});
595
596	computeNodeOrder(NodeSets);
597
598	// check for node order issues
599	checkValidNodeOrder(Circuits);
600
601	SMSchedule Schedule(Pass.MF, this);
602	Scheduled = schedulePipeline(Schedule);
603
604	if (!Scheduled){
605	LLVM_DEBUG(dbgs() << "No schedule found, return\n");
606	NumFailNoSchedule ++;
607	Pass.ORE->emit(RemarkBuilder: [&]() {
608	return MachineOptimizationRemarkAnalysis (
609	DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
610	<< "Unable to find schedule";
611	});
612	return;
613	}
614
615	unsigned numStages = Schedule.getMaxStageCount();
616	// No need to generate pipeline if there are no overlapped iterations.
617	if (numStages == `0`) {
618	LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");
619	NumFailZeroStage ++;
620	Pass.ORE->emit(RemarkBuilder: [&]() {
621	return MachineOptimizationRemarkAnalysis (
622	DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
623	<< "No need to pipeline - no overlapped iterations in schedule.";
624	});
625	return;
626	}
627	// Check that the maximum stage count is less than user-defined limit.
628	if (SwpMaxStages > -`1` && (int)numStages > SwpMaxStages) {
629	LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
630	<< " : too many stages, abort\n");
631	NumFailLargeMaxStage ++;
632	Pass.ORE->emit(RemarkBuilder: [&]() {
633	return MachineOptimizationRemarkAnalysis (
634	DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
635	<< "Too many stages in schedule: "
636	<< ore::NV ("numStages", (int)numStages) << " > "
637	<< ore::NV ("SwpMaxStages", SwpMaxStages)
638	<< ". Refer to -pipeliner-max-stages.";
639	});
640	return;
641	}
642
643	Pass.ORE->emit(RemarkBuilder: [&]() {
644	return MachineOptimizationRemark (DEBUG_TYPE, "schedule", Loop.getStartLoc(),
645	Loop.getHeader())
646	<< "Pipelined succesfully!";
647	});
648
649	// Generate the schedule as a ModuloSchedule.
650	DenseMap<MachineInstr , int*> Cycles, Stages;
651	std::vector<MachineInstr *> OrderedInsts;
652	for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
653	++Cycle) {
654	for (SUnit *SU : Schedule.getInstructions(cycle: Cycle)) {
655	OrderedInsts.push_back(x: SU->getInstr());
656	Cycles [SU->getInstr()] = Cycle;
657	Stages [SU->getInstr()] = Schedule.stageScheduled(SU);
658	}
659	}
660	DenseMap<MachineInstr , std::pair<unsigned*, int64_t>> NewInstrChanges;
661	for (auto &KV : NewMIs) {
662	Cycles [KV.first] = Cycles [KV.second];
663	Stages [KV.first] = Stages [KV.second];
664	NewInstrChanges [KV.first] = InstrChanges [getSUnit(MI: KV.first)];
665	}
666
667	ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
668	std::move(Stages));
669	if (EmitTestAnnotations) {
670	assert(NewInstrChanges.empty() &&
671	"Cannot serialize a schedule with InstrChanges!");
672	ModuloScheduleTestAnnotater MSTI(MF, MS);
673	MSTI.annotate();
674	return;
675	}
676	// The experimental code generator can't work if there are InstChanges.
677	if (ExperimentalCodeGen && NewInstrChanges.empty()) {
678	PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
679	MSE.expand();
680	} else {
681	ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
682	MSE.expand();
683	MSE.cleanup();
684	}
685	++NumPipelined;
686	}
687
688	/// Clean up after the software pipeliner runs.
689	void SwingSchedulerDAG::finishBlock() {
690	for (auto &KV : NewMIs)
691	MF.deleteMachineInstr(MI: KV.second);
692	NewMIs.clear();
693
694	// Call the superclass.
695	ScheduleDAGInstrs::finishBlock();
696	}
697
698	/// Return the register values for the operands of a Phi instruction.
699	/// This function assume the instruction is a Phi.
700	static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
701	unsigned &InitVal, unsigned &LoopVal) {
702	assert(Phi.isPHI() && "Expecting a Phi.");
703
704	InitVal = `0`;
705	LoopVal = `0`;
706	for (unsigned i = `1`, e = Phi.getNumOperands(); i != e; i += `2`)
707	if (Phi.getOperand(i: i + `1`).getMBB() != Loop)
708	InitVal = Phi.getOperand(i).getReg();
709	else
710	LoopVal = Phi.getOperand(i).getReg();
711
712	assert(InitVal != `0` && LoopVal != `0` && "Unexpected Phi structure.");
713	}
714
715	/// Return the Phi register value that comes the loop block.
716	static unsigned getLoopPhiReg(const MachineInstr &Phi,
717	const MachineBasicBlock *LoopBB) {
718	for (unsigned i = `1`, e = Phi.getNumOperands(); i != e; i += `2`)
719	if (Phi.getOperand(i: i + `1`).getMBB() == LoopBB)
720	return Phi.getOperand(i).getReg();
721	return `0`;
722	}
723
724	/// Return true if SUb can be reached from SUa following the chain edges.
725	static bool isSuccOrder(SUnit SUa, SUnit SUb) {
726	SmallPtrSet<SUnit *, `8`> Visited;
727	SmallVector<SUnit *, `8`> Worklist;
728	Worklist.push_back(Elt: SUa);
729	while (!Worklist.empty()) {
730	const SUnit *SU = Worklist.pop_back_val();
731	for (const auto &SI : SU->Succs) {
732	SUnit *SuccSU = SI.getSUnit();
733	if (SI.getKind() == SDep::Order) {
734	if (Visited.count(Ptr: SuccSU))
735	continue;
736	if (SuccSU == SUb)
737	return true;
738	Worklist.push_back(Elt: SuccSU);
739	Visited.insert(Ptr: SuccSU);
740	}
741	}
742	}
743	return false;
744	}
745
746	/// Return true if the instruction causes a chain between memory
747	/// references before and after it.
748	static bool isDependenceBarrier(MachineInstr &MI) {
749	return MI.isCall() \|\| MI.mayRaiseFPException() \|\|
750	MI.hasUnmodeledSideEffects() \|\|
751	(MI.hasOrderedMemoryRef() &&
752	(!MI.mayLoad() \|\| !MI.isDereferenceableInvariantLoad()));
753	}
754
755	/// Return the underlying objects for the memory references of an instruction.
756	/// This function calls the code in ValueTracking, but first checks that the
757	/// instruction has a memory operand.
758	static void getUnderlyingObjects(const MachineInstr *MI,
759	SmallVectorImpl<const Value *> &Objs) {
760	if (!MI->hasOneMemOperand())
761	return;
762	MachineMemOperand MM = MI->memoperands_begin();
763	if (!MM->getValue())
764	return;
765	getUnderlyingObjects(V: MM->getValue(), Objects&: Objs);
766	for (const Value *V : Objs) {
767	if (!isIdentifiedObject(V)) {
768	Objs.clear();
769	return;
770	}
771	Objs.push_back(Elt: V);
772	}
773	}
774
775	/// Add a chain edge between a load and store if the store can be an
776	/// alias of the load on a subsequent iteration, i.e., a loop carried
777	/// dependence. This code is very similar to the code in ScheduleDAGInstrs
778	/// but that code doesn't create loop carried dependences.
779	void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
780	MapVector<const Value , SmallVector<SUnit , `4`>> PendingLoads;
781	Value *UnknownValue =
782	UndefValue::get(T: Type::getVoidTy(C&: MF.getFunction().getContext()));
783	for (auto &SU : SUnits) {
784	MachineInstr &MI = *SU.getInstr();
785	if (isDependenceBarrier(MI))
786	PendingLoads.clear();
787	else if (MI.mayLoad()) {
788	SmallVector<const Value *, `4`> Objs;
789	::getUnderlyingObjects(MI: &MI, Objs);
790	if (Objs.empty())
791	Objs.push_back(Elt: UnknownValue);
792	for (const auto *V : Objs) {
793	SmallVector<SUnit *, `4`> &SUs = PendingLoads [V];
794	SUs.push_back(Elt: &SU);
795	}
796	} else if (MI.mayStore()) {
797	SmallVector<const Value *, `4`> Objs;
798	::getUnderlyingObjects(MI: &MI, Objs);
799	if (Objs.empty())
800	Objs.push_back(Elt: UnknownValue);
801	for (const auto *V : Objs) {
802	MapVector<const Value , SmallVector<SUnit , `4`>>::iterator I =
803	PendingLoads.find(Key: V);
804	if (I == PendingLoads.end())
805	continue;
806	for (auto *Load : I->second) {
807	if (isSuccOrder(SUa: Load, SUb: &SU))
808	continue;
809	MachineInstr &LdMI = *Load->getInstr();
810	// First, perform the cheaper check that compares the base register.
811	// If they are the same and the load offset is less than the store
812	// offset, then mark the dependence as loop carried potentially.
813	const MachineOperand BaseOp1, BaseOp2;
814	int64_t Offset1, Offset2;
815	bool Offset1IsScalable, Offset2IsScalable;
816	if (TII->getMemOperandWithOffset(MI: LdMI, BaseOp&: BaseOp1, Offset&: Offset1,
817	OffsetIsScalable&: Offset1IsScalable, TRI) &&
818	TII->getMemOperandWithOffset(MI, BaseOp&: BaseOp2, Offset&: Offset2,
819	OffsetIsScalable&: Offset2IsScalable, TRI)) {
820	if (BaseOp1->isIdenticalTo(Other: *BaseOp2) &&
821	Offset1IsScalable == Offset2IsScalable &&
822	(int)Offset1 < (int)Offset2) {
823	assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
824	"What happened to the chain edge?");
825	SDep Dep(Load, SDep::Barrier);
826	Dep.setLatency(`1`);
827	SU.addPred(D: Dep);
828	continue;
829	}
830	}
831	// Second, the more expensive check that uses alias analysis on the
832	// base registers. If they alias, and the load offset is less than
833	// the store offset, the mark the dependence as loop carried.
834	if (!AA) {
835	SDep Dep(Load, SDep::Barrier);
836	Dep.setLatency(`1`);
837	SU.addPred(D: Dep);
838	continue;
839	}
840	MachineMemOperand MMO1 = LdMI.memoperands_begin();
841	MachineMemOperand MMO2 = MI.memoperands_begin();
842	if (!MMO1->getValue() \|\| !MMO2->getValue()) {
843	SDep Dep(Load, SDep::Barrier);
844	Dep.setLatency(`1`);
845	SU.addPred(D: Dep);
846	continue;
847	}
848	if (MMO1->getValue() == MMO2->getValue() &&
849	MMO1->getOffset() <= MMO2->getOffset()) {
850	SDep Dep(Load, SDep::Barrier);
851	Dep.setLatency(`1`);
852	SU.addPred(D: Dep);
853	continue;
854	}
855	if (!AA->isNoAlias(
856	LocA: MemoryLocation::getAfter(Ptr: MMO1->getValue(), AATags: MMO1->getAAInfo()),
857	LocB: MemoryLocation::getAfter(Ptr: MMO2->getValue(),
858	AATags: MMO2->getAAInfo()))) {
859	SDep Dep(Load, SDep::Barrier);
860	Dep.setLatency(`1`);
861	SU.addPred(D: Dep);
862	}
863	}
864	}
865	}
866	}
867	}
868
869	/// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
870	/// processes dependences for PHIs. This function adds true dependences
871	/// from a PHI to a use, and a loop carried dependence from the use to the
872	/// PHI. The loop carried dependence is represented as an anti dependence
873	/// edge. This function also removes chain dependences between unrelated
874	/// PHIs.
875	void SwingSchedulerDAG::updatePhiDependences() {
876	SmallVector<SDep, `4`> RemoveDeps;
877	const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>();
878
879	// Iterate over each DAG node.
880	for (SUnit &I : SUnits) {
881	RemoveDeps.clear();
882	// Set to true if the instruction has an operand defined by a Phi.
883	unsigned HasPhiUse = `0`;
884	unsigned HasPhiDef = `0`;
885	MachineInstr *MI = I.getInstr();
886	// Iterate over each operand, and we process the definitions.
887	for (const MachineOperand &MO : MI->operands()) {
888	if (!MO.isReg())
889	continue;
890	Register Reg = MO.getReg();
891	if (MO.isDef()) {
892	// If the register is used by a Phi, then create an anti dependence.
893	for (MachineRegisterInfo::use_instr_iterator
894	UI = MRI.use_instr_begin(RegNo: Reg),
895	UE = MRI.use_instr_end();
896	UI != UE; ++UI) {
897	MachineInstr UseMI = &UI;
898	SUnit *SU = getSUnit(MI: UseMI);
899	if (SU != nullptr && UseMI->isPHI()) {
900	if (!MI->isPHI()) {
901	SDep Dep(SU, SDep::Anti, Reg);
902	Dep.setLatency(`1`);
903	I.addPred(D: Dep);
904	} else {
905	HasPhiDef = Reg;
906	// Add a chain edge to a dependent Phi that isn't an existing
907	// predecessor.
908	if (SU->NodeNum < I.NodeNum && !I.isPred(N: SU))
909	I.addPred(D: SDep (SU, SDep::Barrier));
910	}
911	}
912	}
913	} else if (MO.isUse()) {
914	// If the register is defined by a Phi, then create a true dependence.
915	MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
916	if (DefMI == nullptr)
917	continue;
918	SUnit *SU = getSUnit(MI: DefMI);
919	if (SU != nullptr && DefMI->isPHI()) {
920	if (!MI->isPHI()) {
921	SDep Dep(SU, SDep::Data, Reg);
922	Dep.setLatency(`0`);
923	ST.adjustSchedDependency(Def: SU, DefOpIdx: `0`, Use: &I, UseOpIdx: MO.getOperandNo(), Dep);
924	I.addPred(D: Dep);
925	} else {
926	HasPhiUse = Reg;
927	// Add a chain edge to a dependent Phi that isn't an existing
928	// predecessor.
929	if (SU->NodeNum < I.NodeNum && !I.isPred(N: SU))
930	I.addPred(D: SDep (SU, SDep::Barrier));
931	}
932	}
933	}
934	}
935	// Remove order dependences from an unrelated Phi.
936	if (!SwpPruneDeps)
937	continue;
938	for (auto &PI : I.Preds) {
939	MachineInstr *PMI = PI.getSUnit()->getInstr();
940	if (PMI->isPHI() && PI.getKind() == SDep::Order) {
941	if (I.getInstr()->isPHI()) {
942	if (PMI->getOperand(i: `0`).getReg() == HasPhiUse)
943	continue;
944	if (getLoopPhiReg(Phi: *PMI, LoopBB: PMI->getParent()) == HasPhiDef)
945	continue;
946	}
947	RemoveDeps.push_back(Elt: PI);
948	}
949	}
950	for (int i = `0`, e = RemoveDeps.size(); i != e; ++i)
951	I.removePred(D: RemoveDeps [i]);
952	}
953	}
954
955	/// Iterate over each DAG node and see if we can change any dependences
956	/// in order to reduce the recurrence MII.
957	void SwingSchedulerDAG::changeDependences() {
958	// See if an instruction can use a value from the previous iteration.
959	// If so, we update the base and offset of the instruction and change
960	// the dependences.
961	for (SUnit &I : SUnits) {
962	unsigned BasePos = `0`, OffsetPos = `0`, NewBase = `0`;
963	int64_t NewOffset = `0`;
964	if (!canUseLastOffsetValue(MI: I.getInstr(), BasePos, OffsetPos, NewBase,
965	NewOffset))
966	continue;
967
968	// Get the MI and SUnit for the instruction that defines the original base.
969	Register OrigBase = I.getInstr()->getOperand(i: BasePos).getReg();
970	MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg: OrigBase);
971	if (!DefMI)
972	continue;
973	SUnit *DefSU = getSUnit(MI: DefMI);
974	if (!DefSU)
975	continue;
976	// Get the MI and SUnit for the instruction that defins the new base.
977	MachineInstr *LastMI = MRI.getUniqueVRegDef(Reg: NewBase);
978	if (!LastMI)
979	continue;
980	SUnit *LastSU = getSUnit(MI: LastMI);
981	if (!LastSU)
982	continue;
983
984	if (Topo.IsReachable(SU: &I, TargetSU: LastSU))
985	continue;
986
987	// Remove the dependence. The value now depends on a prior iteration.
988	SmallVector<SDep, `4`> Deps;
989	for (const SDep &P : I.Preds)
990	if (P.getSUnit() == DefSU)
991	Deps.push_back(Elt: P);
992	for (int i = `0`, e = Deps.size(); i != e; i++) {
993	Topo.RemovePred(M: &I, N: Deps [i].getSUnit());
994	I.removePred(D: Deps [i]);
995	}
996	// Remove the chain dependence between the instructions.
997	Deps.clear();
998	for (auto &P : LastSU->Preds)
999	if (P.getSUnit() == &I && P.getKind() == SDep::Order)
1000	Deps.push_back(Elt: P);
1001	for (int i = `0`, e = Deps.size(); i != e; i++) {
1002	Topo.RemovePred(M: LastSU, N: Deps [i].getSUnit());
1003	LastSU->removePred(D: Deps [i]);
1004	}
1005
1006	// Add a dependence between the new instruction and the instruction
1007	// that defines the new base.
1008	SDep Dep(&I, SDep::Anti, NewBase);
1009	Topo.AddPred(Y: LastSU, X: &I);
1010	LastSU->addPred(D: Dep);
1011
1012	// Remember the base and offset information so that we can update the
1013	// instruction during code generation.
1014	InstrChanges [&I] = std::make_pair(x&: NewBase, y&: NewOffset);
1015	}
1016	}
1017
1018	/// Create an instruction stream that represents a single iteration and stage of
1019	/// each instruction. This function differs from SMSchedule::finalizeSchedule in
1020	/// that this doesn't have any side-effect to SwingSchedulerDAG. That is, this
1021	/// function is an approximation of SMSchedule::finalizeSchedule with all
1022	/// non-const operations removed.
1023	static void computeScheduledInsts(const SwingSchedulerDAG *SSD,
1024	SMSchedule &Schedule,
1025	std::vector<MachineInstr *> &OrderedInsts,
1026	DenseMap<MachineInstr , unsigned*> &Stages) {
1027	DenseMap<int, std::deque<SUnit *>> Instrs;
1028
1029	// Move all instructions to the first stage from the later stages.
1030	for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
1031	++Cycle) {
1032	for (int Stage = `0`, LastStage = Schedule.getMaxStageCount();
1033	Stage <= LastStage; ++Stage) {
1034	for (SUnit *SU : llvm::reverse(C&: Schedule.getInstructions(
1035	cycle: Cycle + Stage * Schedule.getInitiationInterval()))) {
1036	Instrs [Cycle].push_front(x: SU);
1037	}
1038	}
1039	}
1040
1041	for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
1042	++Cycle) {
1043	std::deque<SUnit *> &CycleInstrs = Instrs [Cycle];
1044	CycleInstrs = Schedule.reorderInstructions(SSD, Instrs: CycleInstrs);
1045	for (SUnit *SU : CycleInstrs) {
1046	MachineInstr *MI = SU->getInstr();
1047	OrderedInsts.push_back(x: MI);
1048	Stages [MI] = Schedule.stageScheduled(SU);
1049	}
1050	}
1051	}
1052
1053	namespace {
1054
1055	// FuncUnitSorter - Comparison operator used to sort instructions by
1056	// the number of functional unit choices.
1057	struct FuncUnitSorter {
1058	const InstrItineraryData *InstrItins;
1059	const MCSubtargetInfo *STI;
1060	DenseMap<InstrStage::FuncUnits, unsigned> Resources;
1061
1062	FuncUnitSorter(const TargetSubtargetInfo &TSI)
1063	: InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1064
1065	// Compute the number of functional unit alternatives needed
1066	// at each stage, and take the minimum value. We prioritize the
1067	// instructions by the least number of choices first.
1068	unsigned minFuncUnits(const MachineInstr *Inst,
1069	InstrStage::FuncUnits &F) const {
1070	unsigned SchedClass = Inst->getDesc().getSchedClass();
1071	unsigned min = UINT_MAX;
1072	if (InstrItins && !InstrItins->isEmpty()) {
1073	for (const InstrStage &IS :
1074	make_range(x: InstrItins->beginStage(ItinClassIndx: SchedClass),
1075	y: InstrItins->endStage(ItinClassIndx: SchedClass))) {
1076	InstrStage::FuncUnits funcUnits = IS.getUnits();
1077	unsigned numAlternatives = llvm::popcount(Value: funcUnits);
1078	if (numAlternatives < min) {
1079	min = numAlternatives;
1080	F = funcUnits;
1081	}
1082	}
1083	return min;
1084	}
1085	if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1086	const MCSchedClassDesc *SCDesc =
1087	STI->getSchedModel().getSchedClassDesc(SchedClassIdx: SchedClass);
1088	if (!SCDesc->isValid())
1089	// No valid Schedule Class Desc for schedClass, should be
1090	// Pseudo/PostRAPseudo
1091	return min;
1092
1093	for (const MCWriteProcResEntry &PRE :
1094	make_range(x: STI->getWriteProcResBegin(SC: SCDesc),
1095	y: STI->getWriteProcResEnd(SC: SCDesc))) {
1096	if (!PRE.ReleaseAtCycle)
1097	continue;
1098	const MCProcResourceDesc *ProcResource =
1099	STI->getSchedModel().getProcResource(ProcResourceIdx: PRE.ProcResourceIdx);
1100	unsigned NumUnits = ProcResource->NumUnits;
1101	if (NumUnits < min) {
1102	min = NumUnits;
1103	F = PRE.ProcResourceIdx;
1104	}
1105	}
1106	return min;
1107	}
1108	llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1109	}
1110
1111	// Compute the critical resources needed by the instruction. This
1112	// function records the functional units needed by instructions that
1113	// must use only one functional unit. We use this as a tie breaker
1114	// for computing the resource MII. The instrutions that require
1115	// the same, highly used, functional unit have high priority.
1116	void calcCriticalResources(MachineInstr &MI) {
1117	unsigned SchedClass = MI.getDesc().getSchedClass();
1118	if (InstrItins && !InstrItins->isEmpty()) {
1119	for (const InstrStage &IS :
1120	make_range(x: InstrItins->beginStage(ItinClassIndx: SchedClass),
1121	y: InstrItins->endStage(ItinClassIndx: SchedClass))) {
1122	InstrStage::FuncUnits FuncUnits = IS.getUnits();
1123	if (llvm::popcount(Value: FuncUnits) == `1`)
1124	Resources [FuncUnits]++;
1125	}
1126	return;
1127	}
1128	if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1129	const MCSchedClassDesc *SCDesc =
1130	STI->getSchedModel().getSchedClassDesc(SchedClassIdx: SchedClass);
1131	if (!SCDesc->isValid())
1132	// No valid Schedule Class Desc for schedClass, should be
1133	// Pseudo/PostRAPseudo
1134	return;
1135
1136	for (const MCWriteProcResEntry &PRE :
1137	make_range(x: STI->getWriteProcResBegin(SC: SCDesc),
1138	y: STI->getWriteProcResEnd(SC: SCDesc))) {
1139	if (!PRE.ReleaseAtCycle)
1140	continue;
1141	Resources [PRE.ProcResourceIdx]++;
1142	}
1143	return;
1144	}
1145	llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1146	}
1147
1148	/// Return true if IS1 has less priority than IS2.
1149	bool operator()(const MachineInstr IS1, const* MachineInstr IS2) const* {
1150	InstrStage::FuncUnits F1 = `0`, F2 = `0`;
1151	unsigned MFUs1 = minFuncUnits(Inst: IS1, F&: F1);
1152	unsigned MFUs2 = minFuncUnits(Inst: IS2, F&: F2);
1153	if (MFUs1 == MFUs2)
1154	return Resources.lookup(Val: F1) < Resources.lookup(Val: F2);
1155	return MFUs1 > MFUs2;
1156	}
1157	};
1158
1159	/// Calculate the maximum register pressure of the scheduled instructions stream
1160	class HighRegisterPressureDetector {
1161	MachineBasicBlock *OrigMBB;
1162	const MachineFunction &MF;
1163	const MachineRegisterInfo &MRI;
1164	const TargetRegisterInfo *TRI;
1165
1166	const unsigned PSetNum;
1167
1168	// Indexed by PSet ID
1169	// InitSetPressure takes into account the register pressure of live-in
1170	// registers. It's not depend on how the loop is scheduled, so it's enough to
1171	// calculate them once at the beginning.
1172	std::vector<unsigned> InitSetPressure;
1173
1174	// Indexed by PSet ID
1175	// Upper limit for each register pressure set
1176	std::vector<unsigned> PressureSetLimit;
1177
1178	DenseMap<MachineInstr *, RegisterOperands> ROMap;
1179
1180	using Instr2LastUsesTy = DenseMap<MachineInstr *, SmallDenseSet<Register, `4`>>;
1181
1182	public:
1183	using OrderedInstsTy = std::vector<MachineInstr *>;
1184	using Instr2StageTy = DenseMap<MachineInstr , unsigned*>;
1185
1186	private:
1187	static void dumpRegisterPressures(const std::vector<unsigned> &Pressures) {
1188	if (Pressures.size() == `0`) {
1189	dbgs() << "[]";
1190	} else {
1191	char Prefix = `'['`;
1192	for (unsigned P : Pressures) {
1193	dbgs() << Prefix << P;
1194	Prefix = `' '`;
1195	}
1196	dbgs() << `']'`;
1197	}
1198	}
1199
1200	void dumpPSet(Register Reg) const {
1201	dbgs() << "Reg=" << printReg(Reg, TRI, SubIdx: `0`, MRI: &MRI) << " PSet=";
1202	for (auto PSetIter = MRI.getPressureSets(RegUnit: Reg); PSetIter.isValid();
1203	++PSetIter) {
1204	dbgs() << *PSetIter << `' '`;
1205	}
1206	dbgs() << `'\n'`;
1207	}
1208
1209	void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1210	Register Reg) const {
1211	auto PSetIter = MRI.getPressureSets(RegUnit: Reg);
1212	unsigned Weight = PSetIter.getWeight();
1213	for (; PSetIter.isValid(); ++PSetIter)
1214	Pressure [*PSetIter] += Weight;
1215	}
1216
1217	void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1218	Register Reg) const {
1219	auto PSetIter = MRI.getPressureSets(RegUnit: Reg);
1220	unsigned Weight = PSetIter.getWeight();
1221	for (; PSetIter.isValid(); ++PSetIter) {
1222	auto &P = Pressure [*PSetIter];
1223	assert(P >= Weight &&
1224	"register pressure must be greater than or equal weight");
1225	P -= Weight;
1226	}
1227	}
1228
1229	// Return true if Reg is fixed one, for example, stack pointer
1230	bool isFixedRegister(Register Reg) const {
1231	return Reg.isPhysical() && TRI->isFixedRegister(MF, PhysReg: Reg.asMCReg());
1232	}
1233
1234	bool isDefinedInThisLoop(Register Reg) const {
1235	return Reg.isVirtual() && MRI.getVRegDef(Reg)->getParent() == OrigMBB;
1236	}
1237
1238	// Search for live-in variables. They are factored into the register pressure
1239	// from the begining. Live-in variables used by every iteration should be
1240	// considered as alive throughout the loop. For example, the variable `c` in
1241	// following code. \code
1242	// int c = ...;
1243	// for (int i = 0; i < n; i++)
1244	// a[i] += b[i] + c;
1245	// \endcode
1246	void computeLiveIn() {
1247	DenseSet<Register> Used;
1248	for (auto &MI : *OrigMBB) {
1249	if (MI.isDebugInstr())
1250	continue;
1251	for (auto Use : ROMap [&MI].Uses) {
1252	auto Reg = Use.RegUnit;
1253	// Ignore the variable that appears only on one side of phi instruction
1254	// because it's used only at the first iteration.
1255	if (MI.isPHI() && Reg != getLoopPhiReg(Phi: MI, LoopBB: OrigMBB))
1256	continue;
1257	if (isFixedRegister(Reg))
1258	continue;
1259	if (isDefinedInThisLoop(Reg))
1260	continue;
1261	Used.insert(V: Reg);
1262	}
1263	}
1264
1265	for (auto LiveIn : Used)
1266	increaseRegisterPressure(Pressure&: InitSetPressure, Reg: LiveIn);
1267	}
1268
1269	// Calculate the upper limit of each pressure set
1270	void computePressureSetLimit(const RegisterClassInfo &RCI) {
1271	for (unsigned PSet = `0`; PSet < PSetNum; PSet++)
1272	PressureSetLimit [PSet] = RCI.getRegPressureSetLimit(Idx: PSet);
1273
1274	// We assume fixed registers, such as stack pointer, are already in use.
1275	// Therefore subtracting the weight of the fixed registers from the limit of
1276	// each pressure set in advance.
1277	SmallDenseSet<Register, `8`> FixedRegs;
1278	for (const TargetRegisterClass *TRC : TRI->regclasses()) {
1279	for (const MCPhysReg Reg : *TRC)
1280	if (isFixedRegister(Reg))
1281	FixedRegs.insert(V: Reg);
1282	}
1283
1284	LLVM_DEBUG({
1285	for (auto Reg : FixedRegs) {
1286	dbgs() << printReg(Reg, TRI, `0`, &MRI) << ": [";
1287	const int *Sets = TRI->getRegUnitPressureSets(Reg);
1288	for (; *Sets != -`1`; Sets++) {
1289	dbgs() << TRI->getRegPressureSetName(*Sets) << ", ";
1290	}
1291	dbgs() << "]\n";
1292	}
1293	});
1294
1295	for (auto Reg : FixedRegs) {
1296	LLVM_DEBUG(dbgs() << "fixed register: " << printReg(Reg, TRI, `0`, &MRI)
1297	<< "\n");
1298	auto PSetIter = MRI.getPressureSets(RegUnit: Reg);
1299	unsigned Weight = PSetIter.getWeight();
1300	for (; PSetIter.isValid(); ++PSetIter) {
1301	unsigned &Limit = PressureSetLimit [*PSetIter];
1302	assert(Limit >= Weight &&
1303	"register pressure limit must be greater than or equal weight");
1304	Limit -= Weight;
1305	LLVM_DEBUG(dbgs() << "PSet=" << *PSetIter << " Limit=" << Limit
1306	<< " (decreased by " << Weight << ")\n");
1307	}
1308	}
1309	}
1310
1311	// There are two patterns of last-use.
1312	// - by an instruction of the current iteration
1313	// - by a phi instruction of the next iteration (loop carried value)
1314	//
1315	// Furthermore, following two groups of instructions are executed
1316	// simultaneously
1317	// - next iteration's phi instructions in i-th stage
1318	// - current iteration's instructions in i+1-th stage
1319	//
1320	// This function calculates the last-use of each register while taking into
1321	// account the above two patterns.
1322	Instr2LastUsesTy computeLastUses(const OrderedInstsTy &OrderedInsts,
1323	Instr2StageTy &Stages) const {
1324	// We treat virtual registers that are defined and used in this loop.
1325	// Following virtual register will be ignored
1326	// - live-in one
1327	// - defined but not used in the loop (potentially live-out)
1328	DenseSet<Register> TargetRegs;
1329	const auto UpdateTargetRegs = [this, &TargetRegs](Register Reg) {
1330	if (isDefinedInThisLoop(Reg))
1331	TargetRegs.insert(V: Reg);
1332	};
1333	for (MachineInstr *MI : OrderedInsts) {
1334	if (MI->isPHI()) {
1335	Register Reg = getLoopPhiReg(Phi: *MI, LoopBB: OrigMBB);
1336	UpdateTargetRegs(Reg);
1337	} else {
1338	for (auto Use : ROMap.find(Val: MI)->getSecond().Uses)
1339	UpdateTargetRegs(Use.RegUnit);
1340	}
1341	}
1342
1343	const auto InstrScore = [&Stages](MachineInstr *MI) {
1344	return Stages [MI] + MI->isPHI();
1345	};
1346
1347	DenseMap<Register, MachineInstr *> LastUseMI;
1348	for (MachineInstr *MI : llvm::reverse(C: OrderedInsts)) {
1349	for (auto Use : ROMap.find(Val: MI)->getSecond().Uses) {
1350	auto Reg = Use.RegUnit;
1351	if (!TargetRegs.contains(V: Reg))
1352	continue;
1353	auto Ite = LastUseMI.find(Val: Reg);
1354	if (Ite == LastUseMI.end()) {
1355	LastUseMI [Reg] = MI;
1356	} else {
1357	MachineInstr *Orig = Ite ->second;
1358	MachineInstr *New = MI;
1359	if (InstrScore(Orig) < InstrScore(New))
1360	LastUseMI [Reg] = New;
1361	}
1362	}
1363	}
1364
1365	Instr2LastUsesTy LastUses;
1366	for (auto &Entry : LastUseMI)
1367	LastUses [Entry.second].insert(V: Entry.first);
1368	return LastUses;
1369	}
1370
1371	// Compute the maximum register pressure of the kernel. We'll simulate #Stage
1372	// iterations and check the register pressure at the point where all stages
1373	// overlapping.
1374	//
1375	// An example of unrolled loop where #Stage is 4..
1376	// Iter i+0 i+1 i+2 i+3
1377	// ------------------------
1378	// Stage 0
1379	// Stage 1 0
1380	// Stage 2 1 0
1381	// Stage 3 2 1 0 <- All stages overlap
1382	//
1383	std::vector<unsigned>
1384	computeMaxSetPressure(const OrderedInstsTy &OrderedInsts,
1385	Instr2StageTy &Stages,
1386	const unsigned StageCount) const {
1387	using RegSetTy = SmallDenseSet<Register, `16`>;
1388
1389	// Indexed by #Iter. To treat "local" variables of each stage separately, we
1390	// manage the liveness of the registers independently by iterations.
1391	SmallVector<RegSetTy> LiveRegSets(StageCount);
1392
1393	auto CurSetPressure = InitSetPressure;
1394	auto MaxSetPressure = InitSetPressure;
1395	auto LastUses = computeLastUses(OrderedInsts, Stages);
1396
1397	LLVM_DEBUG({
1398	dbgs() << "Ordered instructions:\n";
1399	for (MachineInstr *MI : OrderedInsts) {
1400	dbgs() << "Stage " << Stages[MI] << ": ";
1401	MI->dump();
1402	}
1403	});
1404
1405	const auto InsertReg = [this, &CurSetPressure](RegSetTy &RegSet,
1406	Register Reg) {
1407	if (!Reg.isValid() \|\| isFixedRegister(Reg))
1408	return;
1409
1410	bool Inserted = RegSet.insert(V: Reg).second;
1411	if (!Inserted)
1412	return;
1413
1414	LLVM_DEBUG(dbgs() << "insert " << printReg(Reg, TRI, `0`, &MRI) << "\n");
1415	increaseRegisterPressure(Pressure&: CurSetPressure, Reg);
1416	LLVM_DEBUG(dumpPSet(Reg));
1417	};
1418
1419	const auto EraseReg = [this, &CurSetPressure](RegSetTy &RegSet,
1420	Register Reg) {
1421	if (!Reg.isValid() \|\| isFixedRegister(Reg))
1422	return;
1423
1424	// live-in register
1425	if (!RegSet.contains(V: Reg))
1426	return;
1427
1428	LLVM_DEBUG(dbgs() << "erase " << printReg(Reg, TRI, `0`, &MRI) << "\n");
1429	RegSet.erase(V: Reg);
1430	decreaseRegisterPressure(Pressure&: CurSetPressure, Reg);
1431	LLVM_DEBUG(dumpPSet(Reg));
1432	};
1433
1434	for (unsigned I = `0`; I < StageCount; I++) {
1435	for (MachineInstr *MI : OrderedInsts) {
1436	const auto Stage = Stages [MI];
1437	if (I < Stage)
1438	continue;
1439
1440	const unsigned Iter = I - Stage;
1441
1442	for (auto Def : ROMap.find(Val: MI)->getSecond().Defs)
1443	InsertReg(LiveRegSets [Iter], Def.RegUnit);
1444
1445	for (auto LastUse : LastUses [MI]) {
1446	if (MI->isPHI()) {
1447	if (Iter != `0`)
1448	EraseReg(LiveRegSets [Iter - `1`], LastUse);
1449	} else {
1450	EraseReg(LiveRegSets [Iter], LastUse);
1451	}
1452	}
1453
1454	for (unsigned PSet = `0`; PSet < PSetNum; PSet++)
1455	MaxSetPressure [PSet] =
1456	std::max(a: MaxSetPressure [PSet], b: CurSetPressure [PSet]);
1457
1458	LLVM_DEBUG({
1459	dbgs() << "CurSetPressure=";
1460	dumpRegisterPressures(CurSetPressure);
1461	dbgs() << " iter=" << Iter << " stage=" << Stage << ":";
1462	MI->dump();
1463	});
1464	}
1465	}
1466
1467	return MaxSetPressure;
1468	}
1469
1470	public:
1471	HighRegisterPressureDetector(MachineBasicBlock *OrigMBB,
1472	const MachineFunction &MF)
1473	: OrigMBB(OrigMBB), MF(MF), MRI(MF.getRegInfo()),
1474	TRI(MF.getSubtarget().getRegisterInfo()),
1475	PSetNum(TRI->getNumRegPressureSets()), InitSetPressure (PSetNum, `0`),
1476	PressureSetLimit (PSetNum, `0`) {}
1477
1478	// Used to calculate register pressure, which is independent of loop
1479	// scheduling.
1480	void init(const RegisterClassInfo &RCI) {
1481	for (MachineInstr &MI : *OrigMBB) {
1482	if (MI.isDebugInstr())
1483	continue;
1484	ROMap [&MI].collect(MI, TRI: TRI, MRI, TrackLaneMasks: false, IgnoreDead: true*);
1485	}
1486
1487	computeLiveIn();
1488	computePressureSetLimit(RCI);
1489	}
1490
1491	// Calculate the maximum register pressures of the loop and check if they
1492	// exceed the limit
1493	bool detect(const SwingSchedulerDAG *SSD, SMSchedule &Schedule,
1494	const unsigned MaxStage) const {
1495	assert(`0` <= RegPressureMargin && RegPressureMargin <= `100` &&
1496	"the percentage of the margin must be between 0 to 100");
1497
1498	OrderedInstsTy OrderedInsts;
1499	Instr2StageTy Stages;
1500	computeScheduledInsts(SSD, Schedule, OrderedInsts, Stages);
1501	const auto MaxSetPressure =
1502	computeMaxSetPressure(OrderedInsts, Stages, StageCount: MaxStage + `1`);
1503
1504	LLVM_DEBUG({
1505	dbgs() << "Dump MaxSetPressure:\n";
1506	for (unsigned I = `0`; I < MaxSetPressure.size(); I++) {
1507	dbgs() << format("MaxSetPressure[%d]=%d\n", I, MaxSetPressure[I]);
1508	}
1509	dbgs() << `'\n'`;
1510	});
1511
1512	for (unsigned PSet = `0`; PSet < PSetNum; PSet++) {
1513	unsigned Limit = PressureSetLimit [PSet];
1514	unsigned Margin = Limit * RegPressureMargin / `100`;
1515	LLVM_DEBUG(dbgs() << "PSet=" << PSet << " Limit=" << Limit
1516	<< " Margin=" << Margin << "\n");
1517	if (Limit < MaxSetPressure [PSet] + Margin) {
1518	LLVM_DEBUG(
1519	dbgs()
1520	<< "Rejected the schedule because of too high register pressure\n");
1521	return true;
1522	}
1523	}
1524	return false;
1525	}
1526	};
1527
1528	} // end anonymous namespace
1529
1530	/// Calculate the resource constrained minimum initiation interval for the
1531	/// specified loop. We use the DFA to model the resources needed for
1532	/// each instruction, and we ignore dependences. A different DFA is created
1533	/// for each cycle that is required. When adding a new instruction, we attempt
1534	/// to add it to each existing DFA, until a legal space is found. If the
1535	/// instruction cannot be reserved in an existing DFA, we create a new one.
1536	unsigned SwingSchedulerDAG::calculateResMII() {
1537	LLVM_DEBUG(dbgs() << "calculateResMII:\n");
1538	ResourceManager RM(&MF.getSubtarget(), this);
1539	return RM.calculateResMII();
1540	}
1541
1542	/// Calculate the recurrence-constrainted minimum initiation interval.
1543	/// Iterate over each circuit. Compute the delay(c) and distance(c)
1544	/// for each circuit. The II needs to satisfy the inequality
1545	/// delay(c) - IIdistance(c) <= 0. For each circuit, choose the smallest*
1546	/// II that satisfies the inequality, and the RecMII is the maximum
1547	/// of those values.
1548	unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1549	unsigned RecMII = `0`;
1550
1551	for (NodeSet &Nodes : NodeSets) {
1552	if (Nodes.empty())
1553	continue;
1554
1555	unsigned Delay = Nodes.getLatency();
1556	unsigned Distance = `1`;
1557
1558	// ii = ceil(delay / distance)
1559	unsigned CurMII = (Delay + Distance - `1`) / Distance;
1560	Nodes.setRecMII(CurMII);
1561	if (CurMII > RecMII)
1562	RecMII = CurMII;
1563	}
1564
1565	return RecMII;
1566	}
1567
1568	/// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1569	/// but we do this to find the circuits, and then change them back.
1570	static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1571	SmallVector<std::pair<SUnit *, SDep>, `8`> DepsAdded;
1572	for (SUnit &SU : SUnits) {
1573	for (SDep &Pred : SU.Preds)
1574	if (Pred.getKind() == SDep::Anti)
1575	DepsAdded.push_back(Elt: std::make_pair(x: &SU, y&: Pred));
1576	}
1577	for (std::pair<SUnit *, SDep> &P : DepsAdded) {
1578	// Remove this anti dependency and add one in the reverse direction.
1579	SUnit *SU = P.first;
1580	SDep &D = P.second;
1581	SUnit *TargetSU = D.getSUnit();
1582	unsigned Reg = D.getReg();
1583	unsigned Lat = D.getLatency();
1584	SU->removePred(D);
1585	SDep Dep(SU, SDep::Anti, Reg);
1586	Dep.setLatency(Lat);
1587	TargetSU->addPred(D: Dep);
1588	}
1589	}
1590
1591	/// Create the adjacency structure of the nodes in the graph.
1592	void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1593	SwingSchedulerDAG *DAG) {
1594	BitVector Added(SUnits.size());
1595	DenseMap<int, int> OutputDeps;
1596	for (int i = `0`, e = SUnits.size(); i != e; ++i) {
1597	Added.reset();
1598	// Add any successor to the adjacency matrix and exclude duplicates.
1599	for (auto &SI : SUnits [i].Succs) {
1600	// Only create a back-edge on the first and last nodes of a dependence
1601	// chain. This records any chains and adds them later.
1602	if (SI.getKind() == SDep::Output) {
1603	int N = SI.getSUnit()->NodeNum;
1604	int BackEdge = i;
1605	auto Dep = OutputDeps.find(Val: BackEdge);
1606	if (Dep != OutputDeps.end()) {
1607	BackEdge = Dep ->second;
1608	OutputDeps.erase(I: Dep);
1609	}
1610	OutputDeps [N] = BackEdge;
1611	}
1612	// Do not process a boundary node, an artificial node.
1613	// A back-edge is processed only if it goes to a Phi.
1614	if (SI.getSUnit()->isBoundaryNode() \|\| SI.isArtificial() \|\|
1615	(SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1616	continue;
1617	int N = SI.getSUnit()->NodeNum;
1618	if (!Added.test(Idx: N)) {
1619	AdjK [i].push_back(Elt: N);
1620	Added.set(N);
1621	}
1622	}
1623	// A chain edge between a store and a load is treated as a back-edge in the
1624	// adjacency matrix.
1625	for (auto &PI : SUnits [i].Preds) {
1626	if (!SUnits [i].getInstr()->mayStore() \|\|
1627	!DAG->isLoopCarriedDep(Source: &SUnits [i], Dep: PI, isSucc: false))
1628	continue;
1629	if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1630	int N = PI.getSUnit()->NodeNum;
1631	if (!Added.test(Idx: N)) {
1632	AdjK [i].push_back(Elt: N);
1633	Added.set(N);
1634	}
1635	}
1636	}
1637	}
1638	// Add back-edges in the adjacency matrix for the output dependences.
1639	for (auto &OD : OutputDeps)
1640	if (!Added.test(Idx: OD.second)) {
1641	AdjK [OD.first].push_back(Elt: OD.second);
1642	Added.set(OD.second);
1643	}
1644	}
1645
1646	/// Identify an elementary circuit in the dependence graph starting at the
1647	/// specified node.
1648	bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1649	bool HasBackedge) {
1650	SUnit *SV = &SUnits [V];
1651	bool F = false;
1652	Stack.insert(X: SV);
1653	Blocked.set(V);
1654
1655	for (auto W : AdjK [V]) {
1656	if (NumPaths > MaxPaths)
1657	break;
1658	if (W < S)
1659	continue;
1660	if (W == S) {
1661	if (!HasBackedge)
1662	NodeSets.push_back(Elt: NodeSet (Stack.begin(), Stack.end()));
1663	F = true;
1664	++NumPaths;
1665	break;
1666	} else if (!Blocked.test(Idx: W)) {
1667	if (circuit(V: W, S, NodeSets,
1668	HasBackedge: Node2Idx->at(n: W) < Node2Idx->at(n: V) ? true : HasBackedge))
1669	F = true;
1670	}
1671	}
1672
1673	if (F)
1674	unblock(U: V);
1675	else {
1676	for (auto W : AdjK [V]) {
1677	if (W < S)
1678	continue;
1679	B [W].insert(Ptr: SV);
1680	}
1681	}
1682	Stack.pop_back();
1683	return F;
1684	}
1685
1686	/// Unblock a node in the circuit finding algorithm.
1687	void SwingSchedulerDAG::Circuits::unblock(int U) {
1688	Blocked.reset(Idx: U);
1689	SmallPtrSet<SUnit *, `4`> &BU = B [U];
1690	while (!BU.empty()) {
1691	SmallPtrSet<SUnit *, `4`>::iterator SI = BU.begin();
1692	assert(SI != BU.end() && "Invalid B set.");
1693	SUnit W = SI;
1694	BU.erase(Ptr: W);
1695	if (Blocked.test(Idx: W->NodeNum))
1696	unblock(U: W->NodeNum);
1697	}
1698	}
1699
1700	/// Identify all the elementary circuits in the dependence graph using
1701	/// Johnson's circuit algorithm.
1702	void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1703	// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1704	// but we do this to find the circuits, and then change them back.
1705	swapAntiDependences(SUnits);
1706
1707	Circuits Cir(SUnits, Topo);
1708	// Create the adjacency structure.
1709	Cir.createAdjacencyStructure(DAG: this);
1710	for (int i = `0`, e = SUnits.size(); i != e; ++i) {
1711	Cir.reset();
1712	Cir.circuit(V: i, S: i, NodeSets);
1713	}
1714
1715	// Change the dependences back so that we've created a DAG again.
1716	swapAntiDependences(SUnits);
1717	}
1718
1719	// Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1720	// is loop-carried to the USE in next iteration. This will help pipeliner avoid
1721	// additional copies that are needed across iterations. An artificial dependence
1722	// edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1723
1724	// PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1725	// SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1726	// PHI-------True-Dep------> USEOfPhi
1727
1728	// The mutation creates
1729	// USEOfPHI -------Artificial-Dep---> SRCOfCopy
1730
1731	// This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1732	// (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1733	// late to avoid additional copies across iterations. The possible scheduling
1734	// order would be
1735	// USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1736
1737	void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
1738	for (SUnit &SU : DAG->SUnits) {
1739	// Find the COPY/REG_SEQUENCE instruction.
1740	if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1741	continue;
1742
1743	// Record the loop carried PHIs.
1744	SmallVector<SUnit *, `4`> PHISUs;
1745	// Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1746	SmallVector<SUnit *, `4`> SrcSUs;
1747
1748	for (auto &Dep : SU.Preds) {
1749	SUnit *TmpSU = Dep.getSUnit();
1750	MachineInstr *TmpMI = TmpSU->getInstr();
1751	SDep::Kind DepKind = Dep.getKind();
1752	// Save the loop carried PHI.
1753	if (DepKind == SDep::Anti && TmpMI->isPHI())
1754	PHISUs.push_back(Elt: TmpSU);
1755	// Save the source of COPY/REG_SEQUENCE.
1756	// If the source has no pre-decessors, we will end up creating cycles.
1757	else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > `0`)
1758	SrcSUs.push_back(Elt: TmpSU);
1759	}
1760
1761	if (PHISUs.size() == `0` \|\| SrcSUs.size() == `0`)
1762	continue;
1763
1764	// Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1765	// SUnit to the container.
1766	SmallVector<SUnit *, `8`> UseSUs;
1767	// Do not use iterator based loop here as we are updating the container.
1768	for (size_t Index = `0`; Index < PHISUs.size(); ++Index) {
1769	for (auto &Dep : PHISUs [Index]->Succs) {
1770	if (Dep.getKind() != SDep::Data)
1771	continue;
1772
1773	SUnit *TmpSU = Dep.getSUnit();
1774	MachineInstr *TmpMI = TmpSU->getInstr();
1775	if (TmpMI->isPHI() \|\| TmpMI->isRegSequence()) {
1776	PHISUs.push_back(Elt: TmpSU);
1777	continue;
1778	}
1779	UseSUs.push_back(Elt: TmpSU);
1780	}
1781	}
1782
1783	if (UseSUs.size() == `0`)
1784	continue;
1785
1786	SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(Val: DAG);
1787	// Add the artificial dependencies if it does not form a cycle.
1788	for (auto *I : UseSUs) {
1789	for (auto *Src : SrcSUs) {
1790	if (!SDAG->Topo.IsReachable(SU: I, TargetSU: Src) && Src != I) {
1791	Src->addPred(D: SDep (I, SDep::Artificial));
1792	SDAG->Topo.AddPred(Y: Src, X: I);
1793	}
1794	}
1795	}
1796	}
1797	}
1798
1799	/// Return true for DAG nodes that we ignore when computing the cost functions.
1800	/// We ignore the back-edge recurrence in order to avoid unbounded recursion
1801	/// in the calculation of the ASAP, ALAP, etc functions.
1802	static bool ignoreDependence(const SDep &D, bool isPred) {
1803	if (D.isArtificial() \|\| D.getSUnit()->isBoundaryNode())
1804	return true;
1805	return D.getKind() == SDep::Anti && isPred;
1806	}
1807
1808	/// Compute several functions need to order the nodes for scheduling.
1809	/// ASAP - Earliest time to schedule a node.
1810	/// ALAP - Latest time to schedule a node.
1811	/// MOV - Mobility function, difference between ALAP and ASAP.
1812	/// D - Depth of each node.
1813	/// H - Height of each node.
1814	void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1815	ScheduleInfo.resize(new_size: SUnits.size());
1816
1817	LLVM_DEBUG({
1818	for (int I : Topo) {
1819	const SUnit &SU = SUnits[I];
1820	dumpNode(SU);
1821	}
1822	});
1823
1824	int maxASAP = `0`;
1825	// Compute ASAP and ZeroLatencyDepth.
1826	for (int I : Topo) {
1827	int asap = `0`;
1828	int zeroLatencyDepth = `0`;
1829	SUnit *SU = &SUnits [I];
1830	for (const SDep &P : SU->Preds) {
1831	SUnit *pred = P.getSUnit();
1832	if (P.getLatency() == `0`)
1833	zeroLatencyDepth =
1834	std::max(a: zeroLatencyDepth, b: getZeroLatencyDepth(Node: pred) + `1`);
1835	if (ignoreDependence(D: P, isPred: true))
1836	continue;
1837	asap = std::max(a: asap, b: (int)(getASAP(Node: pred) + P.getLatency() -
1838	getDistance(U: pred, V: SU, Dep: P) * MII));
1839	}
1840	maxASAP = std::max(a: maxASAP, b: asap);
1841	ScheduleInfo [I].ASAP = asap;
1842	ScheduleInfo [I].ZeroLatencyDepth = zeroLatencyDepth;
1843	}
1844
1845	// Compute ALAP, ZeroLatencyHeight, and MOV.
1846	for (int I : llvm::reverse(C&: Topo)) {
1847	int alap = maxASAP;
1848	int zeroLatencyHeight = `0`;
1849	SUnit *SU = &SUnits [I];
1850	for (const SDep &S : SU->Succs) {
1851	SUnit *succ = S.getSUnit();
1852	if (succ->isBoundaryNode())
1853	continue;
1854	if (S.getLatency() == `0`)
1855	zeroLatencyHeight =
1856	std::max(a: zeroLatencyHeight, b: getZeroLatencyHeight(Node: succ) + `1`);
1857	if (ignoreDependence(D: S, isPred: true))
1858	continue;
1859	alap = std::min(a: alap, b: (int)(getALAP(Node: succ) - S.getLatency() +
1860	getDistance(U: SU, V: succ, Dep: S) * MII));
1861	}
1862
1863	ScheduleInfo [I].ALAP = alap;
1864	ScheduleInfo [I].ZeroLatencyHeight = zeroLatencyHeight;
1865	}
1866
1867	// After computing the node functions, compute the summary for each node set.
1868	for (NodeSet &I : NodeSets)
1869	I.computeNodeSetInfo(SSD: this);
1870
1871	LLVM_DEBUG({
1872	for (unsigned i = `0`; i < SUnits.size(); i++) {
1873	dbgs() << "\tNode " << i << ":\n";
1874	dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1875	dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1876	dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1877	dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1878	dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1879	dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1880	dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1881	}
1882	});
1883	}
1884
1885	/// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1886	/// as the predecessors of the elements of NodeOrder that are not also in
1887	/// NodeOrder.
1888	static bool pred_L(SetVector<SUnit *> &NodeOrder,
1889	SmallSetVector<SUnit *, `8`> &Preds,
1890	const NodeSet S = nullptr*) {
1891	Preds.clear();
1892	for (const SUnit *SU : NodeOrder) {
1893	for (const SDep &Pred : SU->Preds) {
1894	if (S && S->count(SU: Pred.getSUnit()) == `0`)
1895	continue;
1896	if (ignoreDependence(D: Pred, isPred: true))
1897	continue;
1898	if (NodeOrder.count(key: Pred.getSUnit()) == `0`)
1899	Preds.insert(X: Pred.getSUnit());
1900	}
1901	// Back-edges are predecessors with an anti-dependence.
1902	for (const SDep &Succ : SU->Succs) {
1903	if (Succ.getKind() != SDep::Anti)
1904	continue;
1905	if (S && S->count(SU: Succ.getSUnit()) == `0`)
1906	continue;
1907	if (NodeOrder.count(key: Succ.getSUnit()) == `0`)
1908	Preds.insert(X: Succ.getSUnit());
1909	}
1910	}
1911	return !Preds.empty();
1912	}
1913
1914	/// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1915	/// as the successors of the elements of NodeOrder that are not also in
1916	/// NodeOrder.
1917	static bool succ_L(SetVector<SUnit *> &NodeOrder,
1918	SmallSetVector<SUnit *, `8`> &Succs,
1919	const NodeSet S = nullptr*) {
1920	Succs.clear();
1921	for (const SUnit *SU : NodeOrder) {
1922	for (const SDep &Succ : SU->Succs) {
1923	if (S && S->count(SU: Succ.getSUnit()) == `0`)
1924	continue;
1925	if (ignoreDependence(D: Succ, isPred: false))
1926	continue;
1927	if (NodeOrder.count(key: Succ.getSUnit()) == `0`)
1928	Succs.insert(X: Succ.getSUnit());
1929	}
1930	for (const SDep &Pred : SU->Preds) {
1931	if (Pred.getKind() != SDep::Anti)
1932	continue;
1933	if (S && S->count(SU: Pred.getSUnit()) == `0`)
1934	continue;
1935	if (NodeOrder.count(key: Pred.getSUnit()) == `0`)
1936	Succs.insert(X: Pred.getSUnit());
1937	}
1938	}
1939	return !Succs.empty();
1940	}
1941
1942	/// Return true if there is a path from the specified node to any of the nodes
1943	/// in DestNodes. Keep track and return the nodes in any path.
1944	static bool computePath(SUnit Cur, SetVector<SUnit > &Path,
1945	SetVector<SUnit *> &DestNodes,
1946	SetVector<SUnit *> &Exclude,
1947	SmallPtrSet<SUnit *, `8`> &Visited) {
1948	if (Cur->isBoundaryNode())
1949	return false;
1950	if (Exclude.contains(key: Cur))
1951	return false;
1952	if (DestNodes.contains(key: Cur))
1953	return true;
1954	if (!Visited.insert(Ptr: Cur).second)
1955	return Path.contains(key: Cur);
1956	bool FoundPath = false;
1957	for (auto &SI : Cur->Succs)
1958	if (!ignoreDependence(D: SI, isPred: false))
1959	FoundPath \|=
1960	computePath(Cur: SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1961	for (auto &PI : Cur->Preds)
1962	if (PI.getKind() == SDep::Anti)
1963	FoundPath \|=
1964	computePath(Cur: PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1965	if (FoundPath)
1966	Path.insert(X: Cur);
1967	return FoundPath;
1968	}
1969
1970	/// Compute the live-out registers for the instructions in a node-set.
1971	/// The live-out registers are those that are defined in the node-set,
1972	/// but not used. Except for use operands of Phis.
1973	static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
1974	NodeSet &NS) {
1975	const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1976	MachineRegisterInfo &MRI = MF.getRegInfo();
1977	SmallVector<RegisterMaskPair, `8`> LiveOutRegs;
1978	SmallSet<unsigned, `4`> Uses;
1979	for (SUnit *SU : NS) {
1980	const MachineInstr *MI = SU->getInstr();
1981	if (MI->isPHI())
1982	continue;
1983	for (const MachineOperand &MO : MI->all_uses()) {
1984	Register Reg = MO.getReg();
1985	if (Reg.isVirtual())
1986	Uses.insert(V: Reg);
1987	else if (MRI.isAllocatable(PhysReg: Reg))
1988	for (MCRegUnit Unit : TRI->regunits(Reg: Reg.asMCReg()))
1989	Uses.insert(V: Unit);
1990	}
1991	}
1992	for (SUnit *SU : NS)
1993	for (const MachineOperand &MO : SU->getInstr()->all_defs())
1994	if (!MO.isDead()) {
1995	Register Reg = MO.getReg();
1996	if (Reg.isVirtual()) {
1997	if (!Uses.count(V: Reg))
1998	LiveOutRegs.push_back(Elt: RegisterMaskPair (Reg,
1999	LaneBitmask::getNone()));
2000	} else if (MRI.isAllocatable(PhysReg: Reg)) {
2001	for (MCRegUnit Unit : TRI->regunits(Reg: Reg.asMCReg()))
2002	if (!Uses.count(V: Unit))
2003	LiveOutRegs.push_back(
2004	Elt: RegisterMaskPair (Unit, LaneBitmask::getNone()));
2005	}
2006	}
2007	RPTracker.addLiveRegs(Regs: LiveOutRegs);
2008	}
2009
2010	/// A heuristic to filter nodes in recurrent node-sets if the register
2011	/// pressure of a set is too high.
2012	void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2013	for (auto &NS : NodeSets) {
2014	// Skip small node-sets since they won't cause register pressure problems.
2015	if (NS.size() <= `2`)
2016	continue;
2017	IntervalPressure RecRegPressure;
2018	RegPressureTracker RecRPTracker(RecRegPressure);
2019	RecRPTracker.init(mf: &MF, rci: &RegClassInfo, lis: &LIS, mbb: BB, pos: BB->end(), TrackLaneMasks: false, TrackUntiedDefs: true);
2020	computeLiveOuts(MF, RPTracker&: RecRPTracker, NS);
2021	RecRPTracker.closeBottom();
2022
2023	std::vector<SUnit *> SUnits(NS.begin(), NS.end());
2024	llvm::sort(C&: SUnits, Comp: [](const SUnit A, const* SUnit *B) {
2025	return A->NodeNum > B->NodeNum;
2026	});
2027
2028	for (auto &SU : SUnits) {
2029	// Since we're computing the register pressure for a subset of the
2030	// instructions in a block, we need to set the tracker for each
2031	// instruction in the node-set. The tracker is set to the instruction
2032	// just after the one we're interested in.
2033	MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
2034	RecRPTracker.setPos(std::next(x: CurInstI));
2035
2036	RegPressureDelta RPDelta;
2037	ArrayRef<PressureChange> CriticalPSets;
2038	RecRPTracker.getMaxUpwardPressureDelta(MI: SU->getInstr(), PDiff: nullptr, Delta&: RPDelta,
2039	CriticalPSets,
2040	MaxPressureLimit: RecRegPressure.MaxSetPressure);
2041	if (RPDelta.Excess.isValid()) {
2042	LLVM_DEBUG(
2043	dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
2044	<< TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
2045	<< ":" << RPDelta.Excess.getUnitInc() << "\n");
2046	NS.setExceedPressure(SU);
2047	break;
2048	}
2049	RecRPTracker.recede();
2050	}
2051	}
2052	}
2053
2054	/// A heuristic to colocate node sets that have the same set of
2055	/// successors.
2056	void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2057	unsigned Colocate = `0`;
2058	for (int i = `0`, e = NodeSets.size(); i < e; ++i) {
2059	NodeSet &N1 = NodeSets [i];
2060	SmallSetVector<SUnit *, `8`> S1;
2061	if (N1.empty() \|\| !succ_L(NodeOrder&: N1, Succs&: S1))
2062	continue;
2063	for (int j = i + `1`; j < e; ++j) {
2064	NodeSet &N2 = NodeSets [j];
2065	if (N1.compareRecMII(RHS&: N2) != `0`)
2066	continue;
2067	SmallSetVector<SUnit *, `8`> S2;
2068	if (N2.empty() \|\| !succ_L(NodeOrder&: N2, Succs&: S2))
2069	continue;
2070	if (llvm::set_is_subset(S1, S2) && S1.size() == S2.size()) {
2071	N1.setColocate(++Colocate);
2072	N2.setColocate(Colocate);
2073	break;
2074	}
2075	}
2076	}
2077	}
2078
2079	/// Check if the existing node-sets are profitable. If not, then ignore the
2080	/// recurrent node-sets, and attempt to schedule all nodes together. This is
2081	/// a heuristic. If the MII is large and all the recurrent node-sets are small,
2082	/// then it's best to try to schedule all instructions together instead of
2083	/// starting with the recurrent node-sets.
2084	void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2085	// Look for loops with a large MII.
2086	if (MII < `17`)
2087	return;
2088	// Check if the node-set contains only a simple add recurrence.
2089	for (auto &NS : NodeSets) {
2090	if (NS.getRecMII() > `2`)
2091	return;
2092	if (NS.getMaxDepth() > MII)
2093	return;
2094	}
2095	NodeSets.clear();
2096	LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
2097	}
2098
2099	/// Add the nodes that do not belong to a recurrence set into groups
2100	/// based upon connected components.
2101	void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2102	SetVector<SUnit *> NodesAdded;
2103	SmallPtrSet<SUnit *, `8`> Visited;
2104	// Add the nodes that are on a path between the previous node sets and
2105	// the current node set.
2106	for (NodeSet &I : NodeSets) {
2107	SmallSetVector<SUnit *, `8`> N;
2108	// Add the nodes from the current node set to the previous node set.
2109	if (succ_L(NodeOrder&: I, Succs&: N)) {
2110	SetVector<SUnit *> Path;
2111	for (SUnit *NI : N) {
2112	Visited.clear();
2113	computePath(Cur: NI, Path, DestNodes&: NodesAdded, Exclude&: I, Visited);
2114	}
2115	if (!Path.empty())
2116	I.insert(S: Path.begin(), E: Path.end());
2117	}
2118	// Add the nodes from the previous node set to the current node set.
2119	N.clear();
2120	if (succ_L(NodeOrder&: NodesAdded, Succs&: N)) {
2121	SetVector<SUnit *> Path;
2122	for (SUnit *NI : N) {
2123	Visited.clear();
2124	computePath(Cur: NI, Path, DestNodes&: I, Exclude&: NodesAdded, Visited);
2125	}
2126	if (!Path.empty())
2127	I.insert(S: Path.begin(), E: Path.end());
2128	}
2129	NodesAdded.insert(Start: I.begin(), End: I.end());
2130	}
2131
2132	// Create a new node set with the connected nodes of any successor of a node
2133	// in a recurrent set.
2134	NodeSet NewSet;
2135	SmallSetVector<SUnit *, `8`> N;
2136	if (succ_L(NodeOrder&: NodesAdded, Succs&: N))
2137	for (SUnit *I : N)
2138	addConnectedNodes(SU: I, NewSet, NodesAdded);
2139	if (!NewSet.empty())
2140	NodeSets.push_back(Elt: NewSet);
2141
2142	// Create a new node set with the connected nodes of any predecessor of a node
2143	// in a recurrent set.
2144	NewSet.clear();
2145	if (pred_L(NodeOrder&: NodesAdded, Preds&: N))
2146	for (SUnit *I : N)
2147	addConnectedNodes(SU: I, NewSet, NodesAdded);
2148	if (!NewSet.empty())
2149	NodeSets.push_back(Elt: NewSet);
2150
2151	// Create new nodes sets with the connected nodes any remaining node that
2152	// has no predecessor.
2153	for (SUnit &SU : SUnits) {
2154	if (NodesAdded.count(key: &SU) == `0`) {
2155	NewSet.clear();
2156	addConnectedNodes(SU: &SU, NewSet, NodesAdded);
2157	if (!NewSet.empty())
2158	NodeSets.push_back(Elt: NewSet);
2159	}
2160	}
2161	}
2162
2163	/// Add the node to the set, and add all of its connected nodes to the set.
2164	void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
2165	SetVector<SUnit *> &NodesAdded) {
2166	NewSet.insert(SU);
2167	NodesAdded.insert(X: SU);
2168	for (auto &SI : SU->Succs) {
2169	SUnit *Successor = SI.getSUnit();
2170	if (!SI.isArtificial() && !Successor->isBoundaryNode() &&
2171	NodesAdded.count(key: Successor) == `0`)
2172	addConnectedNodes(SU: Successor, NewSet, NodesAdded);
2173	}
2174	for (auto &PI : SU->Preds) {
2175	SUnit *Predecessor = PI.getSUnit();
2176	if (!PI.isArtificial() && NodesAdded.count(key: Predecessor) == `0`)
2177	addConnectedNodes(SU: Predecessor, NewSet, NodesAdded);
2178	}
2179	}
2180
2181	/// Return true if Set1 contains elements in Set2. The elements in common
2182	/// are returned in a different container.
2183	static bool isIntersect(SmallSetVector<SUnit , `8`> &Set1, const* NodeSet &Set2,
2184	SmallSetVector<SUnit *, `8`> &Result) {
2185	Result.clear();
2186	for (SUnit *SU : Set1) {
2187	if (Set2.count(SU) != `0`)
2188	Result.insert(X: SU);
2189	}
2190	return !Result.empty();
2191	}
2192
2193	/// Merge the recurrence node sets that have the same initial node.
2194	void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2195	for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
2196	++I) {
2197	NodeSet &NI = *I;
2198	for (NodeSetType::iterator J = I + `1`; J != E;) {
2199	NodeSet &NJ = *J;
2200	if (NI.getNode(i: `0`)->NodeNum == NJ.getNode(i: `0`)->NodeNum) {
2201	if (NJ.compareRecMII(RHS&: NI) > `0`)
2202	NI.setRecMII(NJ.getRecMII());
2203	for (SUnit SU : J)
2204	I->insert(SU);
2205	NodeSets.erase(CI: J);
2206	E = NodeSets.end();
2207	} else {
2208	++J;
2209	}
2210	}
2211	}
2212	}
2213
2214	/// Remove nodes that have been scheduled in previous NodeSets.
2215	void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2216	for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
2217	++I)
2218	for (NodeSetType::iterator J = I + `1`; J != E;) {
2219	J->remove_if(P: [&](SUnit SUJ) { return* I->count(SU: SUJ); });
2220
2221	if (J->empty()) {
2222	NodeSets.erase(CI: J);
2223	E = NodeSets.end();
2224	} else {
2225	++J;
2226	}
2227	}
2228	}
2229
2230	/// Compute an ordered list of the dependence graph nodes, which
2231	/// indicates the order that the nodes will be scheduled. This is a
2232	/// two-level algorithm. First, a partial order is created, which
2233	/// consists of a list of sets ordered from highest to lowest priority.
2234	void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2235	SmallSetVector<SUnit *, `8`> R;
2236	NodeOrder.clear();
2237
2238	for (auto &Nodes : NodeSets) {
2239	LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
2240	OrderKind Order;
2241	SmallSetVector<SUnit *, `8`> N;
2242	if (pred_L(NodeOrder, Preds&: N) && llvm::set_is_subset(S1: N, S2: Nodes)) {
2243	R.insert(Start: N.begin(), End: N.end());
2244	Order = BottomUp;
2245	LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
2246	} else if (succ_L(NodeOrder, Succs&: N) && llvm::set_is_subset(S1: N, S2: Nodes)) {
2247	R.insert(Start: N.begin(), End: N.end());
2248	Order = TopDown;
2249	LLVM_DEBUG(dbgs() << " Top down (succs) ");
2250	} else if (isIntersect(Set1&: N, Set2: Nodes, Result&: R)) {
2251	// If some of the successors are in the existing node-set, then use the
2252	// top-down ordering.
2253	Order = TopDown;
2254	LLVM_DEBUG(dbgs() << " Top down (intersect) ");
2255	} else if (NodeSets.size() == `1`) {
2256	for (const auto &N : Nodes)
2257	if (N->Succs.size() == `0`)
2258	R.insert(X: N);
2259	Order = BottomUp;
2260	LLVM_DEBUG(dbgs() << " Bottom up (all) ");
2261	} else {
2262	// Find the node with the highest ASAP.
2263	SUnit maxASAP = nullptr*;
2264	for (SUnit *SU : Nodes) {
2265	if (maxASAP == nullptr \|\| getASAP(Node: SU) > getASAP(Node: maxASAP) \|\|
2266	(getASAP(Node: SU) == getASAP(Node: maxASAP) && SU->NodeNum > maxASAP->NodeNum))
2267	maxASAP = SU;
2268	}
2269	R.insert(X: maxASAP);
2270	Order = BottomUp;
2271	LLVM_DEBUG(dbgs() << " Bottom up (default) ");
2272	}
2273
2274	while (!R.empty()) {
2275	if (Order == TopDown) {
2276	// Choose the node with the maximum height. If more than one, choose
2277	// the node wiTH the maximum ZeroLatencyHeight. If still more than one,
2278	// choose the node with the lowest MOV.
2279	while (!R.empty()) {
2280	SUnit maxHeight = nullptr*;
2281	for (SUnit *I : R) {
2282	if (maxHeight == nullptr \|\| getHeight(Node: I) > getHeight(Node: maxHeight))
2283	maxHeight = I;
2284	else if (getHeight(Node: I) == getHeight(Node: maxHeight) &&
2285	getZeroLatencyHeight(Node: I) > getZeroLatencyHeight(Node: maxHeight))
2286	maxHeight = I;
2287	else if (getHeight(Node: I) == getHeight(Node: maxHeight) &&
2288	getZeroLatencyHeight(Node: I) ==
2289	getZeroLatencyHeight(Node: maxHeight) &&
2290	getMOV(Node: I) < getMOV(Node: maxHeight))
2291	maxHeight = I;
2292	}
2293	NodeOrder.insert(X: maxHeight);
2294	LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
2295	R.remove(X: maxHeight);
2296	for (const auto &I : maxHeight->Succs) {
2297	if (Nodes.count(SU: I.getSUnit()) == `0`)
2298	continue;
2299	if (NodeOrder.contains(key: I.getSUnit()))
2300	continue;
2301	if (ignoreDependence(D: I, isPred: false))
2302	continue;
2303	R.insert(X: I.getSUnit());
2304	}
2305	// Back-edges are predecessors with an anti-dependence.
2306	for (const auto &I : maxHeight->Preds) {
2307	if (I.getKind() != SDep::Anti)
2308	continue;
2309	if (Nodes.count(SU: I.getSUnit()) == `0`)
2310	continue;
2311	if (NodeOrder.contains(key: I.getSUnit()))
2312	continue;
2313	R.insert(X: I.getSUnit());
2314	}
2315	}
2316	Order = BottomUp;
2317	LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
2318	SmallSetVector<SUnit *, `8`> N;
2319	if (pred_L(NodeOrder, Preds&: N, S: &Nodes))
2320	R.insert(Start: N.begin(), End: N.end());
2321	} else {
2322	// Choose the node with the maximum depth. If more than one, choose
2323	// the node with the maximum ZeroLatencyDepth. If still more than one,
2324	// choose the node with the lowest MOV.
2325	while (!R.empty()) {
2326	SUnit maxDepth = nullptr*;
2327	for (SUnit *I : R) {
2328	if (maxDepth == nullptr \|\| getDepth(Node: I) > getDepth(Node: maxDepth))
2329	maxDepth = I;
2330	else if (getDepth(Node: I) == getDepth(Node: maxDepth) &&
2331	getZeroLatencyDepth(Node: I) > getZeroLatencyDepth(Node: maxDepth))
2332	maxDepth = I;
2333	else if (getDepth(Node: I) == getDepth(Node: maxDepth) &&
2334	getZeroLatencyDepth(Node: I) == getZeroLatencyDepth(Node: maxDepth) &&
2335	getMOV(Node: I) < getMOV(Node: maxDepth))
2336	maxDepth = I;
2337	}
2338	NodeOrder.insert(X: maxDepth);
2339	LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
2340	R.remove(X: maxDepth);
2341	if (Nodes.isExceedSU(SU: maxDepth)) {
2342	Order = TopDown;
2343	R.clear();
2344	R.insert(X: Nodes.getNode(i: `0`));
2345	break;
2346	}
2347	for (const auto &I : maxDepth->Preds) {
2348	if (Nodes.count(SU: I.getSUnit()) == `0`)
2349	continue;
2350	if (NodeOrder.contains(key: I.getSUnit()))
2351	continue;
2352	R.insert(X: I.getSUnit());
2353	}
2354	// Back-edges are predecessors with an anti-dependence.
2355	for (const auto &I : maxDepth->Succs) {
2356	if (I.getKind() != SDep::Anti)
2357	continue;
2358	if (Nodes.count(SU: I.getSUnit()) == `0`)
2359	continue;
2360	if (NodeOrder.contains(key: I.getSUnit()))
2361	continue;
2362	R.insert(X: I.getSUnit());
2363	}
2364	}
2365	Order = TopDown;
2366	LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
2367	SmallSetVector<SUnit *, `8`> N;
2368	if (succ_L(NodeOrder, Succs&: N, S: &Nodes))
2369	R.insert(Start: N.begin(), End: N.end());
2370	}
2371	}
2372	LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
2373	}
2374
2375	LLVM_DEBUG({
2376	dbgs() << "Node order: ";
2377	for (SUnit *I : NodeOrder)
2378	dbgs() << " " << I->NodeNum << " ";
2379	dbgs() << "\n";
2380	});
2381	}
2382
2383	/// Process the nodes in the computed order and create the pipelined schedule
2384	/// of the instructions, if possible. Return true if a schedule is found.
2385	bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2386
2387	if (NodeOrder.empty()){
2388	LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
2389	return false;
2390	}
2391
2392	bool scheduleFound = false;
2393	std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2394	if (LimitRegPressure) {
2395	HRPDetector =
2396	std::make_unique<HighRegisterPressureDetector>(args: Loop.getHeader(), args&: MF);
2397	HRPDetector ->init(RCI: RegClassInfo);
2398	}
2399	// Keep increasing II until a valid schedule is found.
2400	for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
2401	Schedule.reset();
2402	Schedule.setInitiationInterval(II);
2403	LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
2404
2405	SetVector<SUnit *>::iterator NI = NodeOrder.begin();
2406	SetVector<SUnit *>::iterator NE = NodeOrder.end();
2407	do {
2408	SUnit SU = NI;
2409
2410	// Compute the schedule time for the instruction, which is based
2411	// upon the scheduled time for any predecessors/successors.
2412	int EarlyStart = INT_MIN;
2413	int LateStart = INT_MAX;
2414	// These values are set when the size of the schedule window is limited
2415	// due to chain dependences.
2416	int SchedEnd = INT_MAX;
2417	int SchedStart = INT_MIN;
2418	Schedule.computeStart(SU, MaxEarlyStart: &EarlyStart, MinLateStart: &LateStart, MinEnd: &SchedEnd, MaxStart: &SchedStart,
2419	II, DAG: this);
2420	LLVM_DEBUG({
2421	dbgs() << "\n";
2422	dbgs() << "Inst (" << SU->NodeNum << ") ";
2423	SU->getInstr()->dump();
2424	dbgs() << "\n";
2425	});
2426	LLVM_DEBUG({
2427	dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
2428	LateStart, SchedEnd, SchedStart);
2429	});
2430
2431	if (EarlyStart > LateStart \|\| SchedEnd < EarlyStart \|\|
2432	SchedStart > LateStart)
2433	scheduleFound = false;
2434	else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2435	SchedEnd = std::min(a: SchedEnd, b: EarlyStart + (int)II - `1`);
2436	scheduleFound = Schedule.insert(SU, StartCycle: EarlyStart, EndCycle: SchedEnd, II);
2437	} else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2438	SchedStart = std::max(a: SchedStart, b: LateStart - (int)II + `1`);
2439	scheduleFound = Schedule.insert(SU, StartCycle: LateStart, EndCycle: SchedStart, II);
2440	} else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2441	SchedEnd =
2442	std::min(a: SchedEnd, b: std::min(a: LateStart, b: EarlyStart + (int)II - `1`));
2443	// When scheduling a Phi it is better to start at the late cycle and go
2444	// backwards. The default order may insert the Phi too far away from
2445	// its first dependence.
2446	if (SU->getInstr()->isPHI())
2447	scheduleFound = Schedule.insert(SU, StartCycle: SchedEnd, EndCycle: EarlyStart, II);
2448	else
2449	scheduleFound = Schedule.insert(SU, StartCycle: EarlyStart, EndCycle: SchedEnd, II);
2450	} else {
2451	int FirstCycle = Schedule.getFirstCycle();
2452	scheduleFound = Schedule.insert(SU, StartCycle: FirstCycle + getASAP(Node: SU),
2453	EndCycle: FirstCycle + getASAP(Node: SU) + II - `1`, II);
2454	}
2455	// Even if we find a schedule, make sure the schedule doesn't exceed the
2456	// allowable number of stages. We keep trying if this happens.
2457	if (scheduleFound)
2458	if (SwpMaxStages > -`1` &&
2459	Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2460	scheduleFound = false;
2461
2462	LLVM_DEBUG({
2463	if (!scheduleFound)
2464	dbgs() << "\tCan't schedule\n";
2465	});
2466	} while (++NI != NE && scheduleFound);
2467
2468	// If a schedule is found, ensure non-pipelined instructions are in stage 0
2469	if (scheduleFound)
2470	scheduleFound =
2471	Schedule.normalizeNonPipelinedInstructions(SSD: this, PLI: LoopPipelinerInfo);
2472
2473	// If a schedule is found, check if it is a valid schedule too.
2474	if (scheduleFound)
2475	scheduleFound = Schedule.isValidSchedule(SSD: this);
2476
2477	// If a schedule was found and the option is enabled, check if the schedule
2478	// might generate additional register spills/fills.
2479	if (scheduleFound && LimitRegPressure)
2480	scheduleFound =
2481	!HRPDetector ->detect(SSD: this, Schedule, MaxStage: Schedule.getMaxStageCount());
2482	}
2483
2484	LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound
2485	<< " (II=" << Schedule.getInitiationInterval()
2486	<< ")\n");
2487
2488	if (scheduleFound) {
2489	scheduleFound = LoopPipelinerInfo->shouldUseSchedule(SSD&: *this, SMS&: Schedule);
2490	if (!scheduleFound)
2491	LLVM_DEBUG(dbgs() << "Target rejected schedule\n");
2492	}
2493
2494	if (scheduleFound) {
2495	Schedule.finalizeSchedule(SSD: this);
2496	Pass.ORE->emit(RemarkBuilder: [&]() {
2497	return MachineOptimizationRemarkAnalysis (
2498	DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
2499	<< "Schedule found with Initiation Interval: "
2500	<< ore::NV ("II", Schedule.getInitiationInterval())
2501	<< ", MaxStageCount: "
2502	<< ore::NV ("MaxStageCount", Schedule.getMaxStageCount());
2503	});
2504	} else
2505	Schedule.reset();
2506
2507	return scheduleFound && Schedule.getMaxStageCount() > `0`;
2508	}
2509
2510	/// Return true if we can compute the amount the instruction changes
2511	/// during each iteration. Set Delta to the amount of the change.
2512	bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2513	const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2514	const MachineOperand *BaseOp;
2515	int64_t Offset;
2516	bool OffsetIsScalable;
2517	if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
2518	return false;
2519
2520	// FIXME: This algorithm assumes instructions have fixed-size offsets.
2521	if (OffsetIsScalable)
2522	return false;
2523
2524	if (!BaseOp->isReg())
2525	return false;
2526
2527	Register BaseReg = BaseOp->getReg();
2528
2529	MachineRegisterInfo &MRI = MF.getRegInfo();
2530	// Check if there is a Phi. If so, get the definition in the loop.
2531	MachineInstr *BaseDef = MRI.getVRegDef(Reg: BaseReg);
2532	if (BaseDef && BaseDef->isPHI()) {
2533	BaseReg = getLoopPhiReg(Phi: *BaseDef, LoopBB: MI.getParent());
2534	BaseDef = MRI.getVRegDef(Reg: BaseReg);
2535	}
2536	if (!BaseDef)
2537	return false;
2538
2539	int D = `0`;
2540	if (!TII->getIncrementValue(MI: *BaseDef, Value&: D) && D >= `0`)
2541	return false;
2542
2543	Delta = D;
2544	return true;
2545	}
2546
2547	/// Check if we can change the instruction to use an offset value from the
2548	/// previous iteration. If so, return true and set the base and offset values
2549	/// so that we can rewrite the load, if necessary.
2550	/// v1 = Phi(v0, v3)
2551	/// v2 = load v1, 0
2552	/// v3 = post_store v1, 4, x
2553	/// This function enables the load to be rewritten as v2 = load v3, 4.
2554	bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
2555	unsigned &BasePos,
2556	unsigned &OffsetPos,
2557	unsigned &NewBase,
2558	int64_t &Offset) {
2559	// Get the load instruction.
2560	if (TII->isPostIncrement(MI: *MI))
2561	return false;
2562	unsigned BasePosLd, OffsetPosLd;
2563	if (!TII->getBaseAndOffsetPosition(MI: *MI, BasePos&: BasePosLd, OffsetPos&: OffsetPosLd))
2564	return false;
2565	Register BaseReg = MI->getOperand(i: BasePosLd).getReg();
2566
2567	// Look for the Phi instruction.
2568	MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
2569	MachineInstr *Phi = MRI.getVRegDef(Reg: BaseReg);
2570	if (!Phi \|\| !Phi->isPHI())
2571	return false;
2572	// Get the register defined in the loop block.
2573	unsigned PrevReg = getLoopPhiReg(Phi: *Phi, LoopBB: MI->getParent());
2574	if (!PrevReg)
2575	return false;
2576
2577	// Check for the post-increment load/store instruction.
2578	MachineInstr *PrevDef = MRI.getVRegDef(Reg: PrevReg);
2579	if (!PrevDef \|\| PrevDef == MI)
2580	return false;
2581
2582	if (!TII->isPostIncrement(MI: *PrevDef))
2583	return false;
2584
2585	unsigned BasePos1 = `0`, OffsetPos1 = `0`;
2586	if (!TII->getBaseAndOffsetPosition(MI: *PrevDef, BasePos&: BasePos1, OffsetPos&: OffsetPos1))
2587	return false;
2588
2589	// Make sure that the instructions do not access the same memory location in
2590	// the next iteration.
2591	int64_t LoadOffset = MI->getOperand(i: OffsetPosLd).getImm();
2592	int64_t StoreOffset = PrevDef->getOperand(i: OffsetPos1).getImm();
2593	MachineInstr *NewMI = MF.CloneMachineInstr(Orig: MI);
2594	NewMI->getOperand(i: OffsetPosLd).setImm(LoadOffset + StoreOffset);
2595	bool Disjoint = TII->areMemAccessesTriviallyDisjoint(MIa: NewMI, MIb: PrevDef);
2596	MF.deleteMachineInstr(MI: NewMI);
2597	if (!Disjoint)
2598	return false;
2599
2600	// Set the return value once we determine that we return true.
2601	BasePos = BasePosLd;
2602	OffsetPos = OffsetPosLd;
2603	NewBase = PrevReg;
2604	Offset = StoreOffset;
2605	return true;
2606	}
2607
2608	/// Apply changes to the instruction if needed. The changes are need
2609	/// to improve the scheduling and depend up on the final schedule.
2610	void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
2611	SMSchedule &Schedule) {
2612	SUnit *SU = getSUnit(MI);
2613	DenseMap<SUnit , std::pair<unsigned*, int64_t>>::iterator It =
2614	InstrChanges.find(Val: SU);
2615	if (It != InstrChanges.end()) {
2616	std::pair<unsigned, int64_t> RegAndOffset = It ->second;
2617	unsigned BasePos, OffsetPos;
2618	if (!TII->getBaseAndOffsetPosition(MI: *MI, BasePos, OffsetPos))
2619	return;
2620	Register BaseReg = MI->getOperand(i: BasePos).getReg();
2621	MachineInstr *LoopDef = findDefInLoop(Reg: BaseReg);
2622	int DefStageNum = Schedule.stageScheduled(SU: getSUnit(MI: LoopDef));
2623	int DefCycleNum = Schedule.cycleScheduled(SU: getSUnit(MI: LoopDef));
2624	int BaseStageNum = Schedule.stageScheduled(SU);
2625	int BaseCycleNum = Schedule.cycleScheduled(SU);
2626	if (BaseStageNum < DefStageNum) {
2627	MachineInstr *NewMI = MF.CloneMachineInstr(Orig: MI);
2628	int OffsetDiff = DefStageNum - BaseStageNum;
2629	if (DefCycleNum < BaseCycleNum) {
2630	NewMI->getOperand(i: BasePos).setReg(RegAndOffset.first);
2631	if (OffsetDiff > `0`)
2632	--OffsetDiff;
2633	}
2634	int64_t NewOffset =
2635	MI->getOperand(i: OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2636	NewMI->getOperand(i: OffsetPos).setImm(NewOffset);
2637	SU->setInstr(NewMI);
2638	MISUnitMap [NewMI] = SU;
2639	NewMIs [MI] = NewMI;
2640	}
2641	}
2642	}
2643
2644	/// Return the instruction in the loop that defines the register.
2645	/// If the definition is a Phi, then follow the Phi operand to
2646	/// the instruction in the loop.
2647	MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
2648	SmallPtrSet<MachineInstr *, `8`> Visited;
2649	MachineInstr *Def = MRI.getVRegDef(Reg);
2650	while (Def->isPHI()) {
2651	if (!Visited.insert(Ptr: Def).second)
2652	break;
2653	for (unsigned i = `1`, e = Def->getNumOperands(); i < e; i += `2`)
2654	if (Def->getOperand(i: i + `1`).getMBB() == BB) {
2655	Def = MRI.getVRegDef(Reg: Def->getOperand(i).getReg());
2656	break;
2657	}
2658	}
2659	return Def;
2660	}
2661
2662	/// Return true for an order or output dependence that is loop carried
2663	/// potentially. A dependence is loop carried if the destination defines a value
2664	/// that may be used or defined by the source in a subsequent iteration.
2665	bool SwingSchedulerDAG::isLoopCarriedDep(SUnit Source, const* SDep &Dep,
2666	bool isSucc) {
2667	if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) \|\|
2668	Dep.isArtificial() \|\| Dep.getSUnit()->isBoundaryNode())
2669	return false;
2670
2671	if (!SwpPruneLoopCarried)
2672	return true;
2673
2674	if (Dep.getKind() == SDep::Output)
2675	return true;
2676
2677	MachineInstr *SI = Source->getInstr();
2678	MachineInstr *DI = Dep.getSUnit()->getInstr();
2679	if (!isSucc)
2680	std::swap(a&: SI, b&: DI);
2681	assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
2682
2683	// Assume ordered loads and stores may have a loop carried dependence.
2684	if (SI->hasUnmodeledSideEffects() \|\| DI->hasUnmodeledSideEffects() \|\|
2685	SI->mayRaiseFPException() \|\| DI->mayRaiseFPException() \|\|
2686	SI->hasOrderedMemoryRef() \|\| DI->hasOrderedMemoryRef())
2687	return true;
2688
2689	if (!DI->mayLoadOrStore() \|\| !SI->mayLoadOrStore())
2690	return false;
2691
2692	// The conservative assumption is that a dependence between memory operations
2693	// may be loop carried. The following code checks when it can be proved that
2694	// there is no loop carried dependence.
2695	unsigned DeltaS, DeltaD;
2696	if (!computeDelta(MI&: SI, Delta&: DeltaS) \|\| !computeDelta(MI&: DI, Delta&: DeltaD))
2697	return true;
2698
2699	const MachineOperand BaseOpS, BaseOpD;
2700	int64_t OffsetS, OffsetD;
2701	bool OffsetSIsScalable, OffsetDIsScalable;
2702	const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2703	if (!TII->getMemOperandWithOffset(MI: *SI, BaseOp&: BaseOpS, Offset&: OffsetS, OffsetIsScalable&: OffsetSIsScalable,
2704	TRI) \|\|
2705	!TII->getMemOperandWithOffset(MI: *DI, BaseOp&: BaseOpD, Offset&: OffsetD, OffsetIsScalable&: OffsetDIsScalable,
2706	TRI))
2707	return true;
2708
2709	assert(!OffsetSIsScalable && !OffsetDIsScalable &&
2710	"Expected offsets to be byte offsets");
2711
2712	MachineInstr *DefS = MRI.getVRegDef(Reg: BaseOpS->getReg());
2713	MachineInstr *DefD = MRI.getVRegDef(Reg: BaseOpD->getReg());
2714	if (!DefS \|\| !DefD \|\| !DefS->isPHI() \|\| !DefD->isPHI())
2715	return true;
2716
2717	unsigned InitValS = `0`;
2718	unsigned LoopValS = `0`;
2719	unsigned InitValD = `0`;
2720	unsigned LoopValD = `0`;
2721	getPhiRegs(Phi&: *DefS, Loop: BB, InitVal&: InitValS, LoopVal&: LoopValS);
2722	getPhiRegs(Phi&: *DefD, Loop: BB, InitVal&: InitValD, LoopVal&: LoopValD);
2723	MachineInstr *InitDefS = MRI.getVRegDef(Reg: InitValS);
2724	MachineInstr *InitDefD = MRI.getVRegDef(Reg: InitValD);
2725
2726	if (!InitDefS->isIdenticalTo(Other: *InitDefD))
2727	return true;
2728
2729	// Check that the base register is incremented by a constant value for each
2730	// iteration.
2731	MachineInstr *LoopDefS = MRI.getVRegDef(Reg: LoopValS);
2732	int D = `0`;
2733	if (!LoopDefS \|\| !TII->getIncrementValue(MI: *LoopDefS, Value&: D))
2734	return true;
2735
2736	uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
2737	uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
2738
2739	// This is the main test, which checks the offset values and the loop
2740	// increment value to determine if the accesses may be loop carried.
2741	if (AccessSizeS == MemoryLocation::UnknownSize \|\|
2742	AccessSizeD == MemoryLocation::UnknownSize)
2743	return true;
2744
2745	if (DeltaS != DeltaD \|\| DeltaS < AccessSizeS \|\| DeltaD < AccessSizeD)
2746	return true;
2747
2748	return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
2749	}
2750
2751	void SwingSchedulerDAG::postProcessDAG() {
2752	for (auto &M : Mutations)
2753	M ->apply(DAG: this);
2754	}
2755
2756	/// Try to schedule the node at the specified StartCycle and continue
2757	/// until the node is schedule or the EndCycle is reached. This function
2758	/// returns true if the node is scheduled. This routine may search either
2759	/// forward or backward for a place to insert the instruction based upon
2760	/// the relative values of StartCycle and EndCycle.
2761	bool SMSchedule::insert(SUnit SU, int* StartCycle, int EndCycle, int II) {
2762	bool forward = true;
2763	LLVM_DEBUG({
2764	dbgs() << "Trying to insert node between " << StartCycle << " and "
2765	<< EndCycle << " II: " << II << "\n";
2766	});
2767	if (StartCycle > EndCycle)
2768	forward = false;
2769
2770	// The terminating condition depends on the direction.
2771	int termCycle = forward ? EndCycle + `1` : EndCycle - `1`;
2772	for (int curCycle = StartCycle; curCycle != termCycle;
2773	forward ? ++curCycle : --curCycle) {
2774
2775	if (ST.getInstrInfo()->isZeroCost(Opcode: SU->getInstr()->getOpcode()) \|\|
2776	ProcItinResources.canReserveResources(SU&: *SU, Cycle: curCycle)) {
2777	LLVM_DEBUG({
2778	dbgs() << "\tinsert at cycle " << curCycle << " ";
2779	SU->getInstr()->dump();
2780	});
2781
2782	if (!ST.getInstrInfo()->isZeroCost(Opcode: SU->getInstr()->getOpcode()))
2783	ProcItinResources.reserveResources(SU&: *SU, Cycle: curCycle);
2784	ScheduledInstrs [curCycle].push_back(x: SU);
2785	InstrToCycle.insert(x: std::make_pair(x&: SU, y&: curCycle));
2786	if (curCycle > LastCycle)
2787	LastCycle = curCycle;
2788	if (curCycle < FirstCycle)
2789	FirstCycle = curCycle;
2790	return true;
2791	}
2792	LLVM_DEBUG({
2793	dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
2794	SU->getInstr()->dump();
2795	});
2796	}
2797	return false;
2798	}
2799
2800	// Return the cycle of the earliest scheduled instruction in the chain.
2801	int SMSchedule::earliestCycleInChain(const SDep &Dep) {
2802	SmallPtrSet<SUnit *, `8`> Visited;
2803	SmallVector<SDep, `8`> Worklist;
2804	Worklist.push_back(Elt: Dep);
2805	int EarlyCycle = INT_MAX;
2806	while (!Worklist.empty()) {
2807	const SDep &Cur = Worklist.pop_back_val();
2808	SUnit *PrevSU = Cur.getSUnit();
2809	if (Visited.count(Ptr: PrevSU))
2810	continue;
2811	std::map<SUnit , int*>::const_iterator it = InstrToCycle.find(x: PrevSU);
2812	if (it == InstrToCycle.end())
2813	continue;
2814	EarlyCycle = std::min(a: EarlyCycle, b: it ->second);
2815	for (const auto &PI : PrevSU->Preds)
2816	if (PI.getKind() == SDep::Order \|\| PI.getKind() == SDep::Output)
2817	Worklist.push_back(Elt: PI);
2818	Visited.insert(Ptr: PrevSU);
2819	}
2820	return EarlyCycle;
2821	}
2822
2823	// Return the cycle of the latest scheduled instruction in the chain.
2824	int SMSchedule::latestCycleInChain(const SDep &Dep) {
2825	SmallPtrSet<SUnit *, `8`> Visited;
2826	SmallVector<SDep, `8`> Worklist;
2827	Worklist.push_back(Elt: Dep);
2828	int LateCycle = INT_MIN;
2829	while (!Worklist.empty()) {
2830	const SDep &Cur = Worklist.pop_back_val();
2831	SUnit *SuccSU = Cur.getSUnit();
2832	if (Visited.count(Ptr: SuccSU) \|\| SuccSU->isBoundaryNode())
2833	continue;
2834	std::map<SUnit , int*>::const_iterator it = InstrToCycle.find(x: SuccSU);
2835	if (it == InstrToCycle.end())
2836	continue;
2837	LateCycle = std::max(a: LateCycle, b: it ->second);
2838	for (const auto &SI : SuccSU->Succs)
2839	if (SI.getKind() == SDep::Order \|\| SI.getKind() == SDep::Output)
2840	Worklist.push_back(Elt: SI);
2841	Visited.insert(Ptr: SuccSU);
2842	}
2843	return LateCycle;
2844	}
2845
2846	/// If an instruction has a use that spans multiple iterations, then
2847	/// return true. These instructions are characterized by having a back-ege
2848	/// to a Phi, which contains a reference to another Phi.
2849	static SUnit multipleIterations(SUnit SU, SwingSchedulerDAG *DAG) {
2850	for (auto &P : SU->Preds)
2851	if (DAG->isBackedge(Source: SU, Dep: P) && P.getSUnit()->getInstr()->isPHI())
2852	for (auto &S : P.getSUnit()->Succs)
2853	if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
2854	return P.getSUnit();
2855	return nullptr;
2856	}
2857
2858	/// Compute the scheduling start slot for the instruction. The start slot
2859	/// depends on any predecessor or successor nodes scheduled already.
2860	void SMSchedule::computeStart(SUnit SU, int* MaxEarlyStart, int* *MinLateStart,
2861	int MinEnd, int* MaxStart, int* II,
2862	SwingSchedulerDAG *DAG) {
2863	// Iterate over each instruction that has been scheduled already. The start
2864	// slot computation depends on whether the previously scheduled instruction
2865	// is a predecessor or successor of the specified instruction.
2866	for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
2867
2868	// Iterate over each instruction in the current cycle.
2869	for (SUnit *I : getInstructions(cycle)) {
2870	// Because we're processing a DAG for the dependences, we recognize
2871	// the back-edge in recurrences by anti dependences.
2872	for (unsigned i = `0`, e = (unsigned)SU->Preds.size(); i != e; ++i) {
2873	const SDep &Dep = SU->Preds [i];
2874	if (Dep.getSUnit() == I) {
2875	if (!DAG->isBackedge(Source: SU, Dep)) {
2876	int EarlyStart = cycle + Dep.getLatency() -
2877	DAG->getDistance(U: Dep.getSUnit(), V: SU, Dep) * II;
2878	MaxEarlyStart = std::max(a: MaxEarlyStart, b: EarlyStart);
2879	if (DAG->isLoopCarriedDep(Source: SU, Dep, isSucc: false)) {
2880	int End = earliestCycleInChain(Dep) + (II - `1`);
2881	MinEnd = std::min(a: MinEnd, b: End);
2882	}
2883	} else {
2884	int LateStart = cycle - Dep.getLatency() +
2885	DAG->getDistance(U: SU, V: Dep.getSUnit(), Dep) * II;
2886	MinLateStart = std::min(a: MinLateStart, b: LateStart);
2887	}
2888	}
2889	// For instruction that requires multiple iterations, make sure that
2890	// the dependent instruction is not scheduled past the definition.
2891	SUnit *BE = multipleIterations(SU: I, DAG);
2892	if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
2893	!SU->isPred(N: I))
2894	MinLateStart = std::min(a: MinLateStart, b: cycle);
2895	}
2896	for (unsigned i = `0`, e = (unsigned)SU->Succs.size(); i != e; ++i) {
2897	if (SU->Succs [i].getSUnit() == I) {
2898	const SDep &Dep = SU->Succs [i];
2899	if (!DAG->isBackedge(Source: SU, Dep)) {
2900	int LateStart = cycle - Dep.getLatency() +
2901	DAG->getDistance(U: SU, V: Dep.getSUnit(), Dep) * II;
2902	MinLateStart = std::min(a: MinLateStart, b: LateStart);
2903	if (DAG->isLoopCarriedDep(Source: SU, Dep)) {
2904	int Start = latestCycleInChain(Dep) + `1` - II;
2905	MaxStart = std::max(a: MaxStart, b: Start);
2906	}
2907	} else {
2908	int EarlyStart = cycle + Dep.getLatency() -
2909	DAG->getDistance(U: Dep.getSUnit(), V: SU, Dep) * II;
2910	MaxEarlyStart = std::max(a: MaxEarlyStart, b: EarlyStart);
2911	}
2912	}
2913	}
2914	}
2915	}
2916	}
2917
2918	/// Order the instructions within a cycle so that the definitions occur
2919	/// before the uses. Returns true if the instruction is added to the start
2920	/// of the list, or false if added to the end.
2921	void SMSchedule::orderDependence(const SwingSchedulerDAG SSD, SUnit SU,
2922	std::deque<SUnit > &Insts) const* {
2923	MachineInstr *MI = SU->getInstr();
2924	bool OrderBeforeUse = false;
2925	bool OrderAfterDef = false;
2926	bool OrderBeforeDef = false;
2927	unsigned MoveDef = `0`;
2928	unsigned MoveUse = `0`;
2929	int StageInst1 = stageScheduled(SU);
2930
2931	unsigned Pos = `0`;
2932	for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
2933	++I, ++Pos) {
2934	for (MachineOperand &MO : MI->operands()) {
2935	if (!MO.isReg() \|\| !MO.getReg().isVirtual())
2936	continue;
2937
2938	Register Reg = MO.getReg();
2939	unsigned BasePos, OffsetPos;
2940	if (ST.getInstrInfo()->getBaseAndOffsetPosition(MI: *MI, BasePos, OffsetPos))
2941	if (MI->getOperand(i: BasePos).getReg() == Reg)
2942	if (unsigned NewReg = SSD->getInstrBaseReg(SU))
2943	Reg = NewReg;
2944	bool Reads, Writes;
2945	std::tie(args&: Reads, args&: Writes) =
2946	(*I)->getInstr()->readsWritesVirtualRegister(Reg);
2947	if (MO.isDef() && Reads && stageScheduled(SU: *I) <= StageInst1) {
2948	OrderBeforeUse = true;
2949	if (MoveUse == `0`)
2950	MoveUse = Pos;
2951	} else if (MO.isDef() && Reads && stageScheduled(SU: *I) > StageInst1) {
2952	// Add the instruction after the scheduled instruction.
2953	OrderAfterDef = true;
2954	MoveDef = Pos;
2955	} else if (MO.isUse() && Writes && stageScheduled(SU: *I) == StageInst1) {
2956	if (cycleScheduled(SU: I) == cycleScheduled(SU) && !(I)->isSucc(N: SU)) {
2957	OrderBeforeUse = true;
2958	if (MoveUse == `0`)
2959	MoveUse = Pos;
2960	} else {
2961	OrderAfterDef = true;
2962	MoveDef = Pos;
2963	}
2964	} else if (MO.isUse() && Writes && stageScheduled(SU: *I) > StageInst1) {
2965	OrderBeforeUse = true;
2966	if (MoveUse == `0`)
2967	MoveUse = Pos;
2968	if (MoveUse != `0`) {
2969	OrderAfterDef = true;
2970	MoveDef = Pos - `1`;
2971	}
2972	} else if (MO.isUse() && Writes && stageScheduled(SU: *I) < StageInst1) {
2973	// Add the instruction before the scheduled instruction.
2974	OrderBeforeUse = true;
2975	if (MoveUse == `0`)
2976	MoveUse = Pos;
2977	} else if (MO.isUse() && stageScheduled(SU: *I) == StageInst1 &&
2978	isLoopCarriedDefOfUse(SSD, Def: (*I)->getInstr(), MO)) {
2979	if (MoveUse == `0`) {
2980	OrderBeforeDef = true;
2981	MoveUse = Pos;
2982	}
2983	}
2984	}
2985	// Check for order dependences between instructions. Make sure the source
2986	// is ordered before the destination.
2987	for (auto &S : SU->Succs) {
2988	if (S.getSUnit() != *I)
2989	continue;
2990	if (S.getKind() == SDep::Order && stageScheduled(SU: *I) == StageInst1) {
2991	OrderBeforeUse = true;
2992	if (Pos < MoveUse)
2993	MoveUse = Pos;
2994	}
2995	// We did not handle HW dependences in previous for loop,
2996	// and we normally set Latency = 0 for Anti deps,
2997	// so may have nodes in same cycle with Anti denpendent on HW regs.
2998	else if (S.getKind() == SDep::Anti && stageScheduled(SU: *I) == StageInst1) {
2999	OrderBeforeUse = true;
3000	if ((MoveUse == `0`) \|\| (Pos < MoveUse))
3001	MoveUse = Pos;
3002	}
3003	}
3004	for (auto &P : SU->Preds) {
3005	if (P.getSUnit() != *I)
3006	continue;
3007	if (P.getKind() == SDep::Order && stageScheduled(SU: *I) == StageInst1) {
3008	OrderAfterDef = true;
3009	MoveDef = Pos;
3010	}
3011	}
3012	}
3013
3014	// A circular dependence.
3015	if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3016	OrderBeforeUse = false;
3017
3018	// OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
3019	// to a loop-carried dependence.
3020	if (OrderBeforeDef)
3021	OrderBeforeUse = !OrderAfterDef \|\| (MoveUse > MoveDef);
3022
3023	// The uncommon case when the instruction order needs to be updated because
3024	// there is both a use and def.
3025	if (OrderBeforeUse && OrderAfterDef) {
3026	SUnit *UseSU = Insts.at(n: MoveUse);
3027	SUnit *DefSU = Insts.at(n: MoveDef);
3028	if (MoveUse > MoveDef) {
3029	Insts.erase(position: Insts.begin() + MoveUse);
3030	Insts.erase(position: Insts.begin() + MoveDef);
3031	} else {
3032	Insts.erase(position: Insts.begin() + MoveDef);
3033	Insts.erase(position: Insts.begin() + MoveUse);
3034	}
3035	orderDependence(SSD, SU: UseSU, Insts);
3036	orderDependence(SSD, SU, Insts);
3037	orderDependence(SSD, SU: DefSU, Insts);
3038	return;
3039	}
3040	// Put the new instruction first if there is a use in the list. Otherwise,
3041	// put it at the end of the list.
3042	if (OrderBeforeUse)
3043	Insts.push_front(x: SU);
3044	else
3045	Insts.push_back(x: SU);
3046	}
3047
3048	/// Return true if the scheduled Phi has a loop carried operand.
3049	bool SMSchedule::isLoopCarried(const SwingSchedulerDAG *SSD,
3050	MachineInstr &Phi) const {
3051	if (!Phi.isPHI())
3052	return false;
3053	assert(Phi.isPHI() && "Expecting a Phi.");
3054	SUnit *DefSU = SSD->getSUnit(MI: &Phi);
3055	unsigned DefCycle = cycleScheduled(SU: DefSU);
3056	int DefStage = stageScheduled(SU: DefSU);
3057
3058	unsigned InitVal = `0`;
3059	unsigned LoopVal = `0`;
3060	getPhiRegs(Phi, Loop: Phi.getParent(), InitVal, LoopVal);
3061	SUnit *UseSU = SSD->getSUnit(MI: MRI.getVRegDef(Reg: LoopVal));
3062	if (!UseSU)
3063	return true;
3064	if (UseSU->getInstr()->isPHI())
3065	return true;
3066	unsigned LoopCycle = cycleScheduled(SU: UseSU);
3067	int LoopStage = stageScheduled(SU: UseSU);
3068	return (LoopCycle > DefCycle) \|\| (LoopStage <= DefStage);
3069	}
3070
3071	/// Return true if the instruction is a definition that is loop carried
3072	/// and defines the use on the next iteration.
3073	/// v1 = phi(v2, v3)
3074	/// (Def) v3 = op v1
3075	/// (MO) = v1
3076	/// If MO appears before Def, then v1 and v3 may get assigned to the same
3077	/// register.
3078	bool SMSchedule::isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD,
3079	MachineInstr *Def,
3080	MachineOperand &MO) const {
3081	if (!MO.isReg())
3082	return false;
3083	if (Def->isPHI())
3084	return false;
3085	MachineInstr *Phi = MRI.getVRegDef(Reg: MO.getReg());
3086	if (!Phi \|\| !Phi->isPHI() \|\| Phi->getParent() != Def->getParent())
3087	return false;
3088	if (!isLoopCarried(SSD, Phi&: *Phi))
3089	return false;
3090	unsigned LoopReg = getLoopPhiReg(Phi: *Phi, LoopBB: Phi->getParent());
3091	for (MachineOperand &DMO : Def->all_defs()) {
3092	if (DMO.getReg() == LoopReg)
3093	return true;
3094	}
3095	return false;
3096	}
3097
3098	/// Determine transitive dependences of unpipelineable instructions
3099	SmallSet<SUnit *, `8`> SMSchedule::computeUnpipelineableNodes(
3100	SwingSchedulerDAG SSD, TargetInstrInfo::PipelinerLoopInfo PLI) {
3101	SmallSet<SUnit *, `8`> DoNotPipeline;
3102	SmallVector<SUnit *, `8`> Worklist;
3103
3104	for (auto &SU : SSD->SUnits)
3105	if (SU.isInstr() && PLI->shouldIgnoreForPipelining(MI: SU.getInstr()))
3106	Worklist.push_back(Elt: &SU);
3107
3108	while (!Worklist.empty()) {
3109	auto SU = Worklist.pop_back_val();
3110	if (DoNotPipeline.count(Ptr: SU))
3111	continue;
3112	LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n");
3113	DoNotPipeline.insert(Ptr: SU);
3114	for (auto &Dep : SU->Preds)
3115	Worklist.push_back(Elt: Dep.getSUnit());
3116	if (SU->getInstr()->isPHI())
3117	for (auto &Dep : SU->Succs)
3118	if (Dep.getKind() == SDep::Anti)
3119	Worklist.push_back(Elt: Dep.getSUnit());
3120	}
3121	return DoNotPipeline;
3122	}
3123
3124	// Determine all instructions upon which any unpipelineable instruction depends
3125	// and ensure that they are in stage 0. If unable to do so, return false.
3126	bool SMSchedule::normalizeNonPipelinedInstructions(
3127	SwingSchedulerDAG SSD, TargetInstrInfo::PipelinerLoopInfo PLI) {
3128	SmallSet<SUnit *, `8`> DNP = computeUnpipelineableNodes(SSD, PLI);
3129
3130	int NewLastCycle = INT_MIN;
3131	for (SUnit &SU : SSD->SUnits) {
3132	if (!SU.isInstr())
3133	continue;
3134	if (!DNP.contains(Ptr: &SU) \|\| stageScheduled(SU: &SU) == `0`) {
3135	NewLastCycle = std::max(a: NewLastCycle, b: InstrToCycle [&SU]);
3136	continue;
3137	}
3138
3139	// Put the non-pipelined instruction as early as possible in the schedule
3140	int NewCycle = getFirstCycle();
3141	for (auto &Dep : SU.Preds)
3142	NewCycle = std::max(a: InstrToCycle [Dep.getSUnit()], b: NewCycle);
3143
3144	int OldCycle = InstrToCycle [&SU];
3145	if (OldCycle != NewCycle) {
3146	InstrToCycle [&SU] = NewCycle;
3147	auto &OldS = getInstructions(cycle: OldCycle);
3148	llvm::erase(C&: OldS, V: &SU);
3149	getInstructions(cycle: NewCycle).emplace_back(args: &SU);
3150	LLVM_DEBUG(dbgs() << "SU(" << SU.NodeNum
3151	<< ") is not pipelined; moving from cycle " << OldCycle
3152	<< " to " << NewCycle << " Instr:" << *SU.getInstr());
3153	}
3154	NewLastCycle = std::max(a: NewLastCycle, b: NewCycle);
3155	}
3156	LastCycle = NewLastCycle;
3157	return true;
3158	}
3159
3160	// Check if the generated schedule is valid. This function checks if
3161	// an instruction that uses a physical register is scheduled in a
3162	// different stage than the definition. The pipeliner does not handle
3163	// physical register values that may cross a basic block boundary.
3164	// Furthermore, if a physical def/use pair is assigned to the same
3165	// cycle, orderDependence does not guarantee def/use ordering, so that
3166	// case should be considered invalid. (The test checks for both
3167	// earlier and same-cycle use to be more robust.)
3168	bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
3169	for (SUnit &SU : SSD->SUnits) {
3170	if (!SU.hasPhysRegDefs)
3171	continue;
3172	int StageDef = stageScheduled(SU: &SU);
3173	int CycleDef = InstrToCycle [&SU];
3174	assert(StageDef != -`1` && "Instruction should have been scheduled.");
3175	for (auto &SI : SU.Succs)
3176	if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
3177	if (Register::isPhysicalRegister(Reg: SI.getReg())) {
3178	if (stageScheduled(SU: SI.getSUnit()) != StageDef)
3179	return false;
3180	if (InstrToCycle [SI.getSUnit()] <= CycleDef)
3181	return false;
3182	}
3183	}
3184	return true;
3185	}
3186
3187	/// A property of the node order in swing-modulo-scheduling is
3188	/// that for nodes outside circuits the following holds:
3189	/// none of them is scheduled after both a successor and a
3190	/// predecessor.
3191	/// The method below checks whether the property is met.
3192	/// If not, debug information is printed and statistics information updated.
3193	/// Note that we do not use an assert statement.
3194	/// The reason is that although an invalid node oder may prevent
3195	/// the pipeliner from finding a pipelined schedule for arbitrary II,
3196	/// it does not lead to the generation of incorrect code.
3197	void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
3198
3199	// a sorted vector that maps each SUnit to its index in the NodeOrder
3200	typedef std::pair<SUnit , unsigned*> UnitIndex;
3201	std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(x: nullptr, y: `0`));
3202
3203	for (unsigned i = `0`, s = NodeOrder.size(); i < s; ++i)
3204	Indices.push_back(x: std::make_pair(x: NodeOrder [i], y&: i));
3205
3206	auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3207	return std::get<`0`>(in&: i1) < std::get<`0`>(in&: i2);
3208	};
3209
3210	// sort, so that we can perform a binary search
3211	llvm::sort(C&: Indices, Comp: CompareKey);
3212
3213	bool Valid = true;
3214	(void)Valid;
3215	// for each SUnit in the NodeOrder, check whether
3216	// it appears after both a successor and a predecessor
3217	// of the SUnit. If this is the case, and the SUnit
3218	// is not part of circuit, then the NodeOrder is not
3219	// valid.
3220	for (unsigned i = `0`, s = NodeOrder.size(); i < s; ++i) {
3221	SUnit *SU = NodeOrder [i];
3222	unsigned Index = i;
3223
3224	bool PredBefore = false;
3225	bool SuccBefore = false;
3226
3227	SUnit *Succ;
3228	SUnit *Pred;
3229	(void)Succ;
3230	(void)Pred;
3231
3232	for (SDep &PredEdge : SU->Preds) {
3233	SUnit *PredSU = PredEdge.getSUnit();
3234	unsigned PredIndex = std::get<`1`>(
3235	in&: *llvm::lower_bound(Range&: Indices, Value: std::make_pair(x&: PredSU, y: `0`), C: CompareKey));
3236	if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
3237	PredBefore = true;
3238	Pred = PredSU;
3239	break;
3240	}
3241	}
3242
3243	for (SDep &SuccEdge : SU->Succs) {
3244	SUnit *SuccSU = SuccEdge.getSUnit();
3245	// Do not process a boundary node, it was not included in NodeOrder,
3246	// hence not in Indices either, call to std::lower_bound() below will
3247	// return Indices.end().
3248	if (SuccSU->isBoundaryNode())
3249	continue;
3250	unsigned SuccIndex = std::get<`1`>(
3251	in&: *llvm::lower_bound(Range&: Indices, Value: std::make_pair(x&: SuccSU, y: `0`), C: CompareKey));
3252	if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
3253	SuccBefore = true;
3254	Succ = SuccSU;
3255	break;
3256	}
3257	}
3258
3259	if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
3260	// instructions in circuits are allowed to be scheduled
3261	// after both a successor and predecessor.
3262	bool InCircuit = llvm::any_of(
3263	Range: Circuits, P: [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
3264	if (InCircuit)
3265	LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
3266	else {
3267	Valid = false;
3268	NumNodeOrderIssues ++;
3269	LLVM_DEBUG(dbgs() << "Predecessor ";);
3270	}
3271	LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
3272	<< " are scheduled before node " << SU->NodeNum
3273	<< "\n";);
3274	}
3275	}
3276
3277	LLVM_DEBUG({
3278	if (!Valid)
3279	dbgs() << "Invalid node order found!\n";
3280	});
3281	}
3282
3283	/// Attempt to fix the degenerate cases when the instruction serialization
3284	/// causes the register lifetimes to overlap. For example,
3285	/// p' = store_pi(p, b)
3286	/// = load p, offset
3287	/// In this case p and p' overlap, which means that two registers are needed.
3288	/// Instead, this function changes the load to use p' and updates the offset.
3289	void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
3290	unsigned OverlapReg = `0`;
3291	unsigned NewBaseReg = `0`;
3292	for (SUnit *SU : Instrs) {
3293	MachineInstr *MI = SU->getInstr();
3294	for (unsigned i = `0`, e = MI->getNumOperands(); i < e; ++i) {
3295	const MachineOperand &MO = MI->getOperand(i);
3296	// Look for an instruction that uses p. The instruction occurs in the
3297	// same cycle but occurs later in the serialized order.
3298	if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
3299	// Check that the instruction appears in the InstrChanges structure,
3300	// which contains instructions that can have the offset updated.
3301	DenseMap<SUnit , std::pair<unsigned*, int64_t>>::iterator It =
3302	InstrChanges.find(Val: SU);
3303	if (It != InstrChanges.end()) {
3304	unsigned BasePos, OffsetPos;
3305	// Update the base register and adjust the offset.
3306	if (TII->getBaseAndOffsetPosition(MI: *MI, BasePos, OffsetPos)) {
3307	MachineInstr *NewMI = MF.CloneMachineInstr(Orig: MI);
3308	NewMI->getOperand(i: BasePos).setReg(NewBaseReg);
3309	int64_t NewOffset =
3310	MI->getOperand(i: OffsetPos).getImm() - It ->second.second;
3311	NewMI->getOperand(i: OffsetPos).setImm(NewOffset);
3312	SU->setInstr(NewMI);
3313	MISUnitMap [NewMI] = SU;
3314	NewMIs [MI] = NewMI;
3315	}
3316	}
3317	OverlapReg = `0`;
3318	NewBaseReg = `0`;
3319	break;
3320	}
3321	// Look for an instruction of the form p' = op(p), which uses and defines
3322	// two virtual registers that get allocated to the same physical register.
3323	unsigned TiedUseIdx = `0`;
3324	if (MI->isRegTiedToUseOperand(DefOpIdx: i, UseOpIdx: &TiedUseIdx)) {
3325	// OverlapReg is p in the example above.
3326	OverlapReg = MI->getOperand(i: TiedUseIdx).getReg();
3327	// NewBaseReg is p' in the example above.
3328	NewBaseReg = MI->getOperand(i).getReg();
3329	break;
3330	}
3331	}
3332	}
3333	}
3334
3335	std::deque<SUnit *>
3336	SMSchedule::reorderInstructions(const SwingSchedulerDAG *SSD,
3337	const std::deque<SUnit > &Instrs) const* {
3338	std::deque<SUnit *> NewOrderPhi;
3339	for (SUnit *SU : Instrs) {
3340	if (SU->getInstr()->isPHI())
3341	NewOrderPhi.push_back(x: SU);
3342	}
3343	std::deque<SUnit *> NewOrderI;
3344	for (SUnit *SU : Instrs) {
3345	if (!SU->getInstr()->isPHI())
3346	orderDependence(SSD, SU, Insts&: NewOrderI);
3347	}
3348	llvm::append_range(C&: NewOrderPhi, R&: NewOrderI);
3349	return NewOrderPhi;
3350	}
3351
3352	/// After the schedule has been formed, call this function to combine
3353	/// the instructions from the different stages/cycles. That is, this
3354	/// function creates a schedule that represents a single iteration.
3355	void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
3356	// Move all instructions to the first stage from later stages.
3357	for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3358	for (int stage = `1`, lastStage = getMaxStageCount(); stage <= lastStage;
3359	++stage) {
3360	std::deque<SUnit *> &cycleInstrs =
3361	ScheduledInstrs [cycle + (stage * InitiationInterval)];
3362	for (SUnit *SU : llvm::reverse(C&: cycleInstrs))
3363	ScheduledInstrs [cycle].push_front(x: SU);
3364	}
3365	}
3366
3367	// Erase all the elements in the later stages. Only one iteration should
3368	// remain in the scheduled list, and it contains all the instructions.
3369	for (int cycle = getFinalCycle() + `1`; cycle <= LastCycle; ++cycle)
3370	ScheduledInstrs.erase(Val: cycle);
3371
3372	// Change the registers in instruction as specified in the InstrChanges
3373	// map. We need to use the new registers to create the correct order.
3374	for (const SUnit &SU : SSD->SUnits)
3375	SSD->applyInstrChange(MI: SU.getInstr(), Schedule&: *this);
3376
3377	// Reorder the instructions in each cycle to fix and improve the
3378	// generated code.
3379	for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
3380	std::deque<SUnit *> &cycleInstrs = ScheduledInstrs [Cycle];
3381	cycleInstrs = reorderInstructions(SSD, Instrs: cycleInstrs);
3382	SSD->fixupRegisterOverlaps(Instrs&: cycleInstrs);
3383	}
3384
3385	LLVM_DEBUG(dump(););
3386	}
3387
3388	void NodeSet::print(raw_ostream &os) const {
3389	os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
3390	<< " depth " << MaxDepth << " col " << Colocate << "\n";
3391	for (const auto &I : Nodes)
3392	os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
3393	os << "\n";
3394	}
3395
3396	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
3397	/// Print the schedule information to the given output.
3398	void SMSchedule::print(raw_ostream &os) const {
3399	// Iterate over each cycle.
3400	for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3401	// Iterate over each instruction in the cycle.
3402	const_sched_iterator cycleInstrs = ScheduledInstrs.find(Val: cycle);
3403	for (SUnit *CI : cycleInstrs ->second) {
3404	os << "cycle " << cycle << " (" << stageScheduled(SU: CI) << ") ";
3405	os << "(" << CI->NodeNum << ") ";
3406	CI->getInstr()->print(OS&: os);
3407	os << "\n";
3408	}
3409	}
3410	}
3411
3412	/// Utility function used for debugging to print the schedule.
3413	LLVM_DUMP_METHOD void SMSchedule::dump() const { print(os&: dbgs()); }
3414	LLVM_DUMP_METHOD void NodeSet::dump() const { print(os&: dbgs()); }
3415
3416	void ResourceManager::dumpMRT() const {
3417	LLVM_DEBUG({
3418	if (UseDFA)
3419	return;
3420	std::stringstream SS;
3421	SS << "MRT:\n";
3422	SS << std::setw(`4`) << "Slot";
3423	for (unsigned I = `1`, E = SM.getNumProcResourceKinds(); I < E; ++I)
3424	SS << std::setw(`3`) << I;
3425	SS << std::setw(`7`) << "#Mops"
3426	<< "\n";
3427	for (int Slot = `0`; Slot < InitiationInterval; ++Slot) {
3428	SS << std::setw(`4`) << Slot;
3429	for (unsigned I = `1`, E = SM.getNumProcResourceKinds(); I < E; ++I)
3430	SS << std::setw(`3`) << MRT[Slot][I];
3431	SS << std::setw(`7`) << NumScheduledMops[Slot] << "\n";
3432	}
3433	dbgs() << SS.str();
3434	});
3435	}
3436	#endif
3437
3438	void ResourceManager::initProcResourceVectors(
3439	const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
3440	unsigned ProcResourceID = `0`;
3441
3442	// We currently limit the resource kinds to 64 and below so that we can use
3443	// uint64_t for Masks
3444	assert(SM.getNumProcResourceKinds() < `64` &&
3445	"Too many kinds of resources, unsupported");
3446	// Create a unique bitmask for every processor resource unit.
3447	// Skip resource at index 0, since it always references 'InvalidUnit'.
3448	Masks.resize(N: SM.getNumProcResourceKinds());
3449	for (unsigned I = `1`, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3450	const MCProcResourceDesc &Desc = *SM.getProcResource(ProcResourceIdx: I);
3451	if (Desc.SubUnitsIdxBegin)
3452	continue;
3453	Masks [I] = `1ULL` << ProcResourceID;
3454	ProcResourceID++;
3455	}
3456	// Create a unique bitmask for every processor resource group.
3457	for (unsigned I = `1`, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3458	const MCProcResourceDesc &Desc = *SM.getProcResource(ProcResourceIdx: I);
3459	if (!Desc.SubUnitsIdxBegin)
3460	continue;
3461	Masks [I] = `1ULL` << ProcResourceID;
3462	for (unsigned U = `0`; U < Desc.NumUnits; ++U)
3463	Masks [I] \|= Masks [Desc.SubUnitsIdxBegin[U]];
3464	ProcResourceID++;
3465	}
3466	LLVM_DEBUG({
3467	if (SwpShowResMask) {
3468	dbgs() << "ProcResourceDesc:\n";
3469	for (unsigned I = `1`, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3470	const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
3471	dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3472	ProcResource->Name, I, Masks[I],
3473	ProcResource->NumUnits);
3474	}
3475	dbgs() << " -----------------\n";
3476	}
3477	});
3478	}
3479
3480	bool ResourceManager::canReserveResources(SUnit &SU, int Cycle) {
3481	LLVM_DEBUG({
3482	if (SwpDebugResource)
3483	dbgs() << "canReserveResources:\n";
3484	});
3485	if (UseDFA)
3486	return DFAResources [positiveModulo(Dividend: Cycle, Divisor: InitiationInterval)]
3487	->canReserveResources(MID: &SU.getInstr()->getDesc());
3488
3489	const MCSchedClassDesc *SCDesc = DAG->getSchedClass(SU: &SU);
3490	if (!SCDesc->isValid()) {
3491	LLVM_DEBUG({
3492	dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3493	dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
3494	});
3495	return true;
3496	}
3497
3498	reserveResources(SCDesc, Cycle);
3499	bool Result = !isOverbooked();
3500	unreserveResources(SCDesc, Cycle);
3501
3502	LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return " << Result << "\n\n";);
3503	return Result;
3504	}
3505
3506	void ResourceManager::reserveResources(SUnit &SU, int Cycle) {
3507	LLVM_DEBUG({
3508	if (SwpDebugResource)
3509	dbgs() << "reserveResources:\n";
3510	});
3511	if (UseDFA)
3512	return DFAResources [positiveModulo(Dividend: Cycle, Divisor: InitiationInterval)]
3513	->reserveResources(MID: &SU.getInstr()->getDesc());
3514
3515	const MCSchedClassDesc *SCDesc = DAG->getSchedClass(SU: &SU);
3516	if (!SCDesc->isValid()) {
3517	LLVM_DEBUG({
3518	dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3519	dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
3520	});
3521	return;
3522	}
3523
3524	reserveResources(SCDesc, Cycle);
3525
3526	LLVM_DEBUG({
3527	if (SwpDebugResource) {
3528	dumpMRT();
3529	dbgs() << "reserveResources: done!\n\n";
3530	}
3531	});
3532	}
3533
3534	void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc,
3535	int Cycle) {
3536	assert(!UseDFA);
3537	for (const MCWriteProcResEntry &PRE : make_range(
3538	x: STI->getWriteProcResBegin(SC: SCDesc), y: STI->getWriteProcResEnd(SC: SCDesc)))
3539	for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)
3540	++MRT [positiveModulo(Dividend: C, Divisor: InitiationInterval)][PRE.ProcResourceIdx];
3541
3542	for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
3543	++NumScheduledMops [positiveModulo(Dividend: C, Divisor: InitiationInterval)];
3544	}
3545
3546	void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc,
3547	int Cycle) {
3548	assert(!UseDFA);
3549	for (const MCWriteProcResEntry &PRE : make_range(
3550	x: STI->getWriteProcResBegin(SC: SCDesc), y: STI->getWriteProcResEnd(SC: SCDesc)))
3551	for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)
3552	--MRT [positiveModulo(Dividend: C, Divisor: InitiationInterval)][PRE.ProcResourceIdx];
3553
3554	for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
3555	--NumScheduledMops [positiveModulo(Dividend: C, Divisor: InitiationInterval)];
3556	}
3557
3558	bool ResourceManager::isOverbooked() const {
3559	assert(!UseDFA);
3560	for (int Slot = `0`; Slot < InitiationInterval; ++Slot) {
3561	for (unsigned I = `1`, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3562	const MCProcResourceDesc *Desc = SM.getProcResource(ProcResourceIdx: I);
3563	if (MRT [Slot][I] > Desc->NumUnits)
3564	return true;
3565	}
3566	if (NumScheduledMops [Slot] > IssueWidth)
3567	return true;
3568	}
3569	return false;
3570	}
3571
3572	int ResourceManager::calculateResMIIDFA() const {
3573	assert(UseDFA);
3574
3575	// Sort the instructions by the number of available choices for scheduling,
3576	// least to most. Use the number of critical resources as the tie breaker.
3577	FuncUnitSorter FUS = FuncUnitSorter (*ST);
3578	for (SUnit &SU : DAG->SUnits)
3579	FUS.calcCriticalResources(MI&: *SU.getInstr());
3580	PriorityQueue<MachineInstr , std::vector<MachineInstr >, FuncUnitSorter>
3581	FuncUnitOrder(FUS);
3582
3583	for (SUnit &SU : DAG->SUnits)
3584	FuncUnitOrder.push(x: SU.getInstr());
3585
3586	SmallVector<std::unique_ptr<DFAPacketizer>, `8`> Resources;
3587	Resources.push_back(
3588	Elt: std::unique_ptr<DFAPacketizer>(TII->CreateTargetScheduleState(*ST)));
3589
3590	while (!FuncUnitOrder.empty()) {
3591	MachineInstr *MI = FuncUnitOrder.top();
3592	FuncUnitOrder.pop();
3593	if (TII->isZeroCost(Opcode: MI->getOpcode()))
3594	continue;
3595
3596	// Attempt to reserve the instruction in an existing DFA. At least one
3597	// DFA is needed for each cycle.
3598	unsigned NumCycles = DAG->getSUnit(MI)->Latency;
3599	unsigned ReservedCycles = `0`;
3600	auto *RI = Resources.begin();
3601	auto *RE = Resources.end();
3602	LLVM_DEBUG({
3603	dbgs() << "Trying to reserve resource for " << NumCycles
3604	<< " cycles for \n";
3605	MI->dump();
3606	});
3607	for (unsigned C = `0`; C < NumCycles; ++C)
3608	while (RI != RE) {
3609	if ((RI)->canReserveResources(MI&: MI)) {
3610	(RI)->reserveResources(MI&: MI);
3611	++ReservedCycles;
3612	break;
3613	}
3614	RI++;
3615	}
3616	LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
3617	<< ", NumCycles:" << NumCycles << "\n");
3618	// Add new DFAs, if needed, to reserve resources.
3619	for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
3620	LLVM_DEBUG(if (SwpDebugResource) dbgs()
3621	<< "NewResource created to reserve resources"
3622	<< "\n");
3623	auto NewResource = TII->CreateTargetScheduleState(ST);
3624	assert(NewResource->canReserveResources(*MI) && "Reserve error.");
3625	NewResource->reserveResources(MI&: *MI);
3626	Resources.push_back(Elt: std::unique_ptr<DFAPacketizer>(NewResource));
3627	}
3628	}
3629
3630	int Resmii = Resources.size();
3631	LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
3632	return Resmii;
3633	}
3634
3635	int ResourceManager::calculateResMII() const {
3636	if (UseDFA)
3637	return calculateResMIIDFA();
3638
3639	// Count each resource consumption and divide it by the number of units.
3640	// ResMII is the max value among them.
3641
3642	int NumMops = `0`;
3643	SmallVector<uint64_t> ResourceCount(SM.getNumProcResourceKinds());
3644	for (SUnit &SU : DAG->SUnits) {
3645	if (TII->isZeroCost(Opcode: SU.getInstr()->getOpcode()))
3646	continue;
3647
3648	const MCSchedClassDesc *SCDesc = DAG->getSchedClass(SU: &SU);
3649	if (!SCDesc->isValid())
3650	continue;
3651
3652	LLVM_DEBUG({
3653	if (SwpDebugResource) {
3654	DAG->dumpNode(SU);
3655	dbgs() << " #Mops: " << SCDesc->NumMicroOps << "\n"
3656	<< " WriteProcRes: ";
3657	}
3658	});
3659	NumMops += SCDesc->NumMicroOps;
3660	for (const MCWriteProcResEntry &PRE :
3661	make_range(x: STI->getWriteProcResBegin(SC: SCDesc),
3662	y: STI->getWriteProcResEnd(SC: SCDesc))) {
3663	LLVM_DEBUG({
3664	if (SwpDebugResource) {
3665	const MCProcResourceDesc *Desc =
3666	SM.getProcResource(PRE.ProcResourceIdx);
3667	dbgs() << Desc->Name << ": " << PRE.ReleaseAtCycle << ", ";
3668	}
3669	});
3670	ResourceCount [PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
3671	}
3672	LLVM_DEBUG(if (SwpDebugResource) dbgs() << "\n");
3673	}
3674
3675	int Result = (NumMops + IssueWidth - `1`) / IssueWidth;
3676	LLVM_DEBUG({
3677	if (SwpDebugResource)
3678	dbgs() << "#Mops: " << NumMops << ", "
3679	<< "IssueWidth: " << IssueWidth << ", "
3680	<< "Cycles: " << Result << "\n";
3681	});
3682
3683	LLVM_DEBUG({
3684	if (SwpDebugResource) {
3685	std::stringstream SS;
3686	SS << std::setw(`2`) << "ID" << std::setw(`16`) << "Name" << std::setw(`10`)
3687	<< "Units" << std::setw(`10`) << "Consumed" << std::setw(`10`) << "Cycles"
3688	<< "\n";
3689	dbgs() << SS.str();
3690	}
3691	});
3692	for (unsigned I = `1`, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3693	const MCProcResourceDesc *Desc = SM.getProcResource(ProcResourceIdx: I);
3694	int Cycles = (ResourceCount [I] + Desc->NumUnits - `1`) / Desc->NumUnits;
3695	LLVM_DEBUG({
3696	if (SwpDebugResource) {
3697	std::stringstream SS;
3698	SS << std::setw(`2`) << I << std::setw(`16`) << Desc->Name << std::setw(`10`)
3699	<< Desc->NumUnits << std::setw(`10`) << ResourceCount[I]
3700	<< std::setw(`10`) << Cycles << "\n";
3701	dbgs() << SS.str();
3702	}
3703	});
3704	if (Cycles > Result)
3705	Result = Cycles;
3706	}
3707	return Result;
3708	}
3709
3710	void ResourceManager::init(int II) {
3711	InitiationInterval = II;
3712	DFAResources.clear();
3713	DFAResources.resize(N: II);
3714	for (auto &I : DFAResources)
3715	I.reset(p: ST->getInstrInfo()->CreateTargetScheduleState(*ST));
3716	MRT.clear();
3717	MRT.resize(N: II, NV: SmallVector<uint64_t>(SM.getNumProcResourceKinds()));
3718	NumScheduledMops.clear();
3719	NumScheduledMops.resize(N: II);
3720	}
3721

source code of llvm/lib/CodeGen/MachinePipeliner.cpp