1 | //===--------------------- Scheduler.h ------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// |
10 | /// A scheduler for Processor Resource Units and Processor Resource Groups. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_MCA_HARDWAREUNITS_SCHEDULER_H |
15 | #define LLVM_MCA_HARDWAREUNITS_SCHEDULER_H |
16 | |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/MC/MCSchedule.h" |
19 | #include "llvm/MCA/HardwareUnits/HardwareUnit.h" |
20 | #include "llvm/MCA/HardwareUnits/LSUnit.h" |
21 | #include "llvm/MCA/HardwareUnits/ResourceManager.h" |
22 | #include "llvm/MCA/Support.h" |
23 | |
24 | namespace llvm { |
25 | namespace mca { |
26 | |
27 | class SchedulerStrategy { |
28 | public: |
29 | SchedulerStrategy() = default; |
30 | virtual ~SchedulerStrategy(); |
31 | |
32 | /// Returns true if Lhs should take priority over Rhs. |
33 | /// |
34 | /// This method is used by class Scheduler to select the "best" ready |
35 | /// instruction to issue to the underlying pipelines. |
36 | virtual bool compare(const InstRef &Lhs, const InstRef &Rhs) const = 0; |
37 | }; |
38 | |
39 | /// Default instruction selection strategy used by class Scheduler. |
40 | class DefaultSchedulerStrategy : public SchedulerStrategy { |
41 | /// This method ranks instructions based on their age, and the number of known |
42 | /// users. The lower the rank value, the better. |
43 | int computeRank(const InstRef &Lhs) const { |
44 | return Lhs.getSourceIndex() - Lhs.getInstruction()->getNumUsers(); |
45 | } |
46 | |
47 | public: |
48 | DefaultSchedulerStrategy() = default; |
49 | virtual ~DefaultSchedulerStrategy(); |
50 | |
51 | bool compare(const InstRef &Lhs, const InstRef &Rhs) const override { |
52 | int LhsRank = computeRank(Lhs); |
53 | int RhsRank = computeRank(Lhs: Rhs); |
54 | |
55 | /// Prioritize older instructions over younger instructions to minimize the |
56 | /// pressure on the reorder buffer. |
57 | if (LhsRank == RhsRank) |
58 | return Lhs.getSourceIndex() < Rhs.getSourceIndex(); |
59 | return LhsRank < RhsRank; |
60 | } |
61 | }; |
62 | |
63 | /// Class Scheduler is responsible for issuing instructions to pipeline |
64 | /// resources. |
65 | /// |
66 | /// Internally, it delegates to a ResourceManager the management of processor |
67 | /// resources. This class is also responsible for tracking the progress of |
68 | /// instructions from the dispatch stage, until the write-back stage. |
69 | /// |
70 | class Scheduler : public HardwareUnit { |
71 | LSUnitBase &LSU; |
72 | |
73 | // Instruction selection strategy for this Scheduler. |
74 | std::unique_ptr<SchedulerStrategy> Strategy; |
75 | |
76 | // Hardware resources that are managed by this scheduler. |
77 | std::unique_ptr<ResourceManager> Resources; |
78 | |
79 | // Instructions dispatched to the Scheduler are internally classified based on |
80 | // the instruction stage (see Instruction::InstrStage). |
81 | // |
82 | // An Instruction dispatched to the Scheduler is added to the WaitSet if not |
83 | // all its register operands are available, and at least one latency is |
84 | // unknown. By construction, the WaitSet only contains instructions that are |
85 | // in the IS_DISPATCHED stage. |
86 | // |
87 | // An Instruction transitions from the WaitSet to the PendingSet if the |
88 | // instruction is not ready yet, but the latency of every register read is |
89 | // known. Instructions in the PendingSet can only be in the IS_PENDING or |
90 | // IS_READY stage. Only IS_READY instructions that are waiting on memory |
91 | // dependencies can be added to the PendingSet. |
92 | // |
93 | // Instructions in the PendingSet are immediately dominated only by |
94 | // instructions that have already been issued to the underlying pipelines. In |
95 | // the presence of bottlenecks caused by data dependencies, the PendingSet can |
96 | // be inspected to identify problematic data dependencies between |
97 | // instructions. |
98 | // |
99 | // An instruction is moved to the ReadySet when all register operands become |
100 | // available, and all memory dependencies are met. Instructions that are |
101 | // moved from the PendingSet to the ReadySet must transition to the 'IS_READY' |
102 | // stage. |
103 | // |
104 | // On every cycle, the Scheduler checks if it can promote instructions from the |
105 | // PendingSet to the ReadySet. |
106 | // |
107 | // An Instruction is moved from the ReadySet to the `IssuedSet` when it starts |
108 | // exection. This event also causes an instruction state transition (i.e. from |
109 | // state IS_READY, to state IS_EXECUTING). An Instruction leaves the IssuedSet |
110 | // only when it reaches the write-back stage. |
111 | std::vector<InstRef> WaitSet; |
112 | std::vector<InstRef> PendingSet; |
113 | std::vector<InstRef> ReadySet; |
114 | std::vector<InstRef> IssuedSet; |
115 | |
116 | // A mask of busy resource units. It defaults to the empty set (i.e. a zero |
117 | // mask), and it is cleared at the beginning of every cycle. |
118 | // It is updated every time the scheduler fails to issue an instruction from |
119 | // the ready set due to unavailable pipeline resources. |
120 | // Each bit of the mask represents an unavailable resource. |
121 | uint64_t BusyResourceUnits; |
122 | |
123 | // Counts the number of instructions in the pending set that were dispatched |
124 | // during this cycle. |
125 | unsigned NumDispatchedToThePendingSet; |
126 | |
127 | // True if the previous pipeline Stage was unable to dispatch a full group of |
128 | // opcodes because scheduler buffers (or LS queues) were unavailable. |
129 | bool HadTokenStall; |
130 | |
131 | /// Verify the given selection strategy and set the Strategy member |
132 | /// accordingly. If no strategy is provided, the DefaultSchedulerStrategy is |
133 | /// used. |
134 | void initializeStrategy(std::unique_ptr<SchedulerStrategy> S); |
135 | |
136 | /// Issue an instruction without updating the ready queue. |
137 | void issueInstructionImpl( |
138 | InstRef &IR, |
139 | SmallVectorImpl<std::pair<ResourceRef, ReleaseAtCycles>> &Pipes); |
140 | |
141 | // Identify instructions that have finished executing, and remove them from |
142 | // the IssuedSet. References to executed instructions are added to input |
143 | // vector 'Executed'. |
144 | void updateIssuedSet(SmallVectorImpl<InstRef> &Executed); |
145 | |
146 | // Try to promote instructions from the PendingSet to the ReadySet. |
147 | // Add promoted instructions to the 'Ready' vector in input. |
148 | // Returns true if at least one instruction was promoted. |
149 | bool promoteToReadySet(SmallVectorImpl<InstRef> &Ready); |
150 | |
151 | // Try to promote instructions from the WaitSet to the PendingSet. |
152 | // Add promoted instructions to the 'Pending' vector in input. |
153 | // Returns true if at least one instruction was promoted. |
154 | bool promoteToPendingSet(SmallVectorImpl<InstRef> &Pending); |
155 | |
156 | public: |
157 | Scheduler(const MCSchedModel &Model, LSUnitBase &Lsu) |
158 | : Scheduler(Model, Lsu, nullptr) {} |
159 | |
160 | Scheduler(const MCSchedModel &Model, LSUnitBase &Lsu, |
161 | std::unique_ptr<SchedulerStrategy> SelectStrategy) |
162 | : Scheduler(std::make_unique<ResourceManager>(args: Model), Lsu, |
163 | std::move(SelectStrategy)) {} |
164 | |
165 | Scheduler(std::unique_ptr<ResourceManager> RM, LSUnitBase &Lsu, |
166 | std::unique_ptr<SchedulerStrategy> SelectStrategy) |
167 | : LSU(Lsu), Resources(std::move(RM)), BusyResourceUnits(0), |
168 | NumDispatchedToThePendingSet(0), HadTokenStall(false) { |
169 | initializeStrategy(S: std::move(SelectStrategy)); |
170 | } |
171 | |
172 | // Stalls generated by the scheduler. |
173 | enum Status { |
174 | SC_AVAILABLE, |
175 | SC_LOAD_QUEUE_FULL, |
176 | SC_STORE_QUEUE_FULL, |
177 | SC_BUFFERS_FULL, |
178 | SC_DISPATCH_GROUP_STALL, |
179 | }; |
180 | |
181 | /// Check if the instruction in 'IR' can be dispatched during this cycle. |
182 | /// Return SC_AVAILABLE if both scheduler and LS resources are available. |
183 | /// |
184 | /// This method is also responsible for setting field HadTokenStall if |
185 | /// IR cannot be dispatched to the Scheduler due to unavailable resources. |
186 | Status isAvailable(const InstRef &IR); |
187 | |
188 | /// Reserves buffer and LSUnit queue resources that are necessary to issue |
189 | /// this instruction. |
190 | /// |
191 | /// Returns true if instruction IR is ready to be issued to the underlying |
192 | /// pipelines. Note that this operation cannot fail; it assumes that a |
193 | /// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`. |
194 | /// |
195 | /// If IR is a memory operation, then the Scheduler queries the LS unit to |
196 | /// obtain a LS token. An LS token is used internally to track memory |
197 | /// dependencies. |
198 | bool dispatch(InstRef &IR); |
199 | |
200 | /// Issue an instruction and populates a vector of used pipeline resources, |
201 | /// and a vector of instructions that transitioned to the ready state as a |
202 | /// result of this event. |
203 | void issueInstruction( |
204 | InstRef &IR, |
205 | SmallVectorImpl<std::pair<ResourceRef, ReleaseAtCycles>> &Used, |
206 | SmallVectorImpl<InstRef> &Pending, |
207 | SmallVectorImpl<InstRef> &Ready); |
208 | |
209 | /// Returns true if IR has to be issued immediately, or if IR is a zero |
210 | /// latency instruction. |
211 | bool mustIssueImmediately(const InstRef &IR) const; |
212 | |
213 | /// This routine notifies the Scheduler that a new cycle just started. |
214 | /// |
215 | /// It notifies the underlying ResourceManager that a new cycle just started. |
216 | /// Vector `Freed` is populated with resourceRef related to resources that |
217 | /// have changed in state, and that are now available to new instructions. |
218 | /// Instructions executed are added to vector Executed, while vector Ready is |
219 | /// populated with instructions that have become ready in this new cycle. |
220 | /// Vector Pending is popluated by instructions that have transitioned through |
221 | /// the pending stat during this cycle. The Pending and Ready sets may not be |
222 | /// disjoint. An instruction is allowed to transition from the WAIT state to |
223 | /// the READY state (going through the PENDING state) within a single cycle. |
224 | /// That means, instructions may appear in both the Pending and Ready set. |
225 | void cycleEvent(SmallVectorImpl<ResourceRef> &Freed, |
226 | SmallVectorImpl<InstRef> &Executed, |
227 | SmallVectorImpl<InstRef> &Pending, |
228 | SmallVectorImpl<InstRef> &Ready); |
229 | |
230 | /// Convert a resource mask into a valid llvm processor resource identifier. |
231 | /// |
232 | /// Only the most significant bit of the Mask is used by this method to |
233 | /// identify the processor resource. |
234 | unsigned getResourceID(uint64_t Mask) const { |
235 | return Resources->resolveResourceMask(Mask); |
236 | } |
237 | |
238 | /// Select the next instruction to issue from the ReadySet. Returns an invalid |
239 | /// instruction reference if there are no ready instructions, or if processor |
240 | /// resources are not available. |
241 | InstRef select(); |
242 | |
243 | bool isReadySetEmpty() const { return ReadySet.empty(); } |
244 | bool isWaitSetEmpty() const { return WaitSet.empty(); } |
245 | |
246 | /// This method is called by the ExecuteStage at the end of each cycle to |
247 | /// identify bottlenecks caused by data dependencies. Vector RegDeps is |
248 | /// populated by instructions that were not issued because of unsolved |
249 | /// register dependencies. Vector MemDeps is populated by instructions that |
250 | /// were not issued because of unsolved memory dependencies. |
251 | void analyzeDataDependencies(SmallVectorImpl<InstRef> &RegDeps, |
252 | SmallVectorImpl<InstRef> &MemDeps); |
253 | |
254 | /// Returns a mask of busy resources, and populates vector Insts with |
255 | /// instructions that could not be issued to the underlying pipelines because |
256 | /// not all pipeline resources were available. |
257 | uint64_t analyzeResourcePressure(SmallVectorImpl<InstRef> &Insts); |
258 | |
259 | // Returns true if the dispatch logic couldn't dispatch a full group due to |
260 | // unavailable scheduler and/or LS resources. |
261 | bool hadTokenStall() const { return HadTokenStall; } |
262 | |
263 | #ifndef NDEBUG |
264 | // Update the ready queues. |
265 | void dump() const; |
266 | |
267 | // This routine performs a basic correctness check. This routine should only |
268 | // be called when we know that 'IR' is not in the scheduler's instruction |
269 | // queues. |
270 | void instructionCheck(const InstRef &IR) const { |
271 | assert(!is_contained(WaitSet, IR) && "Already in the wait set!" ); |
272 | assert(!is_contained(ReadySet, IR) && "Already in the ready set!" ); |
273 | assert(!is_contained(IssuedSet, IR) && "Already executing!" ); |
274 | } |
275 | #endif // !NDEBUG |
276 | }; |
277 | } // namespace mca |
278 | } // namespace llvm |
279 | |
280 | #endif // LLVM_MCA_HARDWAREUNITS_SCHEDULER_H |
281 | |