1 | //===--------------------- ResourceManager.h --------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// |
10 | /// The classes here represent processor resource units and their management |
11 | /// strategy. These classes are managed by the Scheduler. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H |
16 | #define LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H |
17 | |
18 | #include "llvm/ADT/DenseMap.h" |
19 | #include "llvm/ADT/SmallVector.h" |
20 | #include "llvm/MC/MCSchedule.h" |
21 | #include "llvm/MCA/Instruction.h" |
22 | #include "llvm/MCA/Support.h" |
23 | |
24 | namespace llvm { |
25 | namespace mca { |
26 | |
27 | /// Used to notify the internal state of a processor resource. |
28 | /// |
29 | /// A processor resource is available if it is not reserved, and there are |
30 | /// available slots in the buffer. A processor resource is unavailable if it |
31 | /// is either reserved, or the associated buffer is full. A processor resource |
32 | /// with a buffer size of -1 is always available if it is not reserved. |
33 | /// |
34 | /// Values of type ResourceStateEvent are returned by method |
35 | /// ResourceManager::canBeDispatched() |
36 | /// |
37 | /// The naming convention for resource state events is: |
38 | /// * Event names start with prefix RS_ |
39 | /// * Prefix RS_ is followed by a string describing the actual resource state. |
40 | enum ResourceStateEvent { |
41 | RS_BUFFER_AVAILABLE, |
42 | RS_BUFFER_UNAVAILABLE, |
43 | RS_RESERVED |
44 | }; |
45 | |
46 | /// Resource allocation strategy used by hardware scheduler resources. |
47 | class ResourceStrategy { |
48 | ResourceStrategy(const ResourceStrategy &) = delete; |
49 | ResourceStrategy &operator=(const ResourceStrategy &) = delete; |
50 | |
51 | public: |
52 | ResourceStrategy() = default; |
53 | virtual ~ResourceStrategy(); |
54 | |
55 | /// Selects a processor resource unit from a ReadyMask. |
56 | virtual uint64_t select(uint64_t ReadyMask) = 0; |
57 | |
58 | /// Called by the ResourceManager when a processor resource group, or a |
59 | /// processor resource with multiple units has become unavailable. |
60 | /// |
61 | /// The default strategy uses this information to bias its selection logic. |
62 | virtual void used(uint64_t ResourceMask) {} |
63 | }; |
64 | |
65 | /// Default resource allocation strategy used by processor resource groups and |
66 | /// processor resources with multiple units. |
67 | class DefaultResourceStrategy final : public ResourceStrategy { |
68 | /// A Mask of resource unit identifiers. |
69 | /// |
70 | /// There is one bit set for every available resource unit. |
71 | /// It defaults to the value of field ResourceSizeMask in ResourceState. |
72 | const uint64_t ResourceUnitMask; |
73 | |
74 | /// A simple round-robin selector for processor resource units. |
75 | /// Each bit of this mask identifies a sub resource within a group. |
76 | /// |
77 | /// As an example, lets assume that this is a default policy for a |
78 | /// processor resource group composed by the following three units: |
79 | /// ResourceA -- 0b001 |
80 | /// ResourceB -- 0b010 |
81 | /// ResourceC -- 0b100 |
82 | /// |
83 | /// Field NextInSequenceMask is used to select the next unit from the set of |
84 | /// resource units. It defaults to the value of field `ResourceUnitMasks` (in |
85 | /// this example, it defaults to mask '0b111'). |
86 | /// |
87 | /// The round-robin selector would firstly select 'ResourceC', then |
88 | /// 'ResourceB', and eventually 'ResourceA'. When a resource R is used, the |
89 | /// corresponding bit in NextInSequenceMask is cleared. For example, if |
90 | /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes |
91 | /// 0xb011. |
92 | /// |
93 | /// When NextInSequenceMask becomes zero, it is automatically reset to the |
94 | /// default value (i.e. ResourceUnitMask). |
95 | uint64_t NextInSequenceMask; |
96 | |
97 | /// This field is used to track resource units that are used (i.e. selected) |
98 | /// by other groups other than the one associated with this strategy object. |
99 | /// |
100 | /// In LLVM processor resource groups are allowed to partially (or fully) |
101 | /// overlap. That means, a same unit may be visible to multiple groups. |
102 | /// This field keeps track of uses that have originated from outside of |
103 | /// this group. The idea is to bias the selection strategy, so that resources |
104 | /// that haven't been used by other groups get prioritized. |
105 | /// |
106 | /// The end goal is to (try to) keep the resource distribution as much uniform |
107 | /// as possible. By construction, this mask only tracks one-level of resource |
108 | /// usage. Therefore, this strategy is expected to be less accurate when same |
109 | /// units are used multiple times by other groups within a single round of |
110 | /// select. |
111 | /// |
112 | /// Note: an LRU selector would have a better accuracy at the cost of being |
113 | /// slightly more expensive (mostly in terms of runtime cost). Methods |
114 | /// 'select' and 'used', are always in the hot execution path of llvm-mca. |
115 | /// Therefore, a slow implementation of 'select' would have a negative impact |
116 | /// on the overall performance of the tool. |
117 | uint64_t RemovedFromNextInSequence; |
118 | |
119 | public: |
120 | DefaultResourceStrategy(uint64_t UnitMask) |
121 | : ResourceUnitMask(UnitMask), NextInSequenceMask(UnitMask), |
122 | RemovedFromNextInSequence(0) {} |
123 | virtual ~DefaultResourceStrategy() = default; |
124 | |
125 | uint64_t select(uint64_t ReadyMask) override; |
126 | void used(uint64_t Mask) override; |
127 | }; |
128 | |
129 | /// A processor resource descriptor. |
130 | /// |
131 | /// There is an instance of this class for every processor resource defined by |
132 | /// the machine scheduling model. |
133 | /// Objects of class ResourceState dynamically track the usage of processor |
134 | /// resource units. |
135 | class ResourceState { |
136 | /// An index to the MCProcResourceDesc entry in the processor model. |
137 | const unsigned ProcResourceDescIndex; |
138 | /// A resource mask. This is generated by the tool with the help of |
139 | /// function `mca::computeProcResourceMasks' (see Support.h). |
140 | /// |
141 | /// Field ResourceMask only has one bit set if this resource state describes a |
142 | /// processor resource unit (i.e. this is not a group). That means, we can |
143 | /// quickly check if a resource is a group by simply counting the number of |
144 | /// bits that are set in the mask. |
145 | /// |
146 | /// The most significant bit of a mask (MSB) uniquely identifies a resource. |
147 | /// Remaining bits are used to describe the composition of a group (Group). |
148 | /// |
149 | /// Example (little endian): |
150 | /// Resource | Mask | MSB | Group |
151 | /// ---------+------------+------------+------------ |
152 | /// A | 0b000001 | 0b000001 | 0b000000 |
153 | /// | | | |
154 | /// B | 0b000010 | 0b000010 | 0b000000 |
155 | /// | | | |
156 | /// C | 0b010000 | 0b010000 | 0b000000 |
157 | /// | | | |
158 | /// D | 0b110010 | 0b100000 | 0b010010 |
159 | /// |
160 | /// In this example, resources A, B and C are processor resource units. |
161 | /// Only resource D is a group resource, and it contains resources B and C. |
162 | /// That is because MSB(B) and MSB(C) are both contained within Group(D). |
163 | const uint64_t ResourceMask; |
164 | |
165 | /// A ProcResource can have multiple units. |
166 | /// |
167 | /// For processor resource groups this field is a mask of contained resource |
168 | /// units. It is obtained from ResourceMask by clearing the highest set bit. |
169 | /// The number of resource units in a group can be simply computed as the |
170 | /// population count of this field. |
171 | /// |
172 | /// For normal (i.e. non-group) resources, the number of bits set in this mask |
173 | /// is equivalent to the number of units declared by the processor model (see |
174 | /// field 'NumUnits' in 'ProcResourceUnits'). |
175 | uint64_t ResourceSizeMask; |
176 | |
177 | /// A mask of ready units. |
178 | uint64_t ReadyMask; |
179 | |
180 | /// Buffered resources will have this field set to a positive number different |
181 | /// than zero. A buffered resource behaves like a reservation station |
182 | /// implementing its own buffer for out-of-order execution. |
183 | /// |
184 | /// A BufferSize of 1 is used by scheduler resources that force in-order |
185 | /// execution. |
186 | /// |
187 | /// A BufferSize of 0 is used to model in-order issue/dispatch resources. |
188 | /// Since in-order issue/dispatch resources don't implement buffers, dispatch |
189 | /// events coincide with issue events. |
190 | /// Also, no other instruction ca be dispatched/issue while this resource is |
191 | /// in use. Only when all the "resource cycles" are consumed (after the issue |
192 | /// event), a new instruction ca be dispatched. |
193 | const int BufferSize; |
194 | |
195 | /// Available slots in the buffer (zero, if this is not a buffered resource). |
196 | unsigned AvailableSlots; |
197 | |
198 | /// This field is set if this resource is currently reserved. |
199 | /// |
200 | /// Resources can be reserved for a number of cycles. |
201 | /// Instructions can still be dispatched to reserved resources. However, |
202 | /// istructions dispatched to a reserved resource cannot be issued to the |
203 | /// underlying units (i.e. pipelines) until the resource is released. |
204 | bool Unavailable; |
205 | |
206 | const bool IsAGroup; |
207 | |
208 | /// Checks for the availability of unit 'SubResMask' in the group. |
209 | bool isSubResourceReady(uint64_t SubResMask) const { |
210 | return ReadyMask & SubResMask; |
211 | } |
212 | |
213 | public: |
214 | ResourceState(const MCProcResourceDesc &Desc, unsigned Index, uint64_t Mask); |
215 | |
216 | unsigned getProcResourceID() const { return ProcResourceDescIndex; } |
217 | uint64_t getResourceMask() const { return ResourceMask; } |
218 | uint64_t getReadyMask() const { return ReadyMask; } |
219 | int getBufferSize() const { return BufferSize; } |
220 | |
221 | bool isBuffered() const { return BufferSize > 0; } |
222 | bool isInOrder() const { return BufferSize == 1; } |
223 | |
224 | /// Returns true if this is an in-order dispatch/issue resource. |
225 | bool isADispatchHazard() const { return BufferSize == 0; } |
226 | bool isReserved() const { return Unavailable; } |
227 | |
228 | void setReserved() { Unavailable = true; } |
229 | void clearReserved() { Unavailable = false; } |
230 | |
231 | /// Returs true if this resource is not reserved, and if there are at least |
232 | /// `NumUnits` available units. |
233 | bool isReady(unsigned NumUnits = 1) const; |
234 | |
235 | bool isAResourceGroup() const { return IsAGroup; } |
236 | |
237 | bool containsResource(uint64_t ID) const { return ResourceMask & ID; } |
238 | |
239 | void markSubResourceAsUsed(uint64_t ID) { |
240 | assert(isSubResourceReady(ID)); |
241 | ReadyMask ^= ID; |
242 | } |
243 | |
244 | void releaseSubResource(uint64_t ID) { |
245 | assert(!isSubResourceReady(ID)); |
246 | ReadyMask ^= ID; |
247 | } |
248 | |
249 | unsigned getNumUnits() const { |
250 | return isAResourceGroup() ? 1U : llvm::popcount(Value: ResourceSizeMask); |
251 | } |
252 | |
253 | /// Checks if there is an available slot in the resource buffer. |
254 | /// |
255 | /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if |
256 | /// there is a slot available. |
257 | /// |
258 | /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it |
259 | /// is reserved. |
260 | /// |
261 | /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots. |
262 | ResourceStateEvent isBufferAvailable() const; |
263 | |
264 | /// Reserve a buffer slot. |
265 | /// |
266 | /// Returns true if the buffer is not full. |
267 | /// It always returns true if BufferSize is set to zero. |
268 | bool reserveBuffer() { |
269 | if (BufferSize <= 0) |
270 | return true; |
271 | |
272 | --AvailableSlots; |
273 | assert(AvailableSlots <= static_cast<unsigned>(BufferSize)); |
274 | return AvailableSlots; |
275 | } |
276 | |
277 | /// Releases a slot in the buffer. |
278 | void releaseBuffer() { |
279 | // Ignore dispatch hazards or invalid buffer sizes. |
280 | if (BufferSize <= 0) |
281 | return; |
282 | |
283 | ++AvailableSlots; |
284 | assert(AvailableSlots <= static_cast<unsigned>(BufferSize)); |
285 | } |
286 | |
287 | #ifndef NDEBUG |
288 | void dump() const; |
289 | #endif |
290 | }; |
291 | |
292 | /// A resource unit identifier. |
293 | /// |
294 | /// This is used to identify a specific processor resource unit using a pair |
295 | /// of indices where the 'first' index is a processor resource mask, and the |
296 | /// 'second' index is an index for a "sub-resource" (i.e. unit). |
297 | typedef std::pair<uint64_t, uint64_t> ResourceRef; |
298 | |
299 | // First: a MCProcResourceDesc index identifying a buffered resource. |
300 | // Second: max number of buffer entries used in this resource. |
301 | typedef std::pair<unsigned, unsigned> BufferUsageEntry; |
302 | |
303 | /// A resource manager for processor resource units and groups. |
304 | /// |
305 | /// This class owns all the ResourceState objects, and it is responsible for |
306 | /// acting on requests from a Scheduler by updating the internal state of |
307 | /// ResourceState objects. |
308 | /// This class doesn't know about instruction itineraries and functional units. |
309 | /// In future, it can be extended to support itineraries too through the same |
310 | /// public interface. |
311 | class ResourceManager { |
312 | // Set of resources available on the subtarget. |
313 | // |
314 | // There is an instance of ResourceState for every resource declared by the |
315 | // target scheduling model. |
316 | // |
317 | // Elements of this vector are ordered by resource kind. In particular, |
318 | // resource units take precedence over resource groups. |
319 | // |
320 | // The index of a processor resource in this vector depends on the value of |
321 | // its mask (see the description of field ResourceState::ResourceMask). In |
322 | // particular, it is computed as the position of the most significant bit set |
323 | // (MSB) in the mask plus one (since we want to ignore the invalid resource |
324 | // descriptor at index zero). |
325 | // |
326 | // Example (little endian): |
327 | // |
328 | // Resource | Mask | MSB | Index |
329 | // ---------+---------+---------+------- |
330 | // A | 0b00001 | 0b00001 | 1 |
331 | // | | | |
332 | // B | 0b00100 | 0b00100 | 3 |
333 | // | | | |
334 | // C | 0b10010 | 0b10000 | 5 |
335 | // |
336 | // |
337 | // The same index is also used to address elements within vector `Strategies` |
338 | // and vector `Resource2Groups`. |
339 | std::vector<std::unique_ptr<ResourceState>> Resources; |
340 | std::vector<std::unique_ptr<ResourceStrategy>> Strategies; |
341 | |
342 | // Used to quickly identify groups that own a particular resource unit. |
343 | std::vector<uint64_t> Resource2Groups; |
344 | |
345 | // A table that maps processor resource IDs to processor resource masks. |
346 | SmallVector<uint64_t, 8> ProcResID2Mask; |
347 | |
348 | // A table that maps resource indices to actual processor resource IDs in the |
349 | // scheduling model. |
350 | SmallVector<unsigned, 8> ResIndex2ProcResID; |
351 | |
352 | // Keeps track of which resources are busy, and how many cycles are left |
353 | // before those become usable again. |
354 | SmallDenseMap<ResourceRef, unsigned> BusyResources; |
355 | |
356 | // Set of processor resource units available on the target. |
357 | uint64_t ProcResUnitMask; |
358 | |
359 | // Set of processor resource units that are available during this cycle. |
360 | uint64_t AvailableProcResUnits; |
361 | |
362 | // Set of processor resources that are currently reserved. |
363 | uint64_t ReservedResourceGroups; |
364 | |
365 | // Set of unavailable scheduler buffer resources. This is used internally to |
366 | // speedup `canBeDispatched()` queries. |
367 | uint64_t AvailableBuffers; |
368 | |
369 | // Set of dispatch hazard buffer resources that are currently unavailable. |
370 | uint64_t ReservedBuffers; |
371 | |
372 | // Returns the actual resource unit that will be used. |
373 | ResourceRef selectPipe(uint64_t ResourceID); |
374 | |
375 | void use(const ResourceRef &RR); |
376 | void release(const ResourceRef &RR); |
377 | |
378 | unsigned getNumUnits(uint64_t ResourceID) const; |
379 | |
380 | // Overrides the selection strategy for the processor resource with the given |
381 | // mask. |
382 | void setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S, |
383 | uint64_t ResourceMask); |
384 | |
385 | public: |
386 | ResourceManager(const MCSchedModel &SM); |
387 | virtual ~ResourceManager() = default; |
388 | |
389 | // Overrides the selection strategy for the resource at index ResourceID in |
390 | // the MCProcResourceDesc table. |
391 | void setCustomStrategy(std::unique_ptr<ResourceStrategy> S, |
392 | unsigned ResourceID) { |
393 | assert(ResourceID < ProcResID2Mask.size() && |
394 | "Invalid resource index in input!" ); |
395 | return setCustomStrategyImpl(S: std::move(S), ResourceMask: ProcResID2Mask[ResourceID]); |
396 | } |
397 | |
398 | // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if |
399 | // there are enough available slots in the buffers. |
400 | ResourceStateEvent canBeDispatched(uint64_t ConsumedBuffers) const; |
401 | |
402 | // Return the processor resource identifier associated to this Mask. |
403 | unsigned resolveResourceMask(uint64_t Mask) const; |
404 | |
405 | // Acquires a slot from every buffered resource in mask `ConsumedBuffers`. |
406 | // Units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. |
407 | void reserveBuffers(uint64_t ConsumedBuffers); |
408 | |
409 | // Releases a slot from every buffered resource in mask `ConsumedBuffers`. |
410 | // ConsumedBuffers is a bitmask of previously acquired buffers (using method |
411 | // `reserveBuffers`). Units that are dispatch hazards (i.e. BufferSize=0) are |
412 | // not automatically unreserved by this method. |
413 | void releaseBuffers(uint64_t ConsumedBuffers); |
414 | |
415 | // Reserve a processor resource. A reserved resource is not available for |
416 | // instruction issue until it is released. |
417 | void reserveResource(uint64_t ResourceID); |
418 | |
419 | // Release a previously reserved processor resource. |
420 | void releaseResource(uint64_t ResourceID); |
421 | |
422 | // Returns a zero mask if resources requested by Desc are all available during |
423 | // this cycle. It returns a non-zero mask value only if there are unavailable |
424 | // processor resources; each bit set in the mask represents a busy processor |
425 | // resource unit or a reserved processor resource group. |
426 | uint64_t checkAvailability(const InstrDesc &Desc) const; |
427 | |
428 | uint64_t getProcResUnitMask() const { return ProcResUnitMask; } |
429 | uint64_t getAvailableProcResUnits() const { return AvailableProcResUnits; } |
430 | |
431 | void issueInstruction( |
432 | const InstrDesc &Desc, |
433 | SmallVectorImpl<std::pair<ResourceRef, ReleaseAtCycles>> &Pipes); |
434 | |
435 | void cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed); |
436 | |
437 | #ifndef NDEBUG |
438 | void dump() const { |
439 | for (const std::unique_ptr<ResourceState> &Resource : Resources) |
440 | Resource->dump(); |
441 | } |
442 | #endif |
443 | }; |
444 | } // namespace mca |
445 | } // namespace llvm |
446 | |
447 | #endif // LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H |
448 | |