1 | /* |
2 | * Copyright © 2017 Intel Corporation |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice (including the next |
12 | * paragraph) shall be included in all copies or substantial portions of the |
13 | * Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
21 | * IN THE SOFTWARE. |
22 | * |
23 | */ |
24 | |
25 | #include <linux/slab.h> |
26 | |
27 | #include "i915_syncmap.h" |
28 | |
29 | #include "i915_gem.h" /* GEM_BUG_ON() */ |
30 | #include "i915_selftest.h" |
31 | |
32 | #define SHIFT ilog2(KSYNCMAP) |
33 | #define MASK (KSYNCMAP - 1) |
34 | |
35 | /* |
36 | * struct i915_syncmap is a layer of a radixtree that maps a u64 fence |
37 | * context id to the last u32 fence seqno waited upon from that context. |
38 | * Unlike lib/radixtree it uses a parent pointer that allows traversal back to |
39 | * the root. This allows us to access the whole tree via a single pointer |
40 | * to the most recently used layer. We expect fence contexts to be dense |
41 | * and most reuse to be on the same i915_gem_context but on neighbouring |
42 | * engines (i.e. on adjacent contexts) and reuse the same leaf, a very |
43 | * effective lookup cache. If the new lookup is not on the same leaf, we |
44 | * expect it to be on the neighbouring branch. |
45 | * |
46 | * A leaf holds an array of u32 seqno, and has height 0. The bitmap field |
47 | * allows us to store whether a particular seqno is valid (i.e. allows us |
48 | * to distinguish unset from 0). |
49 | * |
50 | * A branch holds an array of layer pointers, and has height > 0, and always |
51 | * has at least 2 layers (either branches or leaves) below it. |
52 | * |
53 | * For example, |
54 | * for x in |
55 | * 0 1 2 0x10 0x11 0x200 0x201 |
56 | * 0x500000 0x500001 0x503000 0x503001 |
57 | * 0xE<<60: |
58 | * i915_syncmap_set(&sync, x, lower_32_bits(x)); |
59 | * will build a tree like: |
60 | * 0xXXXXXXXXXXXXXXXX |
61 | * 0-> 0x0000000000XXXXXX |
62 | * | 0-> 0x0000000000000XXX |
63 | * | | 0-> 0x00000000000000XX |
64 | * | | | 0-> 0x000000000000000X 0:0, 1:1, 2:2 |
65 | * | | | 1-> 0x000000000000001X 0:10, 1:11 |
66 | * | | 2-> 0x000000000000020X 0:200, 1:201 |
67 | * | 5-> 0x000000000050XXXX |
68 | * | 0-> 0x000000000050000X 0:500000, 1:500001 |
69 | * | 3-> 0x000000000050300X 0:503000, 1:503001 |
70 | * e-> 0xe00000000000000X e:e |
71 | */ |
72 | |
73 | struct i915_syncmap { |
74 | u64 prefix; |
75 | unsigned int height; |
76 | unsigned int bitmap; |
77 | struct i915_syncmap *parent; |
78 | union { |
79 | DECLARE_FLEX_ARRAY(u32, seqno); |
80 | DECLARE_FLEX_ARRAY(struct i915_syncmap *, child); |
81 | }; |
82 | }; |
83 | |
84 | /** |
85 | * i915_syncmap_init -- initialise the #i915_syncmap |
86 | * @root: pointer to the #i915_syncmap |
87 | */ |
88 | void i915_syncmap_init(struct i915_syncmap **root) |
89 | { |
90 | BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP); |
91 | BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT); |
92 | BUILD_BUG_ON(KSYNCMAP > BITS_PER_TYPE((*root)->bitmap)); |
93 | *root = NULL; |
94 | } |
95 | |
96 | static inline u32 *__sync_seqno(struct i915_syncmap *p) |
97 | { |
98 | GEM_BUG_ON(p->height); |
99 | return p->seqno; |
100 | } |
101 | |
102 | static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p) |
103 | { |
104 | GEM_BUG_ON(!p->height); |
105 | return p->child; |
106 | } |
107 | |
108 | static inline unsigned int |
109 | __sync_branch_idx(const struct i915_syncmap *p, u64 id) |
110 | { |
111 | return (id >> p->height) & MASK; |
112 | } |
113 | |
114 | static inline unsigned int |
115 | __sync_leaf_idx(const struct i915_syncmap *p, u64 id) |
116 | { |
117 | GEM_BUG_ON(p->height); |
118 | return id & MASK; |
119 | } |
120 | |
121 | static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id) |
122 | { |
123 | return id >> p->height >> SHIFT; |
124 | } |
125 | |
126 | static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id) |
127 | { |
128 | GEM_BUG_ON(p->height); |
129 | return id >> SHIFT; |
130 | } |
131 | |
132 | static inline bool seqno_later(u32 a, u32 b) |
133 | { |
134 | return (s32)(a - b) >= 0; |
135 | } |
136 | |
137 | /** |
138 | * i915_syncmap_is_later -- compare against the last know sync point |
139 | * @root: pointer to the #i915_syncmap |
140 | * @id: the context id (other timeline) we are synchronising to |
141 | * @seqno: the sequence number along the other timeline |
142 | * |
143 | * If we have already synchronised this @root timeline with another (@id) then |
144 | * we can omit any repeated or earlier synchronisation requests. If the two |
145 | * timelines are already coupled, we can also omit the dependency between the |
146 | * two as that is already known via the timeline. |
147 | * |
148 | * Returns true if the two timelines are already synchronised wrt to @seqno, |
149 | * false if not and the synchronisation must be emitted. |
150 | */ |
151 | bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno) |
152 | { |
153 | struct i915_syncmap *p; |
154 | unsigned int idx; |
155 | |
156 | p = *root; |
157 | if (!p) |
158 | return false; |
159 | |
160 | if (likely(__sync_leaf_prefix(p, id) == p->prefix)) |
161 | goto found; |
162 | |
163 | /* First climb the tree back to a parent branch */ |
164 | do { |
165 | p = p->parent; |
166 | if (!p) |
167 | return false; |
168 | |
169 | if (__sync_branch_prefix(p, id) == p->prefix) |
170 | break; |
171 | } while (1); |
172 | |
173 | /* And then descend again until we find our leaf */ |
174 | do { |
175 | if (!p->height) |
176 | break; |
177 | |
178 | p = __sync_child(p)[__sync_branch_idx(p, id)]; |
179 | if (!p) |
180 | return false; |
181 | |
182 | if (__sync_branch_prefix(p, id) != p->prefix) |
183 | return false; |
184 | } while (1); |
185 | |
186 | *root = p; |
187 | found: |
188 | idx = __sync_leaf_idx(p, id); |
189 | if (!(p->bitmap & BIT(idx))) |
190 | return false; |
191 | |
192 | return seqno_later(a: __sync_seqno(p)[idx], b: seqno); |
193 | } |
194 | |
195 | static struct i915_syncmap * |
196 | __sync_alloc_leaf(struct i915_syncmap *parent, u64 id) |
197 | { |
198 | struct i915_syncmap *p; |
199 | |
200 | p = kmalloc(struct_size(p, seqno, KSYNCMAP), GFP_KERNEL); |
201 | if (unlikely(!p)) |
202 | return NULL; |
203 | |
204 | p->parent = parent; |
205 | p->height = 0; |
206 | p->bitmap = 0; |
207 | p->prefix = __sync_leaf_prefix(p, id); |
208 | return p; |
209 | } |
210 | |
211 | static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno) |
212 | { |
213 | unsigned int idx = __sync_leaf_idx(p, id); |
214 | |
215 | p->bitmap |= BIT(idx); |
216 | __sync_seqno(p)[idx] = seqno; |
217 | } |
218 | |
219 | static inline void __sync_set_child(struct i915_syncmap *p, |
220 | unsigned int idx, |
221 | struct i915_syncmap *child) |
222 | { |
223 | p->bitmap |= BIT(idx); |
224 | __sync_child(p)[idx] = child; |
225 | } |
226 | |
227 | static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno) |
228 | { |
229 | struct i915_syncmap *p = *root; |
230 | unsigned int idx; |
231 | |
232 | if (!p) { |
233 | p = __sync_alloc_leaf(NULL, id); |
234 | if (unlikely(!p)) |
235 | return -ENOMEM; |
236 | |
237 | goto found; |
238 | } |
239 | |
240 | /* Caller handled the likely cached case */ |
241 | GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix); |
242 | |
243 | /* Climb back up the tree until we find a common prefix */ |
244 | do { |
245 | if (!p->parent) |
246 | break; |
247 | |
248 | p = p->parent; |
249 | |
250 | if (__sync_branch_prefix(p, id) == p->prefix) |
251 | break; |
252 | } while (1); |
253 | |
254 | /* |
255 | * No shortcut, we have to descend the tree to find the right layer |
256 | * containing this fence. |
257 | * |
258 | * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences |
259 | * or lower layers. Leaf nodes (height = 0) contain the fences, all |
260 | * other nodes (height > 0) are internal layers that point to a lower |
261 | * node. Each internal layer has at least 2 descendents. |
262 | * |
263 | * Starting at the top, we check whether the current prefix matches. If |
264 | * it doesn't, we have gone past our target and need to insert a join |
265 | * into the tree, and a new leaf node for the target as a descendent |
266 | * of the join, as well as the original layer. |
267 | * |
268 | * The matching prefix means we are still following the right branch |
269 | * of the tree. If it has height 0, we have found our leaf and just |
270 | * need to replace the fence slot with ourselves. If the height is |
271 | * not zero, our slot contains the next layer in the tree (unless |
272 | * it is empty, in which case we can add ourselves as a new leaf). |
273 | * As descend the tree the prefix grows (and height decreases). |
274 | */ |
275 | do { |
276 | struct i915_syncmap *next; |
277 | |
278 | if (__sync_branch_prefix(p, id) != p->prefix) { |
279 | unsigned int above; |
280 | |
281 | /* Insert a join above the current layer */ |
282 | next = kzalloc(struct_size(next, child, KSYNCMAP), |
283 | GFP_KERNEL); |
284 | if (unlikely(!next)) |
285 | return -ENOMEM; |
286 | |
287 | /* Compute the height at which these two diverge */ |
288 | above = fls64(x: __sync_branch_prefix(p, id) ^ p->prefix); |
289 | above = round_up(above, SHIFT); |
290 | next->height = above + p->height; |
291 | next->prefix = __sync_branch_prefix(p: next, id); |
292 | |
293 | /* Insert the join into the parent */ |
294 | if (p->parent) { |
295 | idx = __sync_branch_idx(p: p->parent, id); |
296 | __sync_child(p: p->parent)[idx] = next; |
297 | GEM_BUG_ON(!(p->parent->bitmap & BIT(idx))); |
298 | } |
299 | next->parent = p->parent; |
300 | |
301 | /* Compute the idx of the other branch, not our id! */ |
302 | idx = p->prefix >> (above - SHIFT) & MASK; |
303 | __sync_set_child(p: next, idx, child: p); |
304 | p->parent = next; |
305 | |
306 | /* Ascend to the join */ |
307 | p = next; |
308 | } else { |
309 | if (!p->height) |
310 | break; |
311 | } |
312 | |
313 | /* Descend into the next layer */ |
314 | GEM_BUG_ON(!p->height); |
315 | idx = __sync_branch_idx(p, id); |
316 | next = __sync_child(p)[idx]; |
317 | if (!next) { |
318 | next = __sync_alloc_leaf(parent: p, id); |
319 | if (unlikely(!next)) |
320 | return -ENOMEM; |
321 | |
322 | __sync_set_child(p, idx, child: next); |
323 | p = next; |
324 | break; |
325 | } |
326 | |
327 | p = next; |
328 | } while (1); |
329 | |
330 | found: |
331 | GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id)); |
332 | __sync_set_seqno(p, id, seqno); |
333 | *root = p; |
334 | return 0; |
335 | } |
336 | |
337 | /** |
338 | * i915_syncmap_set -- mark the most recent syncpoint between contexts |
339 | * @root: pointer to the #i915_syncmap |
340 | * @id: the context id (other timeline) we have synchronised to |
341 | * @seqno: the sequence number along the other timeline |
342 | * |
343 | * When we synchronise this @root timeline with another (@id), we also know |
344 | * that we have synchronized with all previous seqno along that timeline. If |
345 | * we then have a request to synchronise with the same seqno or older, we can |
346 | * omit it, see i915_syncmap_is_later() |
347 | * |
348 | * Returns 0 on success, or a negative error code. |
349 | */ |
350 | int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno) |
351 | { |
352 | struct i915_syncmap *p = *root; |
353 | |
354 | /* |
355 | * We expect to be called in sequence following is_later(id), which |
356 | * should have preloaded the root for us. |
357 | */ |
358 | if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) { |
359 | __sync_set_seqno(p, id, seqno); |
360 | return 0; |
361 | } |
362 | |
363 | return __sync_set(root, id, seqno); |
364 | } |
365 | |
366 | static void __sync_free(struct i915_syncmap *p) |
367 | { |
368 | if (p->height) { |
369 | unsigned int i; |
370 | |
371 | while ((i = ffs(p->bitmap))) { |
372 | p->bitmap &= ~0u << i; |
373 | __sync_free(p: __sync_child(p)[i - 1]); |
374 | } |
375 | } |
376 | |
377 | kfree(objp: p); |
378 | } |
379 | |
380 | /** |
381 | * i915_syncmap_free -- free all memory associated with the syncmap |
382 | * @root: pointer to the #i915_syncmap |
383 | * |
384 | * Either when the timeline is to be freed and we no longer need the sync |
385 | * point tracking, or when the fences are all known to be signaled and the |
386 | * sync point tracking is redundant, we can free the #i915_syncmap to recover |
387 | * its allocations. |
388 | * |
389 | * Will reinitialise the @root pointer so that the #i915_syncmap is ready for |
390 | * reuse. |
391 | */ |
392 | void i915_syncmap_free(struct i915_syncmap **root) |
393 | { |
394 | struct i915_syncmap *p; |
395 | |
396 | p = *root; |
397 | if (!p) |
398 | return; |
399 | |
400 | while (p->parent) |
401 | p = p->parent; |
402 | |
403 | __sync_free(p); |
404 | *root = NULL; |
405 | } |
406 | |
407 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
408 | #include "selftests/i915_syncmap.c" |
409 | #endif |
410 | |