// SPDX-License-Identifier: MIT
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

#include <linux/dma-fence-chain.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"

struct amdgpu_sync_entry {
	struct hlist_node node;
	struct dma_fence *fence;
};

static struct kmem_cache *amdgpu_sync_slab;

/**
 * amdgpu_sync_create - zero init sync object
 *
 * @sync: sync object to initialize
 *
 * Just clear the sync object for now.
 */
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
	hash_init(sync->fences);
}
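
/*
 * Illustrative lifecycle of a sync object (a sketch, not code from this
 * file; variable names assumed and error handling elided):
 *
 *	struct amdgpu_sync sync;
 *
 *	amdgpu_sync_create(&sync);
 *	r = amdgpu_sync_fence(&sync, fence);
 *	r = amdgpu_sync_push_to_job(&sync, job);
 *	amdgpu_sync_free(&sync);
 */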

/**
 * amdgpu_sync_same_dev - test if a fence belongs to us
 *
 * @adev: amdgpu device to use for the test
 * @f: fence to test
 *
 * Test if the fence was issued by us.
 */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
				 struct dma_fence *f)
{
	struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

	if (s_fence) {
		struct amdgpu_ring *ring;

		ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
		return ring->adev == adev;
	}

	return false;
}

/**
 * amdgpu_sync_get_owner - extract the owner of a fence
 *
 * @f: fence to get the owner from
 *
 * Extract who originally created the fence.
 */
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
	struct drm_sched_fence *s_fence;
	struct amdgpu_amdkfd_fence *kfd_fence;

	if (!f)
		return AMDGPU_FENCE_OWNER_UNDEFINED;

	s_fence = to_drm_sched_fence(f);
	if (s_fence)
		return s_fence->owner;

	kfd_fence = to_amdgpu_amdkfd_fence(f);
	if (kfd_fence)
		return AMDGPU_FENCE_OWNER_KFD;

	return AMDGPU_FENCE_OWNER_UNDEFINED;
}

/**
 * amdgpu_sync_keep_later - Keep the later fence
 *
 * @keep: existing fence to test
 * @fence: new fence
 *
 * Either keep the existing fence or the new one, depending on which one is
 * later.
 */
static void amdgpu_sync_keep_later(struct dma_fence **keep,
				   struct dma_fence *fence)
{
	if (*keep && dma_fence_is_later(*keep, fence))
		return;

	dma_fence_put(*keep);
	*keep = dma_fence_get(fence);
}

/**
 * amdgpu_sync_add_later - add the fence to the hash
 *
 * @sync: sync object to add the fence to
 * @f: fence to add
 *
 * Tries to add the fence to an existing hash entry. Returns true when an entry
 * was found, false otherwise.
 */
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
{
	struct amdgpu_sync_entry *e;

	hash_for_each_possible(sync->fences, e, node, f->context) {
		if (unlikely(e->fence->context != f->context))
			continue;

		amdgpu_sync_keep_later(&e->fence, f);
		return true;
	}
	return false;
}
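
/*
 * Note that entries are keyed by dma_fence context, so two fences from the
 * same context collapse to whichever is later. Illustrative sketch (fence
 * names assumed, not from this file):
 *
 *	amdgpu_sync_fence(&sync, fence_a);	// context 5, seqno 1
 *	amdgpu_sync_fence(&sync, fence_b);	// context 5, seqno 2
 *	// the sync object now holds only fence_b
 */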

/**
 * amdgpu_sync_fence - remember to sync to this fence
 *
 * @sync: sync object to add fence to
 * @f: fence to sync to
 *
 * Add the fence to the sync object.
 */
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
{
	struct amdgpu_sync_entry *e;

	if (!f)
		return 0;

	if (amdgpu_sync_add_later(sync, f))
		return 0;

	e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
	if (!e)
		return -ENOMEM;

	hash_add(sync->fences, &e->node, f->context);
	e->fence = dma_fence_get(f);
	return 0;
}

/* Determine based on the owner and mode if we should sync to a fence or not */
static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
				   enum amdgpu_sync_mode mode,
				   void *owner, struct dma_fence *f)
{
	void *fence_owner = amdgpu_sync_get_owner(f);

	/* Always sync to moves, no matter what */
	if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
		return true;

	/* We only want to trigger KFD eviction fences on
	 * evict or move jobs. Skip KFD fences otherwise.
	 */
	if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
		return false;

	/* Never sync to VM updates either. */
	if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
		return false;

	/* Ignore fences depending on the sync mode */
	switch (mode) {
	case AMDGPU_SYNC_ALWAYS:
		return true;

	case AMDGPU_SYNC_NE_OWNER:
		if (amdgpu_sync_same_dev(adev, f) &&
		    fence_owner == owner)
			return false;
		break;

	case AMDGPU_SYNC_EQ_OWNER:
		if (amdgpu_sync_same_dev(adev, f) &&
		    fence_owner != owner)
			return false;
		break;

	case AMDGPU_SYNC_EXPLICIT:
		return false;
	}

	WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
	     "Adding eviction fence to sync obj");
	return true;
}
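
/*
 * Rough summary of the sync modes handled above (illustrative only; the
 * switch statement is authoritative):
 *
 *	AMDGPU_SYNC_ALWAYS:   sync to every fence that survives the
 *	                      owner-based filtering
 *	AMDGPU_SYNC_NE_OWNER: skip fences from the same device *and* the
 *	                      same owner
 *	AMDGPU_SYNC_EQ_OWNER: skip fences from the same device but a
 *	                      *different* owner
 *	AMDGPU_SYNC_EXPLICIT: never sync implicitly; dependencies are
 *	                      added explicitly instead
 */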

/**
 * amdgpu_sync_resv - sync to a reservation object
 *
 * @adev: amdgpu device
 * @sync: sync object to add fences from reservation object to
 * @resv: reservation object with embedded fence
 * @mode: how owner affects which fences we sync to
 * @owner: owner of the planned job submission
 *
 * Add all fences from the reservation object that pass the owner and mode
 * checks to the sync object.
 */
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
		     struct dma_resv *resv, enum amdgpu_sync_mode mode,
		     void *owner)
{
	struct dma_resv_iter cursor;
	struct dma_fence *f;
	int r;

	if (resv == NULL)
		return -EINVAL;

	/* TODO: Use DMA_RESV_USAGE_READ here */
	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) {
		dma_fence_chain_for_each(f, f) {
			struct dma_fence *tmp = dma_fence_chain_contained(f);

			if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
				r = amdgpu_sync_fence(sync, f);
				dma_fence_put(f);
				if (r)
					return r;
				break;
			}
		}
	}
	return 0;
}
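
/*
 * Possible call pattern, as a hedged sketch (the bo and owner names are
 * assumed for illustration, not taken from this file):
 *
 *	r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
 *			     AMDGPU_SYNC_NE_OWNER, owner);
 *	if (r)
 *		goto error;
 */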

/* Free the entry back to the slab */
static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e)
{
	hash_del(&e->node);
	dma_fence_put(e->fence);
	kmem_cache_free(amdgpu_sync_slab, e);
}

/**
 * amdgpu_sync_peek_fence - get the next fence not signaled yet
 *
 * @sync: the sync object
 * @ring: optional ring to use for test
 *
 * Returns the next fence not signaled yet without removing it from the sync
 * object.
 */
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
					 struct amdgpu_ring *ring)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		struct dma_fence *f = e->fence;
		struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

		if (dma_fence_is_signaled(f)) {
			amdgpu_sync_entry_free(e);
			continue;
		}
		if (ring && s_fence) {
			/* For fences from the same ring it is sufficient
			 * when they are scheduled.
			 */
			if (s_fence->sched == &ring->sched) {
				if (dma_fence_is_signaled(&s_fence->scheduled))
					continue;

				return &s_fence->scheduled;
			}
		}

		return f;
	}

	return NULL;
}

/**
 * amdgpu_sync_get_fence - get the next fence from the sync object
 *
 * @sync: sync object to use
 *
 * Gets and removes the next fence from the sync object that is not signaled
 * yet.
 */
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {

		f = e->fence;

		hash_del(&e->node);
		kmem_cache_free(amdgpu_sync_slab, e);

		if (!dma_fence_is_signaled(f))
			return f;

		dma_fence_put(f);
	}
	return NULL;
}
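
/*
 * A possible drain loop built on amdgpu_sync_get_fence() (a sketch, not
 * code from this file; the caller owns the returned reference):
 *
 *	while ((f = amdgpu_sync_get_fence(&sync))) {
 *		r = dma_fence_wait(f, false);
 *		dma_fence_put(f);
 *		if (r)
 *			break;
 *	}
 */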

/**
 * amdgpu_sync_clone - clone a sync object
 *
 * @source: sync object to clone
 * @clone: pointer to destination sync object
 *
 * Adds references to all unsignaled fences in @source to @clone. Also
 * removes signaled fences from @source while at it.
 */
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i, r;

	hash_for_each_safe(source->fences, i, tmp, e, node) {
		f = e->fence;
		if (!dma_fence_is_signaled(f)) {
			r = amdgpu_sync_fence(clone, f);
			if (r)
				return r;
		} else {
			amdgpu_sync_entry_free(e);
		}
	}

	return 0;
}

/**
 * amdgpu_sync_push_to_job - push fences into job
 * @sync: sync object to get the fences from
 * @job: job to push the fences into
 *
 * Add all unsignaled fences from sync to job.
 */
int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i, r;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		f = e->fence;
		if (dma_fence_is_signaled(f)) {
			amdgpu_sync_entry_free(e);
			continue;
		}

		dma_fence_get(f);
		r = drm_sched_job_add_dependency(&job->base, f);
		if (r) {
			dma_fence_put(f);
			return r;
		}
	}
	return 0;
}

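/**
 * amdgpu_sync_wait - wait for all fences in the sync object
 *
 * @sync: sync object to wait on
 * @intr: if true, the wait is interruptible
 *
 * Waits for every fence in the sync object to signal, freeing each entry as
 * it completes. Returns 0 on success or a negative error code if a wait
 * fails or is interrupted.
 */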
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i, r;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		r = dma_fence_wait(e->fence, intr);
		if (r)
			return r;

		amdgpu_sync_entry_free(e);
	}

	return 0;
}

/**
 * amdgpu_sync_free - free the sync object
 *
 * @sync: sync object to use
 *
 * Free the sync object.
 */
void amdgpu_sync_free(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	unsigned int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node)
		amdgpu_sync_entry_free(e);
}

/**
 * amdgpu_sync_init - init sync object subsystem
 *
 * Allocate the slab allocator.
 */
int amdgpu_sync_init(void)
{
	amdgpu_sync_slab = kmem_cache_create("amdgpu_sync",
					     sizeof(struct amdgpu_sync_entry),
					     0, SLAB_HWCACHE_ALIGN, NULL);
	if (!amdgpu_sync_slab)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_sync_fini - fini sync object subsystem
 *
 * Free the slab allocator.
 */
void amdgpu_sync_fini(void)
{
	kmem_cache_destroy(amdgpu_sync_slab);
}