// SPDX-License-Identifier: MIT

#include <linux/slab.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_syncobj.h>

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_sched.h"

#define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000

/* Starts at 0, since the DRM scheduler interprets those parameters as (initial)
 * index to the run-queue array.
 */
enum nouveau_sched_priority {
	NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_KERNEL,
	NOUVEAU_SCHED_PRIORITY_COUNT,
};

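/* Initialize a job from the given arguments: copy the userspace sync arrays
 * into job-local storage and initialize the embedded DRM scheduler job.
 * Passing in_sync or out_sync entries is mutually exclusive with synchronous
 * job submission (args->sync).
 */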
int
nouveau_job_init(struct nouveau_job *job,
		 struct nouveau_job_args *args)
{
	struct nouveau_sched *sched = args->sched;
	int ret;

	INIT_LIST_HEAD(&job->entry);

	job->file_priv = args->file_priv;
	job->cli = nouveau_cli(args->file_priv);
	job->sched = sched;

	job->sync = args->sync;
	job->resv_usage = args->resv_usage;

	job->ops = args->ops;

	job->in_sync.count = args->in_sync.count;
	if (job->in_sync.count) {
		if (job->sync)
			return -EINVAL;

		job->in_sync.data = kmemdup(args->in_sync.s,
					    sizeof(*args->in_sync.s) *
					    args->in_sync.count,
					    GFP_KERNEL);
		if (!job->in_sync.data)
			return -ENOMEM;
	}

	job->out_sync.count = args->out_sync.count;
	if (job->out_sync.count) {
		if (job->sync) {
			ret = -EINVAL;
			goto err_free_in_sync;
		}

		job->out_sync.data = kmemdup(args->out_sync.s,
					     sizeof(*args->out_sync.s) *
					     args->out_sync.count,
					     GFP_KERNEL);
		if (!job->out_sync.data) {
			ret = -ENOMEM;
			goto err_free_in_sync;
		}

		job->out_sync.objs = kcalloc(job->out_sync.count,
					     sizeof(*job->out_sync.objs),
					     GFP_KERNEL);
		if (!job->out_sync.objs) {
			ret = -ENOMEM;
			goto err_free_out_sync;
		}

		job->out_sync.chains = kcalloc(job->out_sync.count,
					       sizeof(*job->out_sync.chains),
					       GFP_KERNEL);
		if (!job->out_sync.chains) {
			ret = -ENOMEM;
			goto err_free_objs;
		}
	}

	ret = drm_sched_job_init(&job->base, &sched->entity,
				 args->credits, NULL);
	if (ret)
		goto err_free_chains;

	job->state = NOUVEAU_JOB_INITIALIZED;

	return 0;

err_free_chains:
	kfree(job->out_sync.chains);
err_free_objs:
	kfree(job->out_sync.objs);
err_free_out_sync:
	kfree(job->out_sync.data);
err_free_in_sync:
	kfree(job->in_sync.data);
	return ret;
}

void
nouveau_job_fini(struct nouveau_job *job)
{
	dma_fence_put(job->done_fence);
	drm_sched_job_cleanup(&job->base);

	job->ops->free(job);
}

void
nouveau_job_done(struct nouveau_job *job)
{
	struct nouveau_sched *sched = job->sched;

	spin_lock(&sched->job.list.lock);
	list_del(&job->entry);
	spin_unlock(&sched->job.list.lock);

	wake_up(&sched->job.wq);
}

void
nouveau_job_free(struct nouveau_job *job)
{
	kfree(job->in_sync.data);
	kfree(job->out_sync.data);
	kfree(job->out_sync.objs);
	kfree(job->out_sync.chains);
}

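/* Resolve a struct drm_nouveau_sync to the dma_fence backing the syncobj
 * handle; for timeline syncobjs the fence of the given timeline point is
 * looked up.
 */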
static int
sync_find_fence(struct nouveau_job *job,
		struct drm_nouveau_sync *sync,
		struct dma_fence **fence)
{
	u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
	u64 point = 0;
	int ret;

	if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
	    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
		return -EOPNOTSUPP;

	if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
		point = sync->timeline_value;

	ret = drm_syncobj_find_fence(job->file_priv,
				     sync->handle, point,
				     0 /* flags */, fence);
	if (ret)
		return ret;

	return 0;
}

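/* Register the fence of every input syncobj as a scheduler dependency, such
 * that the job does not run before all of them have signaled.
 */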
static int
nouveau_job_add_deps(struct nouveau_job *job)
{
	struct dma_fence *in_fence = NULL;
	int ret, i;

	for (i = 0; i < job->in_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->in_sync.data[i];

		ret = sync_find_fence(job, sync, &in_fence);
		if (ret) {
			NV_PRINTK(warn, job->cli,
				  "Failed to find syncobj (-> in): handle=%d\n",
				  sync->handle);
			return ret;
		}

		ret = drm_sched_job_add_dependency(&job->base, in_fence);
		if (ret)
			return ret;
	}

	return 0;
}

static void
nouveau_job_fence_attach_cleanup(struct nouveau_job *job)
{
	int i;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_syncobj *obj = job->out_sync.objs[i];
		struct dma_fence_chain *chain = job->out_sync.chains[i];

		if (obj)
			drm_syncobj_put(obj);

		if (chain)
			dma_fence_chain_free(chain);
	}
}

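/* Look up all output syncobjs and pre-allocate the dma_fence_chain nodes
 * needed for timeline syncobjs. This is done before the job is armed, since
 * lookups and allocations may fail, whereas nouveau_job_fence_attach() must
 * not.
 */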
static int
nouveau_job_fence_attach_prepare(struct nouveau_job *job)
{
	int i, ret;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
		struct drm_syncobj **pobj = &job->out_sync.objs[i];
		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;

		if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
		    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			ret = -EINVAL;
			goto err_sync_cleanup;
		}

		*pobj = drm_syncobj_find(job->file_priv, sync->handle);
		if (!*pobj) {
			NV_PRINTK(warn, job->cli,
				  "Failed to find syncobj (-> out): handle=%d\n",
				  sync->handle);
			ret = -ENOENT;
			goto err_sync_cleanup;
		}

		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			*pchain = dma_fence_chain_alloc();
			if (!*pchain) {
				ret = -ENOMEM;
				goto err_sync_cleanup;
			}
		}
	}

	return 0;

err_sync_cleanup:
	nouveau_job_fence_attach_cleanup(job);
	return ret;
}

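/* Attach the job's done_fence to every output syncobj. This must not fail;
 * all lookups and allocations already happened in
 * nouveau_job_fence_attach_prepare().
 */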
static void
nouveau_job_fence_attach(struct nouveau_job *job)
{
	struct dma_fence *fence = job->done_fence;
	int i;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
		struct drm_syncobj **pobj = &job->out_sync.objs[i];
		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;

		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			drm_syncobj_add_point(*pobj, *pchain, fence,
					      sync->timeline_value);
		} else {
			drm_syncobj_replace_fence(*pobj, fence);
		}

		drm_syncobj_put(*pobj);
		*pobj = NULL;
		*pchain = NULL;
	}
}

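/* Submit a job to its scheduler entity. For synchronous jobs (job->sync) this
 * also blocks until the job's done_fence has signaled.
 */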
int
nouveau_job_submit(struct nouveau_job *job)
{
	struct nouveau_sched *sched = job->sched;
	struct dma_fence *done_fence = NULL;
	struct drm_gpuvm_exec vm_exec = {
		.vm = &nouveau_cli_uvmm(job->cli)->base,
		.flags = DRM_EXEC_IGNORE_DUPLICATES,
		.num_fences = 1,
	};
	int ret;

	ret = nouveau_job_add_deps(job);
	if (ret)
		goto err;

	ret = nouveau_job_fence_attach_prepare(job);
	if (ret)
		goto err;

	/* Make sure the job appears on the sched_entity's queue in the same
	 * order as it was submitted.
	 */
	mutex_lock(&sched->mutex);

	/* Guarantee we won't fail after the submit() callback returned
	 * successfully.
	 */
	if (job->ops->submit) {
		ret = job->ops->submit(job, &vm_exec);
		if (ret)
			goto err_cleanup;
	}

	/* Submit was successful; add the job to the scheduler's job list. */
	spin_lock(&sched->job.list.lock);
	list_add(&job->entry, &sched->job.list.head);
	spin_unlock(&sched->job.list.lock);

	drm_sched_job_arm(&job->base);
	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
	if (job->sync)
		done_fence = dma_fence_get(job->done_fence);

	if (job->ops->armed_submit)
		job->ops->armed_submit(job, &vm_exec);

	nouveau_job_fence_attach(job);

	/* Set job state before pushing the job to the scheduler,
	 * such that we do not overwrite the job state set in run().
	 */
	job->state = NOUVEAU_JOB_SUBMIT_SUCCESS;

	drm_sched_entity_push_job(&job->base);

	mutex_unlock(&sched->mutex);

	if (done_fence) {
		dma_fence_wait(done_fence, true);
		dma_fence_put(done_fence);
	}

	return 0;

err_cleanup:
	mutex_unlock(&sched->mutex);
	nouveau_job_fence_attach_cleanup(job);
err:
	job->state = NOUVEAU_JOB_SUBMIT_FAILED;
	return ret;
}

static struct dma_fence *
nouveau_job_run(struct nouveau_job *job)
{
	struct dma_fence *fence;

	fence = job->ops->run(job);
	if (IS_ERR(fence))
		job->state = NOUVEAU_JOB_RUN_FAILED;
	else
		job->state = NOUVEAU_JOB_RUN_SUCCESS;

	return fence;
}

static struct dma_fence *
nouveau_sched_run_job(struct drm_sched_job *sched_job)
{
	struct nouveau_job *job = to_nouveau_job(sched_job);

	return nouveau_job_run(job);
}

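/* Timeout handler called by the DRM scheduler: stop the scheduler, give the
 * job's timeout() callback a chance to recover and start the scheduler again
 * afterwards.
 */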
static enum drm_gpu_sched_stat
nouveau_sched_timedout_job(struct drm_sched_job *sched_job)
{
	struct drm_gpu_scheduler *sched = sched_job->sched;
	struct nouveau_job *job = to_nouveau_job(sched_job);
	enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_NOMINAL;

	drm_sched_stop(sched, sched_job);

	if (job->ops->timeout)
		stat = job->ops->timeout(job);
	else
		NV_PRINTK(warn, job->cli, "Generic job timeout.\n");

	drm_sched_start(sched, true);

	return stat;
}

static void
nouveau_sched_free_job(struct drm_sched_job *sched_job)
{
	struct nouveau_job *job = to_nouveau_job(sched_job);

	nouveau_job_fini(job);
}

static const struct drm_sched_backend_ops nouveau_sched_ops = {
	.run_job = nouveau_sched_run_job,
	.timedout_job = nouveau_sched_timedout_job,
	.free_job = nouveau_sched_free_job,
};

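/* Initialize a nouveau_sched instance. If the caller doesn't pass a
 * workqueue, allocate a dedicated one, which nouveau_sched_fini() destroys
 * again.
 */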
static int
nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
		   struct workqueue_struct *wq, u32 credit_limit)
{
	struct drm_gpu_scheduler *drm_sched = &sched->base;
	struct drm_sched_entity *entity = &sched->entity;
	long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);
	int ret;

	if (!wq) {
		wq = alloc_workqueue("nouveau_sched_wq_%d", 0, WQ_MAX_ACTIVE,
				     current->pid);
		if (!wq)
			return -ENOMEM;

		sched->wq = wq;
	}

	ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq,
			     NOUVEAU_SCHED_PRIORITY_COUNT,
			     credit_limit, 0, job_hang_limit,
			     NULL, NULL, "nouveau_sched", drm->dev->dev);
	if (ret)
		goto fail_wq;

	/* Using DRM_SCHED_PRIORITY_KERNEL, since that's what we're required to use
	 * when we want to have a single run-queue only.
	 *
	 * It's not documented, but one will find out when trying to use any
	 * other priority and running into faults, because the scheduler uses
	 * the priority as an array index.
	 *
	 * Can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it's not
	 * matching the enum type used in drm_sched_entity_init().
	 */
	ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_KERNEL,
				    &drm_sched, 1, NULL);
	if (ret)
		goto fail_sched;

	mutex_init(&sched->mutex);
	spin_lock_init(&sched->job.list.lock);
	INIT_LIST_HEAD(&sched->job.list.head);
	init_waitqueue_head(&sched->job.wq);

	return 0;

fail_sched:
	drm_sched_fini(drm_sched);
fail_wq:
	if (sched->wq)
		destroy_workqueue(sched->wq);
	return ret;
}

int
nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
		     struct workqueue_struct *wq, u32 credit_limit)
{
	struct nouveau_sched *sched;
	int ret;

	sched = kzalloc(sizeof(*sched), GFP_KERNEL);
	if (!sched)
		return -ENOMEM;

	ret = nouveau_sched_init(sched, drm, wq, credit_limit);
	if (ret) {
		kfree(sched);
		return ret;
	}

	*psched = sched;

	return 0;
}

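/* Tear down a nouveau_sched instance; waits for the job list to drain, i.e.
 * for all previously submitted jobs to have completed.
 */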
static void
nouveau_sched_fini(struct nouveau_sched *sched)
{
	struct drm_gpu_scheduler *drm_sched = &sched->base;
	struct drm_sched_entity *entity = &sched->entity;

	rmb(); /* for list_empty to work without lock */
	wait_event(sched->job.wq, list_empty(&sched->job.list.head));

	drm_sched_entity_fini(entity);
	drm_sched_fini(drm_sched);

	/* Destroy workqueue after scheduler tear down, otherwise it might still
	 * be in use.
	 */
	if (sched->wq)
		destroy_workqueue(sched->wq);
}

void
nouveau_sched_destroy(struct nouveau_sched **psched)
{
	struct nouveau_sched *sched = *psched;

	nouveau_sched_fini(sched);
	kfree(sched);

	*psched = NULL;
}