// SPDX-License-Identifier: MIT

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_chan.h"
#include "nouveau_sched.h"
#include "nouveau_uvmm.h"

/**
 * DOC: Overview
 *
 * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
 * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
 *
 * To use the UAPI, a user client must first initialize the VA space using the
 * DRM_NOUVEAU_VM_INIT ioctl, specifying which region of the VA space should be
 * managed by the kernel and which by the UMD.
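 *
 * A minimal sketch of such an initialization is shown below. It assumes
 * libdrm's drmIoctl() helper; the struct drm_nouveau_vm_init field names and
 * the chosen split between kernel- and UMD-managed regions are assumptions for
 * illustration only::
 *
 *     struct drm_nouveau_vm_init init = {
 *         .kernel_managed_addr = 0,           // hypothetical split: the kernel
 *         .kernel_managed_size = 1ull << 32,  // manages the first 4 GiB of VA
 *     };
 *
 *     ret = drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_INIT, &init);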
 *
 * The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
 * userspace-manageable portion of the VA space. It provides operations to map
 * and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
 * backed by a GEM object, and the kernel will ignore GEM handles provided
 * alongside a sparse mapping.
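 *
 * As a rough sketch, a single sparse mapping could be requested as follows;
 * the struct drm_nouveau_vm_bind_op layout, the DRM_NOUVEAU_VM_BIND_OP_MAP and
 * DRM_NOUVEAU_VM_BIND_SPARSE values and the addresses used are assumptions for
 * illustration only::
 *
 *     struct drm_nouveau_vm_bind_op op = {
 *         .op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *         .flags = DRM_NOUVEAU_VM_BIND_SPARSE,
 *         .addr = sparse_va,      // no .handle: sparse mappings have no GEM backing
 *         .range = sparse_size,
 *     };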
 *
 * Userspace may request memory backed mappings either within or outside of the
 * bounds (but not crossing those bounds) of a previously mapped sparse
 * mapping. Subsequently requested memory backed mappings within a sparse
 * mapping will take precedence over the corresponding range of the sparse
 * mapping. If such memory backed mappings are unmapped the kernel will make
 * sure that the corresponding sparse mapping takes their place again. Requests
 * to unmap a sparse mapping that still contains memory backed mappings will
 * result in those memory backed mappings being unmapped first.
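 *
 * Continuing the sketch above, a memory backed mapping placed inside the
 * sparse mapping might then be requested as follows; the GEM handle, offset
 * and placement are again purely illustrative::
 *
 *     struct drm_nouveau_vm_bind_op op = {
 *         .op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *         .handle = bo_handle,            // GEM object providing the backing memory
 *         .addr = sparse_va + 0x10000,    // fully contained in the sparse range
 *         .bo_offset = 0,
 *         .range = 0x10000,
 *     };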
 *
 * Unmap requests are not bound to the range of existing mappings and can even
 * overlap the bounds of sparse mappings. For such a request the kernel will
 * make sure to unmap all memory backed mappings within the given range,
 * splitting up memory backed mappings which are only partially contained
 * within the given range. Unmap requests with the sparse flag set, however,
 * must exactly match the range of a previously mapped sparse mapping.
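 *
 * For example, an unmap over an arbitrary range, which the kernel may have to
 * satisfy by splitting partially contained memory backed mappings, could be
 * expressed as follows (addresses and sizes are illustrative only)::
 *
 *     struct drm_nouveau_vm_bind_op op = {
 *         .op = DRM_NOUVEAU_VM_BIND_OP_UNMAP,
 *         .addr = some_va,        // may cut through existing memory backed mappings
 *         .range = some_size,     // no sparse flag: ranges need not match exactly
 *     };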
 *
 * While the kernel generally permits arbitrary sequences and ranges of memory
 * backed mappings being mapped and unmapped, either within a single or across
 * multiple VM_BIND ioctl calls, there are some restrictions for sparse
 * mappings.
 *
 * The kernel does not permit the following (see also the sketch after this
 * list):
 * - unmapping non-existent sparse mappings
 * - unmapping a sparse mapping and mapping a new sparse mapping overlapping
 *   the range of the previously unmapped sparse mapping within the same
 *   VM_BIND ioctl
 * - unmapping a sparse mapping and mapping new memory backed mappings
 *   overlapping the range of the previously unmapped sparse mapping within
 *   the same VM_BIND ioctl
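 *
 * As an illustration, a request like the following, which submits both
 * operations in a single VM_BIND ioctl, would be rejected (the op layout is
 * again an assumption for illustration only)::
 *
 *     struct drm_nouveau_vm_bind_op ops[] = {
 *         { .op = DRM_NOUVEAU_VM_BIND_OP_UNMAP,
 *           .flags = DRM_NOUVEAU_VM_BIND_SPARSE,
 *           .addr = sparse_va, .range = sparse_size },
 *         // invalid: overlaps the sparse range unmapped by the same ioctl
 *         { .op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *           .handle = bo_handle,
 *           .addr = sparse_va, .range = 0x10000 },
 *     };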
 *
 * When using the VM_BIND ioctl to request the kernel to map memory to a given
 * virtual address in the GPU's VA space there is no guarantee that the actual
 * mappings are created in the GPU's MMU. If the given memory is swapped out
 * at the time the bind operation is executed the kernel will stash the mapping
 * details into its internal allocator and create the actual MMU mappings once
 * the memory is swapped back in. While this is transparent to userspace, it is
 * guaranteed that all the backing memory is swapped back in and all the memory
 * mappings, as requested by userspace previously, are actually mapped once the
 * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
 *
 * A VM_BIND job can be executed either synchronously or asynchronously. If
 * executed asynchronously, userspace may provide a list of syncobjs this job
 * will wait for and/or a list of syncobjs the kernel will signal once the
 * VM_BIND job has finished execution. If executed synchronously the ioctl will
 * block until the bind job is finished. For synchronous jobs the kernel will
 * not permit any syncobjs to be submitted.
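 *
 * A rough sketch of an asynchronous bind submission is shown below; the
 * struct drm_nouveau_vm_bind and struct drm_nouveau_sync layouts as well as
 * the DRM_NOUVEAU_VM_BIND_RUN_ASYNC flag are assumptions for illustration
 * only::
 *
 *     struct drm_nouveau_sync sig = { .handle = syncobj_handle };
 *     struct drm_nouveau_vm_bind bind = {
 *         .flags = DRM_NOUVEAU_VM_BIND_RUN_ASYNC,
 *         .op_count = 1,
 *         .op_ptr = (uintptr_t)&op,       // the bind op to execute
 *         .sig_count = 1,
 *         .sig_ptr = (uintptr_t)&sig,     // signaled once the job has finished
 *     };
 *
 *     ret = drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);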
 *
 * To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
 * jobs are always executed asynchronously, and, like VM_BIND jobs, provide the
 * option to synchronize them with syncobjs.
 *
 * Besides that, EXEC jobs can be scheduled for a specified channel to execute on.
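 *
 * A minimal EXEC submission might look like the following sketch; the push
 * buffer address and length, the channel id and the DRM_IOCTL_NOUVEAU_EXEC
 * request used with libdrm's drmIoctl() are assumptions for illustration
 * only::
 *
 *     struct drm_nouveau_exec_push push = {
 *         .va = push_va,          // GPU VA of the push buffer
 *         .va_len = push_len,     // must not exceed the per-push length limit
 *     };
 *     struct drm_nouveau_exec exec = {
 *         .channel = channel_id,  // channel to schedule the job on
 *         .push_count = 1,
 *         .push_ptr = (uintptr_t)&push,
 *     };
 *
 *     ret = drmIoctl(fd, DRM_IOCTL_NOUVEAU_EXEC, &exec);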
 *
 * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs do have
 * an up-to-date view of the VA space. However, the actual mappings might still
 * be pending. Hence, EXEC jobs require the fences of the corresponding VM_BIND
 * jobs they depend on to be attached to them.
 */

static int
nouveau_exec_job_submit(struct nouveau_job *job,
                        struct drm_gpuvm_exec *vme)
{
        struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
        struct nouveau_cli *cli = job->cli;
        struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
        int ret;

        /* Create a new fence, but do not emit yet. */
        ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
        if (ret)
                return ret;

        nouveau_uvmm_lock(uvmm);
        ret = drm_gpuvm_exec_lock(vme);
        if (ret) {
                nouveau_uvmm_unlock(uvmm);
                return ret;
        }
        nouveau_uvmm_unlock(uvmm);

        ret = drm_gpuvm_exec_validate(vme);
        if (ret) {
                drm_gpuvm_exec_unlock(vme);
                return ret;
        }

        return 0;
}

static void
nouveau_exec_job_armed_submit(struct nouveau_job *job,
                              struct drm_gpuvm_exec *vme)
{
        drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
                                      job->resv_usage, job->resv_usage);
        drm_gpuvm_exec_unlock(vme);
}

static struct dma_fence *
nouveau_exec_job_run(struct nouveau_job *job)
{
        struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
        struct nouveau_channel *chan = exec_job->chan;
        struct nouveau_fence *fence = exec_job->fence;
        int i, ret;

        ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
        if (ret) {
                NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
                return ERR_PTR(ret);
        }

        for (i = 0; i < exec_job->push.count; i++) {
                struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
                bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

                nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
        }

        ret = nouveau_fence_emit(fence);
        if (ret) {
                nouveau_fence_unref(&exec_job->fence);
                NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
                WIND_RING(chan);
                return ERR_PTR(ret);
        }

        /* The fence was emitted successfully, set the job's fence pointer to
         * NULL in order to avoid freeing it up when the job is cleaned up.
         */
        exec_job->fence = NULL;

        return &fence->base;
}

static void
nouveau_exec_job_free(struct nouveau_job *job)
{
        struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);

        nouveau_job_done(job);
        nouveau_job_free(job);

        kfree(exec_job->fence);
        kfree(exec_job->push.s);
        kfree(exec_job);
}

static enum drm_gpu_sched_stat
nouveau_exec_job_timeout(struct nouveau_job *job)
{
        struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
        struct nouveau_channel *chan = exec_job->chan;

        if (unlikely(!atomic_read(&chan->killed)))
                nouveau_channel_kill(chan);

        NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
                  chan->chid);

        return DRM_GPU_SCHED_STAT_NOMINAL;
}

static struct nouveau_job_ops nouveau_exec_job_ops = {
        .submit = nouveau_exec_job_submit,
        .armed_submit = nouveau_exec_job_armed_submit,
        .run = nouveau_exec_job_run,
        .free = nouveau_exec_job_free,
        .timeout = nouveau_exec_job_timeout,
};

int
nouveau_exec_job_init(struct nouveau_exec_job **pjob,
                      struct nouveau_exec_job_args *__args)
{
        struct nouveau_exec_job *job;
        struct nouveau_job_args args = {};
        int i, ret;

        for (i = 0; i < __args->push.count; i++) {
                struct drm_nouveau_exec_push *p = &__args->push.s[i];

                if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
                        NV_PRINTK(err, nouveau_cli(__args->file_priv),
                                  "pushbuf size exceeds limit: 0x%x max 0x%x\n",
                                  p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
                        return -EINVAL;
                }
        }

        job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
        if (!job)
                return -ENOMEM;

        job->push.count = __args->push.count;
        if (__args->push.count) {
                job->push.s = kmemdup(__args->push.s,
                                      sizeof(*__args->push.s) *
                                      __args->push.count,
                                      GFP_KERNEL);
                if (!job->push.s) {
                        ret = -ENOMEM;
                        goto err_free_job;
                }
        }

        args.file_priv = __args->file_priv;
        job->chan = __args->chan;

        args.sched = __args->sched;
        /* Plus one to account for the HW fence. */
        args.credits = job->push.count + 1;

        args.in_sync.count = __args->in_sync.count;
        args.in_sync.s = __args->in_sync.s;

        args.out_sync.count = __args->out_sync.count;
        args.out_sync.s = __args->out_sync.s;

        args.ops = &nouveau_exec_job_ops;
        args.resv_usage = DMA_RESV_USAGE_WRITE;

        ret = nouveau_job_init(&job->base, &args);
        if (ret)
                goto err_free_pushs;

        return 0;

err_free_pushs:
        kfree(job->push.s);
err_free_job:
        kfree(job);
        *pjob = NULL;

        return ret;
}

static int
nouveau_exec(struct nouveau_exec_job_args *args)
{
        struct nouveau_exec_job *job;
        int ret;

        ret = nouveau_exec_job_init(&job, args);
        if (ret)
                return ret;

        ret = nouveau_job_submit(&job->base);
        if (ret)
                goto err_job_fini;

        return 0;

err_job_fini:
        nouveau_job_fini(&job->base);
        return ret;
}

static int
nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
                   struct drm_nouveau_exec *req)
{
        struct drm_nouveau_sync **s;
        u32 inc = req->wait_count;
        u64 ins = req->wait_ptr;
        u32 outc = req->sig_count;
        u64 outs = req->sig_ptr;
        u32 pushc = req->push_count;
        u64 pushs = req->push_ptr;
        int ret;

        if (pushc) {
                args->push.count = pushc;
                args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
                if (IS_ERR(args->push.s))
                        return PTR_ERR(args->push.s);
        }

        if (inc) {
                s = &args->in_sync.s;

                args->in_sync.count = inc;
                *s = u_memcpya(ins, inc, sizeof(**s));
                if (IS_ERR(*s)) {
                        ret = PTR_ERR(*s);
                        goto err_free_pushs;
                }
        }

        if (outc) {
                s = &args->out_sync.s;

                args->out_sync.count = outc;
                *s = u_memcpya(outs, outc, sizeof(**s));
                if (IS_ERR(*s)) {
                        ret = PTR_ERR(*s);
                        goto err_free_ins;
                }
        }

        return 0;

err_free_ins:
        u_free(args->in_sync.s);
err_free_pushs:
        u_free(args->push.s);
        return ret;
}

static void
nouveau_exec_ufree(struct nouveau_exec_job_args *args)
{
        u_free(args->push.s);
        u_free(args->in_sync.s);
        u_free(args->out_sync.s);
}

int
nouveau_exec_ioctl_exec(struct drm_device *dev,
                        void *data,
                        struct drm_file *file_priv)
{
        struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
        struct nouveau_cli *cli = nouveau_cli(file_priv);
        struct nouveau_abi16_chan *chan16;
        struct nouveau_channel *chan = NULL;
        struct nouveau_exec_job_args args = {};
        struct drm_nouveau_exec *req = data;
        int push_max, ret = 0;

        if (unlikely(!abi16))
                return -ENOMEM;

        /* abi16 locks already */
        if (unlikely(!nouveau_cli_uvmm(cli)))
                return nouveau_abi16_put(abi16, -ENOSYS);

        list_for_each_entry(chan16, &abi16->channels, head) {
                if (chan16->chan->chid == req->channel) {
                        chan = chan16->chan;
                        break;
                }
        }

        if (!chan)
                return nouveau_abi16_put(abi16, -ENOENT);

        if (unlikely(atomic_read(&chan->killed)))
                return nouveau_abi16_put(abi16, -ENODEV);

        if (!chan->dma.ib_max)
                return nouveau_abi16_put(abi16, -ENOSYS);

        push_max = nouveau_exec_push_max_from_ib_max(chan->dma.ib_max);
        if (unlikely(req->push_count > push_max)) {
                NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
                          req->push_count, push_max);
                return nouveau_abi16_put(abi16, -EINVAL);
        }

        ret = nouveau_exec_ucopy(&args, req);
        if (ret)
                goto out;

        args.sched = chan16->sched;
        args.file_priv = file_priv;
        args.chan = chan;

        ret = nouveau_exec(&args);
        if (ret)
                goto out_free_args;

out_free_args:
        nouveau_exec_ufree(&args);
out:
        return nouveau_abi16_put(abi16, ret);
}