// SPDX-License-Identifier: MIT

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_chan.h"
#include "nouveau_sched.h"
#include "nouveau_uvmm.h"

/**
 * DOC: Overview
 *
 * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
 * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
 *
 * To use the UAPI, a user client must first initialize the VA space using the
 * DRM_NOUVEAU_VM_INIT ioctl, specifying which region of the VA space should be
 * managed by the kernel and which by the UMD.
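 *
 * For instance, a minimal sketch of such an initialization, using libdrm's
 * drmCommandWrite() helper on an open DRM file descriptor and reserving the
 * low 1 GiB of the VA space for the kernel (the exact split is the UMD's
 * choice; error handling is omitted)::
 *
 *	struct drm_nouveau_vm_init init = {
 *		.kernel_managed_addr = 0,
 *		.kernel_managed_size = 1ULL << 30,
 *	};
 *
 *	drmCommandWrite(fd, DRM_NOUVEAU_VM_INIT, &init, sizeof(init));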
 *
 * The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
 * userspace-manageable portion of the VA space. It provides operations to map
 * and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
 * backed by a GEM object and the kernel will ignore GEM handles provided
 * alongside a sparse mapping.
 *
 * Userspace may request memory backed mappings either within or outside of the
 * bounds (but not crossing those bounds) of a previously mapped sparse
 * mapping. Subsequently requested memory backed mappings within a sparse
 * mapping will take precedence over the corresponding range of the sparse
 * mapping. If such memory backed mappings are unmapped the kernel will make
 * sure that the corresponding sparse mapping takes their place again.
 * Requests to unmap a sparse mapping that still contains memory backed
 * mappings will result in those memory backed mappings being unmapped first.
 *
 * Unmap requests are not bound to the range of existing mappings and can even
 * overlap the bounds of sparse mappings. For such a request the kernel will
 * make sure to unmap all memory backed mappings within the given range,
 * splitting up memory backed mappings which are only partially contained
 * within the given range. Unmap requests with the sparse flag set must,
 * however, exactly match the range of a previously mapped sparse mapping.
 *
 * While the kernel generally permits arbitrary sequences and ranges of memory
 * backed mappings being mapped and unmapped, either within a single or multiple
 * VM_BIND ioctl calls, there are some restrictions for sparse mappings.
 *
 * The kernel does not permit the following:
 *   - unmap non-existent sparse mappings
 *   - unmap a sparse mapping and map a new sparse mapping overlapping the range
 *     of the previously unmapped sparse mapping within the same VM_BIND ioctl
 *   - unmap a sparse mapping and map new memory backed mappings overlapping the
 *     range of the previously unmapped sparse mapping within the same VM_BIND
 *     ioctl
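 *
 * As a rough illustration of the mapping rules above, the following sketch
 * creates a sparse region and then places a memory backed mapping inside of it
 * with a single, synchronously executed VM_BIND ioctl. Struct, op and flag
 * names follow the nouveau uAPI header; sparse_va, bo_handle and bo_size are
 * placeholders and error handling is omitted::
 *
 *	struct drm_nouveau_vm_bind_op ops[] = {
 *		{
 *			.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.flags = DRM_NOUVEAU_VM_BIND_SPARSE,
 *			.addr = sparse_va,
 *			.range = 1ULL << 30,
 *		},
 *		{
 *			.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.handle = bo_handle,
 *			.bo_offset = 0,
 *			.addr = sparse_va + 0x100000,
 *			.range = bo_size,
 *		},
 *	};
 *	struct drm_nouveau_vm_bind bind = {
 *		.op_count = 2,
 *		.op_ptr = (uintptr_t)ops,
 *	};
 *
 *	drmCommandWrite(fd, DRM_NOUVEAU_VM_BIND, &bind, sizeof(bind));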
 *
 * When using the VM_BIND ioctl to request the kernel to map memory to a given
 * virtual address in the GPU's VA space there is no guarantee that the actual
 * mappings are created in the GPU's MMU. If the given memory is swapped out
 * at the time the bind operation is executed the kernel will stash the mapping
 * details into its internal allocator and create the actual MMU mappings once
 * the memory is swapped back in. While this is transparent to userspace, it is
 * guaranteed that all the backing memory is swapped back in and all the memory
 * mappings, as requested by userspace previously, are actually mapped once the
 * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
 *
 * A VM_BIND job can be executed either synchronously or asynchronously. If
 * executed asynchronously, userspace may provide a list of syncobjs this job
 * will wait for and/or a list of syncobjs the kernel will signal once the
 * VM_BIND job finishes execution. If executed synchronously, the ioctl will
 * block until the bind job is finished; for synchronous jobs the kernel does
 * not permit any syncobjs to be submitted.
 *
 * To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
 * jobs are always executed asynchronously, and, like VM_BIND jobs, provide
 * the option to synchronize them with syncobjs.
 *
 * Besides that, EXEC jobs can be scheduled for a specified channel to execute on.
 *
 * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs have an
 * up-to-date view of the VA space. However, the actual mappings might still be
 * pending. Hence, EXEC jobs require the fences of the corresponding VM_BIND
 * jobs they depend on to be attached to them.
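 *
 * Combining the two, the following sketch submits an asynchronous VM_BIND job
 * that signals a syncobj on completion, followed by an EXEC job that waits on
 * that syncobj before executing a push buffer. map_op, syncobj_handle,
 * channel, pushbuf_va and pushbuf_len are placeholders; struct layouts follow
 * the nouveau uAPI header and error handling is omitted::
 *
 *	struct drm_nouveau_sync sync = {
 *		.flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
 *		.handle = syncobj_handle,
 *	};
 *	struct drm_nouveau_vm_bind bind = {
 *		.flags = DRM_NOUVEAU_VM_BIND_RUN_ASYNC,
 *		.op_count = 1,
 *		.op_ptr = (uintptr_t)&map_op,
 *		.sig_count = 1,
 *		.sig_ptr = (uintptr_t)&sync,
 *	};
 *	struct drm_nouveau_exec_push push = {
 *		.va = pushbuf_va,
 *		.va_len = pushbuf_len,
 *	};
 *	struct drm_nouveau_exec exec = {
 *		.channel = channel,
 *		.push_count = 1,
 *		.push_ptr = (uintptr_t)&push,
 *		.wait_count = 1,
 *		.wait_ptr = (uintptr_t)&sync,
 *	};
 *
 *	drmCommandWrite(fd, DRM_NOUVEAU_VM_BIND, &bind, sizeof(bind));
 *	drmCommandWrite(fd, DRM_NOUVEAU_EXEC, &exec, sizeof(exec));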
 */

static int
nouveau_exec_job_submit(struct nouveau_job *job,
			struct drm_gpuvm_exec *vme)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_cli *cli = job->cli;
	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
	int ret;

	/* Create a new fence, but do not emit yet. */
	ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
	if (ret)
		return ret;

	nouveau_uvmm_lock(uvmm);
	ret = drm_gpuvm_exec_lock(vme);
	if (ret) {
		nouveau_uvmm_unlock(uvmm);
		return ret;
	}
	nouveau_uvmm_unlock(uvmm);

	ret = drm_gpuvm_exec_validate(vme);
	if (ret) {
		drm_gpuvm_exec_unlock(vme);
		return ret;
	}

	return 0;
}

static void
nouveau_exec_job_armed_submit(struct nouveau_job *job,
			      struct drm_gpuvm_exec *vme)
{
	drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
				      job->resv_usage, job->resv_usage);
	drm_gpuvm_exec_unlock(vme);
}

static struct dma_fence *
nouveau_exec_job_run(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;
	struct nouveau_fence *fence = exec_job->fence;
	int i, ret;

	ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
	if (ret) {
		NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
		return ERR_PTR(ret);
	}

	for (i = 0; i < exec_job->push.count; i++) {
		struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
		bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

		nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
	}

	ret = nouveau_fence_emit(fence);
	if (ret) {
		nouveau_fence_unref(&exec_job->fence);
		NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
		WIND_RING(chan);
		return ERR_PTR(ret);
	}

	/* The fence was emitted successfully, set the job's fence pointer to
	 * NULL in order to avoid freeing it up when the job is cleaned up.
	 */
	exec_job->fence = NULL;

	return &fence->base;
}

static void
nouveau_exec_job_free(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);

	nouveau_job_done(job);
	nouveau_job_free(job);

	kfree(exec_job->fence);
	kfree(exec_job->push.s);
	kfree(exec_job);
}

static enum drm_gpu_sched_stat
nouveau_exec_job_timeout(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;

	if (unlikely(!atomic_read(&chan->killed)))
		nouveau_channel_kill(chan);

	NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
		  chan->chid);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static struct nouveau_job_ops nouveau_exec_job_ops = {
	.submit = nouveau_exec_job_submit,
	.armed_submit = nouveau_exec_job_armed_submit,
	.run = nouveau_exec_job_run,
	.free = nouveau_exec_job_free,
	.timeout = nouveau_exec_job_timeout,
};

int
nouveau_exec_job_init(struct nouveau_exec_job **pjob,
		      struct nouveau_exec_job_args *__args)
{
	struct nouveau_exec_job *job;
	struct nouveau_job_args args = {};
	int i, ret;

	for (i = 0; i < __args->push.count; i++) {
		struct drm_nouveau_exec_push *p = &__args->push.s[i];

		if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
			NV_PRINTK(err, nouveau_cli(__args->file_priv),
				  "pushbuf size exceeds limit: 0x%x max 0x%x\n",
				  p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
			return -EINVAL;
		}
	}

	job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	job->push.count = __args->push.count;
	if (__args->push.count) {
		job->push.s = kmemdup(__args->push.s,
				      sizeof(*__args->push.s) *
				      __args->push.count,
				      GFP_KERNEL);
		if (!job->push.s) {
			ret = -ENOMEM;
			goto err_free_job;
		}
	}

	args.file_priv = __args->file_priv;
	job->chan = __args->chan;

	args.sched = __args->sched;
	/* Plus one to account for the HW fence. */
	args.credits = job->push.count + 1;

	args.in_sync.count = __args->in_sync.count;
	args.in_sync.s = __args->in_sync.s;

	args.out_sync.count = __args->out_sync.count;
	args.out_sync.s = __args->out_sync.s;

	args.ops = &nouveau_exec_job_ops;
	args.resv_usage = DMA_RESV_USAGE_WRITE;

	ret = nouveau_job_init(&job->base, &args);
	if (ret)
		goto err_free_pushs;

	return 0;

err_free_pushs:
	kfree(job->push.s);
err_free_job:
	kfree(job);
	*pjob = NULL;

	return ret;
}

static int
nouveau_exec(struct nouveau_exec_job_args *args)
{
	struct nouveau_exec_job *job;
	int ret;

	ret = nouveau_exec_job_init(&job, args);
	if (ret)
		return ret;

	ret = nouveau_job_submit(&job->base);
	if (ret)
		goto err_job_fini;

	return 0;

err_job_fini:
	nouveau_job_fini(&job->base);
	return ret;
}

static int
nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
		   struct drm_nouveau_exec *req)
{
	struct drm_nouveau_sync **s;
	u32 inc = req->wait_count;
	u64 ins = req->wait_ptr;
	u32 outc = req->sig_count;
	u64 outs = req->sig_ptr;
	u32 pushc = req->push_count;
	u64 pushs = req->push_ptr;
	int ret;

	if (pushc) {
		args->push.count = pushc;
		args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
		if (IS_ERR(args->push.s))
			return PTR_ERR(args->push.s);
	}

	if (inc) {
		s = &args->in_sync.s;

		args->in_sync.count = inc;
		*s = u_memcpya(ins, inc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_pushs;
		}
	}

	if (outc) {
		s = &args->out_sync.s;

		args->out_sync.count = outc;
		*s = u_memcpya(outs, outc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_ins;
		}
	}

	return 0;

err_free_pushs:
	u_free(args->push.s);
err_free_ins:
	u_free(args->in_sync.s);
	return ret;
}

static void
nouveau_exec_ufree(struct nouveau_exec_job_args *args)
{
	u_free(args->push.s);
	u_free(args->in_sync.s);
	u_free(args->out_sync.s);
}

int
nouveau_exec_ioctl_exec(struct drm_device *dev,
			void *data,
			struct drm_file *file_priv)
{
	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
	struct nouveau_cli *cli = nouveau_cli(file_priv);
	struct nouveau_abi16_chan *chan16;
	struct nouveau_channel *chan = NULL;
	struct nouveau_exec_job_args args = {};
	struct drm_nouveau_exec *req = data;
	int push_max, ret = 0;

	if (unlikely(!abi16))
		return -ENOMEM;

	/* abi16 locks already */
	if (unlikely(!nouveau_cli_uvmm(cli)))
		return nouveau_abi16_put(abi16, -ENOSYS);

	list_for_each_entry(chan16, &abi16->channels, head) {
		if (chan16->chan->chid == req->channel) {
			chan = chan16->chan;
			break;
		}
	}

	if (!chan)
		return nouveau_abi16_put(abi16, -ENOENT);

	if (unlikely(atomic_read(&chan->killed)))
		return nouveau_abi16_put(abi16, -ENODEV);

	if (!chan->dma.ib_max)
		return nouveau_abi16_put(abi16, -ENOSYS);

	push_max = nouveau_exec_push_max_from_ib_max(chan->dma.ib_max);
	if (unlikely(req->push_count > push_max)) {
		NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
			  req->push_count, push_max);
		return nouveau_abi16_put(abi16, -EINVAL);
	}

	ret = nouveau_exec_ucopy(&args, req);
	if (ret)
		goto out;

	args.sched = chan16->sched;
	args.file_priv = file_priv;
	args.chan = chan;

	ret = nouveau_exec(&args);
	if (ret)
		goto out_free_args;

out_free_args:
	nouveau_exec_ufree(&args);
out:
	return nouveau_abi16_put(abi16, ret);
}