// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x Job
 *
 * Copyright (c) 2010-2015, NVIDIA Corporation.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>

#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"

#define HOST1X_WAIT_SYNCPT_OFFSET 0x8

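/*
 * Allocate a job along with its variable-size trailing arrays (relocations,
 * unpin data, commands and physical addresses) in a single allocation. When
 * the command firewall is enabled, gather buffers are copied rather than
 * pinned, so no unpin entries are reserved for them.
 */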
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
				    u32 num_cmdbufs, u32 num_relocs,
				    bool skip_firewall)
{
	struct host1x_job *job = NULL;
	unsigned int num_unpins = num_relocs;
	bool enable_firewall;
	u64 total;
	void *mem;

	enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;

	if (!enable_firewall)
		num_unpins += num_cmdbufs;

	/* Check that we're not going to overflow */
	total = sizeof(struct host1x_job) +
		(u64)num_relocs * sizeof(struct host1x_reloc) +
		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
		(u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
		(u64)num_unpins * sizeof(dma_addr_t) +
		(u64)num_unpins * sizeof(u32 *);
	if (total > ULONG_MAX)
		return NULL;

	mem = job = kzalloc(total, GFP_KERNEL);
	if (!job)
		return NULL;

	job->enable_firewall = enable_firewall;

	kref_init(&job->ref);
	job->channel = ch;

	/* Redistribute memory to the structs */
	mem += sizeof(struct host1x_job);
	job->relocs = num_relocs ? mem : NULL;
	mem += num_relocs * sizeof(struct host1x_reloc);
	job->unpins = num_unpins ? mem : NULL;
	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
	job->cmds = num_cmdbufs ? mem : NULL;
	mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
	job->addr_phys = num_unpins ? mem : NULL;

	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];

	return job;
}
EXPORT_SYMBOL(host1x_job_alloc);

struct host1x_job *host1x_job_get(struct host1x_job *job)
{
	kref_get(&job->ref);
	return job;
}
EXPORT_SYMBOL(host1x_job_get);

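/*
 * Release callback invoked when the last reference to a job is dropped:
 * runs the owner's release hook, detaches and drops the DMA fence, and
 * releases the syncpoint before freeing the job itself.
 */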
static void job_free(struct kref *ref)
{
	struct host1x_job *job = container_of(ref, struct host1x_job, ref);

	if (job->release)
		job->release(job);

	if (job->fence) {
		/*
		 * remove_callback is atomic w.r.t. fence signaling, so
		 * after the call returns, we know that the callback is not
		 * in execution, and the fence can be safely freed.
		 */
		dma_fence_remove_callback(job->fence, &job->fence_cb);
		dma_fence_put(job->fence);
	}

	if (job->syncpt)
		host1x_syncpt_put(job->syncpt);

	kfree(job);
}

void host1x_job_put(struct host1x_job *job)
{
	kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);

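/* Append a gather (a command buffer fragment to be fetched by CDMA) to the job. */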
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
			   unsigned int words, unsigned int offset)
{
	struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;

	gather->words = words;
	gather->bo = bo;
	gather->offset = offset;

	job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_gather);

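/* Append a syncpoint wait command to the job's command stream. */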
void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
			 bool relative, u32 next_class)
{
	struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];

	cmd->is_wait = true;
	cmd->wait.id = id;
	cmd->wait.threshold = thresh;
	cmd->wait.next_class = next_class;
	cmd->wait.relative = relative;

	job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_wait);

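/*
 * Pin every buffer referenced by the job: relocation targets are mapped
 * for DMA according to their read/write flags, and (unless the firewall
 * will copy them) gather buffers are mapped for the host1x device itself,
 * optionally through its IOMMU domain. Each successful mapping is recorded
 * so that host1x_job_unpin() can undo it on failure or completion.
 */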
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
	unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
	struct host1x_client *client = job->client;
	struct device *dev = client->dev;
	struct host1x_job_gather *g;
	unsigned int i;
	int err;

	job->num_unpins = 0;

	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		enum dma_data_direction direction;
		struct host1x_bo_mapping *map;
		struct host1x_bo *bo;

		reloc->target.bo = host1x_bo_get(reloc->target.bo);
		if (!reloc->target.bo) {
			err = -EINVAL;
			goto unpin;
		}

		bo = reloc->target.bo;

		switch (reloc->flags & mask) {
		case HOST1X_RELOC_READ:
			direction = DMA_TO_DEVICE;
			break;

		case HOST1X_RELOC_WRITE:
			direction = DMA_FROM_DEVICE;
			break;

		case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
			direction = DMA_BIDIRECTIONAL;
			break;

		default:
			err = -EINVAL;
			goto unpin;
		}

		map = host1x_bo_pin(dev, bo, direction, NULL);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto unpin;
		}

		/*
		 * host1x clients are generally not able to do scatter-gather
		 * themselves, so fail if the buffer is discontiguous and we
		 * fail to map its SG table to a single contiguous chunk of
		 * I/O virtual memory.
		 */
		if (map->chunks > 1) {
			err = -EINVAL;
			goto unpin;
		}

		job->addr_phys[job->num_unpins] = map->phys;
		job->unpins[job->num_unpins].map = map;
		job->num_unpins++;
	}

	/*
	 * We will copy the contents of the gather BOs later, so there is
	 * no need to hold and pin them.
	 */
	if (job->enable_firewall)
		return 0;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_bo_mapping *map;
		size_t gather_size = 0;
		struct scatterlist *sg;
		unsigned long shift;
		struct iova *alloc;
		unsigned int j;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		g->bo = host1x_bo_get(g->bo);
		if (!g->bo) {
			err = -EINVAL;
			goto unpin;
		}

		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto unpin;
		}

		if (host->domain) {
			for_each_sgtable_sg(map->sgt, sg, j)
				gather_size += sg->length;

			gather_size = iova_align(&host->iova, gather_size);

			shift = iova_shift(&host->iova);
			alloc = alloc_iova(&host->iova, gather_size >> shift,
					   host->iova_end >> shift, true);
			if (!alloc) {
				err = -ENOMEM;
				goto put;
			}

			err = iommu_map_sgtable(host->domain,
						iova_dma_addr(&host->iova, alloc),
						map->sgt, IOMMU_READ);
			if (err == 0) {
				__free_iova(&host->iova, alloc);
				err = -EINVAL;
				goto put;
			}

			map->phys = iova_dma_addr(&host->iova, alloc);
			map->size = gather_size;
		}

		job->addr_phys[job->num_unpins] = map->phys;
		job->unpins[job->num_unpins].map = map;
		job->num_unpins++;

		job->gather_addr_phys[i] = map->phys;
	}

	return 0;

put:
	host1x_bo_put(g->bo);
unpin:
	host1x_job_unpin(job);
	return err;
}

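/*
 * Patch every relocation that targets the given gather's command buffer,
 * writing the pinned DMA address (shifted as requested) into the command
 * stream. With the firewall enabled, the patch is applied to the gather
 * copy instead of the original buffer.
 */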
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
	void *cmdbuf_addr = NULL;
	struct host1x_bo *cmdbuf = g->bo;
	unsigned int i;

	/* pin & patch the relocs for one gather */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		u32 reloc_addr = (job->reloc_addr_phys[i] +
				  reloc->target.offset) >> reloc->shift;
		u32 *target;

		/* skip all other gathers */
		if (cmdbuf != reloc->cmdbuf.bo)
			continue;

		if (job->enable_firewall) {
			target = (u32 *)job->gather_copy_mapped +
				 reloc->cmdbuf.offset / sizeof(u32) +
				 g->offset / sizeof(u32);
			goto patch_reloc;
		}

		if (!cmdbuf_addr) {
			cmdbuf_addr = host1x_bo_mmap(cmdbuf);

			if (unlikely(!cmdbuf_addr)) {
				pr_err("Could not map cmdbuf for relocation\n");
				return -ENOMEM;
			}
		}

		target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
		*target = reloc_addr;
	}

	if (cmdbuf_addr)
		host1x_bo_munmap(cmdbuf, cmdbuf_addr);

	return 0;
}

static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
			unsigned int offset)
{
	offset *= sizeof(u32);

	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
		return false;

	/* relocation shift value validation isn't implemented yet */
	if (reloc->shift)
		return false;

	return true;
}

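/*
 * Parser state for the software command firewall: tracks the current
 * class, register, mask and remaining word count while walking a gather,
 * together with the relocation list that must be consumed in order.
 */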
struct host1x_firewall {
	struct host1x_job *job;
	struct device *dev;

	unsigned int num_relocs;
	struct host1x_reloc *reloc;

	struct host1x_bo *cmdbuf;
	unsigned int offset;

	u32 words;
	u32 class;
	u32 reg;
	u32 mask;
	u32 count;
};

static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
	if (!fw->job->is_addr_reg)
		return 0;

	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
		if (!fw->num_relocs)
			return -EINVAL;

		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
			return -EINVAL;

		fw->num_relocs--;
		fw->reloc++;
	}

	return 0;
}

static int check_class(struct host1x_firewall *fw, u32 class)
{
	if (!fw->job->is_valid_class) {
		if (fw->class != class)
			return -EINVAL;
	} else {
		if (!fw->job->is_valid_class(fw->class))
			return -EINVAL;
	}

	return 0;
}

static int check_mask(struct host1x_firewall *fw)
{
	u32 mask = fw->mask;
	u32 reg = fw->reg;
	int ret;

	while (mask) {
		if (fw->words == 0)
			return -EINVAL;

		if (mask & 1) {
			ret = check_register(fw, reg);
			if (ret < 0)
				return ret;

			fw->words--;
			fw->offset++;
		}
		mask >>= 1;
		reg++;
	}

	return 0;
}

static int check_incr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	u32 reg = fw->reg;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, reg);
		if (ret < 0)
			return ret;

		reg++;
		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

static int check_nonincr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, fw->reg);
		if (ret < 0)
			return ret;

		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

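/*
 * Walk one gather word by word, decoding the opcode in the top nibble of
 * each command word and checking that every write to an address register
 * is backed by a matching relocation. Opcodes 4 and 14 carry no register
 * writes to check; unknown opcodes fail the job.
 */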
455 | |
456 | static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g) |
457 | { |
458 | u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped + |
459 | (g->offset / sizeof(u32)); |
460 | u32 job_class = fw->class; |
461 | int err = 0; |
462 | |
463 | fw->words = g->words; |
464 | fw->cmdbuf = g->bo; |
465 | fw->offset = 0; |
466 | |
467 | while (fw->words && !err) { |
468 | u32 word = cmdbuf_base[fw->offset]; |
469 | u32 opcode = (word & 0xf0000000) >> 28; |
470 | |
471 | fw->mask = 0; |
472 | fw->reg = 0; |
473 | fw->count = 0; |
474 | fw->words--; |
475 | fw->offset++; |
476 | |
477 | switch (opcode) { |
478 | case 0: |
479 | fw->class = word >> 6 & 0x3ff; |
480 | fw->mask = word & 0x3f; |
481 | fw->reg = word >> 16 & 0xfff; |
482 | err = check_class(fw, class: job_class); |
483 | if (!err) |
484 | err = check_mask(fw); |
485 | if (err) |
486 | goto out; |
487 | break; |
488 | case 1: |
489 | fw->reg = word >> 16 & 0xfff; |
490 | fw->count = word & 0xffff; |
491 | err = check_incr(fw); |
492 | if (err) |
493 | goto out; |
494 | break; |
495 | |
496 | case 2: |
497 | fw->reg = word >> 16 & 0xfff; |
498 | fw->count = word & 0xffff; |
499 | err = check_nonincr(fw); |
500 | if (err) |
501 | goto out; |
502 | break; |
503 | |
504 | case 3: |
505 | fw->mask = word & 0xffff; |
506 | fw->reg = word >> 16 & 0xfff; |
507 | err = check_mask(fw); |
508 | if (err) |
509 | goto out; |
510 | break; |
511 | case 4: |
512 | case 14: |
513 | break; |
514 | default: |
515 | err = -EINVAL; |
516 | break; |
517 | } |
518 | } |
519 | |
520 | out: |
521 | return err; |
522 | } |
523 | |
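/*
 * Copy all gather buffers into one contiguous, write-combined allocation
 * and run the firewall over the copy, so that the validated commands
 * cannot be modified afterwards behind the firewall's back.
 */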
static inline int copy_gathers(struct device *host, struct host1x_job *job,
			       struct device *dev)
{
	struct host1x_firewall fw;
	size_t size = 0;
	size_t offset = 0;
	unsigned int i;

	fw.job = job;
	fw.dev = dev;
	fw.reloc = job->relocs;
	fw.num_relocs = job->num_relocs;
	fw.class = job->class;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		size += g->words * sizeof(u32);
	}

	/*
	 * Try a non-blocking allocation from the higher-priority pools
	 * first, since waiting for the allocation here is a major
	 * performance hit.
	 */
	job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
					       GFP_NOWAIT);

	/* the higher-priority allocation failed, try a generic blocking one */
	if (!job->gather_copy_mapped)
		job->gather_copy_mapped = dma_alloc_wc(host, size,
						       &job->gather_copy,
						       GFP_KERNEL);
	if (!job->gather_copy_mapped)
		return -ENOMEM;

	job->gather_copy_size = size;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;
		void *gather;

		if (job->cmds[i].is_wait)
			continue;
		g = &job->cmds[i].gather;

		/* Copy the gather */
		gather = host1x_bo_mmap(g->bo);
		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
		       g->words * sizeof(u32));
		host1x_bo_munmap(g->bo, gather);

		/* Store the location in the buffer */
		g->base = job->gather_copy;
		g->offset = offset;

		/* Validate the job */
		if (validate(&fw, g))
			return -EINVAL;

		offset += g->words * sizeof(u32);
	}

	/* No relocs should remain at this point */
	if (fw.num_relocs)
		return -EINVAL;

	return 0;
}

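/*
 * Prepare a job for submission: pin all referenced buffers, copy and
 * validate the gathers if the firewall is enabled, then patch every
 * gather's relocations with the final DMA addresses. Gathers sharing a
 * buffer object are patched only once.
 */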
int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
	int err;
	unsigned int i, j;
	struct host1x *host = dev_get_drvdata(dev->parent);

	/* pin memory */
	err = pin_job(host, job);
	if (err)
		goto out;

	if (job->enable_firewall) {
		err = copy_gathers(host->dev, job, dev);
		if (err)
			goto out;
	}

	/* patch gathers */
	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;

		if (job->cmds[i].is_wait)
			continue;
		g = &job->cmds[i].gather;

		/* process each gather mem only once */
		if (g->handled)
			continue;

		/* copy_gathers() sets a gather's base when the firewall is enabled */
		if (!job->enable_firewall)
			g->base = job->gather_addr_phys[i];

		for (j = i + 1; j < job->num_cmds; j++) {
			if (!job->cmds[j].is_wait &&
			    job->cmds[j].gather.bo == g->bo) {
				job->cmds[j].gather.handled = true;
				job->cmds[j].gather.base = g->base;
			}
		}

		err = do_relocs(job, g);
		if (err)
			break;
	}

out:
	if (err)
		host1x_job_unpin(job);
	wmb();

	return err;
}
EXPORT_SYMBOL(host1x_job_pin);

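/*
 * Undo the work done by pin_job()/host1x_job_pin(): tear down per-job
 * IOMMU mappings, release each pinned mapping and buffer reference, and
 * free the firewall's gather copy if one was allocated.
 */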
void host1x_job_unpin(struct host1x_job *job)
{
	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
	unsigned int i;

	for (i = 0; i < job->num_unpins; i++) {
		struct host1x_bo_mapping *map = job->unpins[i].map;
		struct host1x_bo *bo = map->bo;

		if (!job->enable_firewall && map->size && host->domain) {
			iommu_unmap(host->domain, job->addr_phys[i], map->size);
			free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
		}

		host1x_bo_unpin(map);
		host1x_bo_put(bo);
	}

	job->num_unpins = 0;

	if (job->gather_copy_size)
		dma_free_wc(host->dev, job->gather_copy_size,
			    job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);

/*
 * Debug routine used to dump job entries
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt->id);
	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
	dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
}