// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x Job
 *
 * Copyright (c) 2010-2015, NVIDIA Corporation.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>

#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"

#define HOST1X_WAIT_SYNCPT_OFFSET 0x8

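/*
 * Allocate a job along with its variable-size trailing arrays (relocations,
 * unpin data, commands and physical addresses) in a single allocation. When
 * the command firewall is enabled, gather buffers are copied rather than
 * pinned, so no unpin entries are reserved for them.
 */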
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
				    u32 num_cmdbufs, u32 num_relocs,
				    bool skip_firewall)
{
	struct host1x_job *job = NULL;
	unsigned int num_unpins = num_relocs;
	bool enable_firewall;
	u64 total;
	void *mem;

	enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;

	if (!enable_firewall)
		num_unpins += num_cmdbufs;

	/* Check that we're not going to overflow */
	total = sizeof(struct host1x_job) +
		(u64)num_relocs * sizeof(struct host1x_reloc) +
		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
		(u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
		(u64)num_unpins * sizeof(dma_addr_t) +
		(u64)num_unpins * sizeof(u32 *);
	if (total > ULONG_MAX)
		return NULL;

	mem = job = kzalloc(total, GFP_KERNEL);
	if (!job)
		return NULL;

	job->enable_firewall = enable_firewall;

	kref_init(&job->ref);
	job->channel = ch;

	/* Redistribute memory to the structs */
	mem += sizeof(struct host1x_job);
	job->relocs = num_relocs ? mem : NULL;
	mem += num_relocs * sizeof(struct host1x_reloc);
	job->unpins = num_unpins ? mem : NULL;
	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
	job->cmds = num_cmdbufs ? mem : NULL;
	mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
	job->addr_phys = num_unpins ? mem : NULL;

	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];

	return job;
}
EXPORT_SYMBOL(host1x_job_alloc);

struct host1x_job *host1x_job_get(struct host1x_job *job)
{
	kref_get(&job->ref);
	return job;
}
EXPORT_SYMBOL(host1x_job_get);

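/*
 * Release callback invoked when the last reference to a job is dropped:
 * runs the owner's release hook, detaches and drops the DMA fence, and
 * releases the syncpoint before freeing the job itself.
 */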
static void job_free(struct kref *ref)
{
	struct host1x_job *job = container_of(ref, struct host1x_job, ref);

	if (job->release)
		job->release(job);

	if (job->fence) {
		/*
		 * remove_callback is atomic w.r.t. fence signaling, so
		 * after the call returns, we know that the callback is not
		 * in execution, and the fence can be safely freed.
		 */
		dma_fence_remove_callback(job->fence, &job->fence_cb);
		dma_fence_put(job->fence);
	}

	if (job->syncpt)
		host1x_syncpt_put(job->syncpt);

	kfree(job);
}

void host1x_job_put(struct host1x_job *job)
{
	kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);

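/* Append a gather (a command buffer fragment to be fetched by CDMA) to the job. */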
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
			   unsigned int words, unsigned int offset)
{
	struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;

	gather->words = words;
	gather->bo = bo;
	gather->offset = offset;

	job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_gather);

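/* Append a syncpoint wait command to the job's command stream. */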
void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
			 bool relative, u32 next_class)
{
	struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];

	cmd->is_wait = true;
	cmd->wait.id = id;
	cmd->wait.threshold = thresh;
	cmd->wait.next_class = next_class;
	cmd->wait.relative = relative;

	job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_wait);

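/*
 * Pin every buffer referenced by the job: relocation targets are mapped
 * for DMA according to their read/write flags, and (unless the firewall
 * will copy them) gather buffers are mapped for the host1x device itself,
 * optionally through its IOMMU domain. Each successful mapping is recorded
 * so that host1x_job_unpin() can undo it on failure or completion.
 */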
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
	unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
	struct host1x_client *client = job->client;
	struct device *dev = client->dev;
	struct host1x_job_gather *g;
	unsigned int i;
	int err;

	job->num_unpins = 0;

	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		enum dma_data_direction direction;
		struct host1x_bo_mapping *map;
		struct host1x_bo *bo;

		reloc->target.bo = host1x_bo_get(reloc->target.bo);
		if (!reloc->target.bo) {
			err = -EINVAL;
			goto unpin;
		}

		bo = reloc->target.bo;

		switch (reloc->flags & mask) {
		case HOST1X_RELOC_READ:
			direction = DMA_TO_DEVICE;
			break;

		case HOST1X_RELOC_WRITE:
			direction = DMA_FROM_DEVICE;
			break;

		case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
			direction = DMA_BIDIRECTIONAL;
			break;

		default:
			err = -EINVAL;
			goto unpin;
		}

		map = host1x_bo_pin(dev, bo, direction, NULL);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto unpin;
		}

		/*
		 * host1x clients are generally not able to do scatter-gather
		 * themselves, so fail if the buffer is discontiguous and we
		 * fail to map its SG table to a single contiguous chunk of
		 * I/O virtual memory.
		 */
		if (map->chunks > 1) {
			err = -EINVAL;
			goto unpin;
		}

		job->addr_phys[job->num_unpins] = map->phys;
		job->unpins[job->num_unpins].map = map;
		job->num_unpins++;
	}

	/*
	 * We will copy the contents of the gather BOs later, so there is
	 * no need to hold and pin them.
	 */
	if (job->enable_firewall)
		return 0;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_bo_mapping *map;
		size_t gather_size = 0;
		struct scatterlist *sg;
		unsigned long shift;
		struct iova *alloc;
		unsigned int j;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		g->bo = host1x_bo_get(g->bo);
		if (!g->bo) {
			err = -EINVAL;
			goto unpin;
		}

		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto unpin;
		}

		if (host->domain) {
			for_each_sgtable_sg(map->sgt, sg, j)
				gather_size += sg->length;

			gather_size = iova_align(&host->iova, gather_size);

			shift = iova_shift(&host->iova);
			alloc = alloc_iova(&host->iova, gather_size >> shift,
					   host->iova_end >> shift, true);
			if (!alloc) {
				err = -ENOMEM;
				goto put;
			}

			err = iommu_map_sgtable(host->domain,
						iova_dma_addr(&host->iova, alloc),
						map->sgt, IOMMU_READ);
			if (err == 0) {
				__free_iova(&host->iova, alloc);
				err = -EINVAL;
				goto put;
			}

			map->phys = iova_dma_addr(&host->iova, alloc);
			map->size = gather_size;
		}

		job->addr_phys[job->num_unpins] = map->phys;
		job->unpins[job->num_unpins].map = map;
		job->num_unpins++;

		job->gather_addr_phys[i] = map->phys;
	}

	return 0;

put:
	host1x_bo_put(g->bo);
unpin:
	host1x_job_unpin(job);
	return err;
}

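/*
 * Patch every relocation that targets the given gather's command buffer,
 * writing the pinned DMA address (shifted as requested) into the command
 * stream. With the firewall enabled, the patch is applied to the gather
 * copy instead of the original buffer.
 */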
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
	void *cmdbuf_addr = NULL;
	struct host1x_bo *cmdbuf = g->bo;
	unsigned int i;

	/* pin & patch the relocs for one gather */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		u32 reloc_addr = (job->reloc_addr_phys[i] +
				  reloc->target.offset) >> reloc->shift;
		u32 *target;

		/* skip all other gathers */
		if (cmdbuf != reloc->cmdbuf.bo)
			continue;

		if (job->enable_firewall) {
			target = (u32 *)job->gather_copy_mapped +
				 reloc->cmdbuf.offset / sizeof(u32) +
				 g->offset / sizeof(u32);
			goto patch_reloc;
		}

		if (!cmdbuf_addr) {
			cmdbuf_addr = host1x_bo_mmap(cmdbuf);

			if (unlikely(!cmdbuf_addr)) {
				pr_err("Could not map cmdbuf for relocation\n");
				return -ENOMEM;
			}
		}

		target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
		*target = reloc_addr;
	}

	if (cmdbuf_addr)
		host1x_bo_munmap(cmdbuf, cmdbuf_addr);

	return 0;
}

static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
			unsigned int offset)
{
	offset *= sizeof(u32);

	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
		return false;

	/* relocation shift value validation isn't implemented yet */
	if (reloc->shift)
		return false;

	return true;
}

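/*
 * Parser state for the software command firewall: tracks the current
 * class, register, mask and remaining word count while walking a gather,
 * together with the relocation list that must be consumed in order.
 */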
struct host1x_firewall {
	struct host1x_job *job;
	struct device *dev;

	unsigned int num_relocs;
	struct host1x_reloc *reloc;

	struct host1x_bo *cmdbuf;
	unsigned int offset;

	u32 words;
	u32 class;
	u32 reg;
	u32 mask;
	u32 count;
};

static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
	if (!fw->job->is_addr_reg)
		return 0;

	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
		if (!fw->num_relocs)
			return -EINVAL;

		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
			return -EINVAL;

		fw->num_relocs--;
		fw->reloc++;
	}

	return 0;
}

static int check_class(struct host1x_firewall *fw, u32 class)
{
	if (!fw->job->is_valid_class) {
		if (fw->class != class)
			return -EINVAL;
	} else {
		if (!fw->job->is_valid_class(fw->class))
			return -EINVAL;
	}

	return 0;
}

static int check_mask(struct host1x_firewall *fw)
{
	u32 mask = fw->mask;
	u32 reg = fw->reg;
	int ret;

	while (mask) {
		if (fw->words == 0)
			return -EINVAL;

		if (mask & 1) {
			ret = check_register(fw, reg);
			if (ret < 0)
				return ret;

			fw->words--;
			fw->offset++;
		}
		mask >>= 1;
		reg++;
	}

	return 0;
}

static int check_incr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	u32 reg = fw->reg;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, reg);
		if (ret < 0)
			return ret;

		reg++;
		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

static int check_nonincr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, fw->reg);
		if (ret < 0)
			return ret;

		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

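/*
 * Walk one gather word by word, decoding the opcode in the top nibble of
 * each command word and checking that every write to an address register
 * is backed by a matching relocation. Opcodes 4 and 14 carry no register
 * writes to check; unknown opcodes fail the job.
 */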
455 | |
456 | static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g) |
457 | { |
458 | u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped + |
459 | (g->offset / sizeof(u32)); |
460 | u32 job_class = fw->class; |
461 | int err = 0; |
462 | |
463 | fw->words = g->words; |
464 | fw->cmdbuf = g->bo; |
465 | fw->offset = 0; |
466 | |
467 | while (fw->words && !err) { |
468 | u32 word = cmdbuf_base[fw->offset]; |
469 | u32 opcode = (word & 0xf0000000) >> 28; |
470 | |
471 | fw->mask = 0; |
472 | fw->reg = 0; |
473 | fw->count = 0; |
474 | fw->words--; |
475 | fw->offset++; |
476 | |
477 | switch (opcode) { |
478 | case 0: |
479 | fw->class = word >> 6 & 0x3ff; |
480 | fw->mask = word & 0x3f; |
481 | fw->reg = word >> 16 & 0xfff; |
482 | err = check_class(fw, class: job_class); |
483 | if (!err) |
484 | err = check_mask(fw); |
485 | if (err) |
486 | goto out; |
487 | break; |
488 | case 1: |
489 | fw->reg = word >> 16 & 0xfff; |
490 | fw->count = word & 0xffff; |
491 | err = check_incr(fw); |
492 | if (err) |
493 | goto out; |
494 | break; |
495 | |
496 | case 2: |
497 | fw->reg = word >> 16 & 0xfff; |
498 | fw->count = word & 0xffff; |
499 | err = check_nonincr(fw); |
500 | if (err) |
501 | goto out; |
502 | break; |
503 | |
504 | case 3: |
505 | fw->mask = word & 0xffff; |
506 | fw->reg = word >> 16 & 0xfff; |
507 | err = check_mask(fw); |
508 | if (err) |
509 | goto out; |
510 | break; |
511 | case 4: |
512 | case 14: |
513 | break; |
514 | default: |
515 | err = -EINVAL; |
516 | break; |
517 | } |
518 | } |
519 | |
520 | out: |
521 | return err; |
522 | } |
523 | |
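/*
 * Copy all gather buffers into one contiguous, write-combined allocation
 * and run the firewall over the copy, so that the validated commands
 * cannot be modified afterwards behind the firewall's back.
 */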
static inline int copy_gathers(struct device *host, struct host1x_job *job,
			       struct device *dev)
{
	struct host1x_firewall fw;
	size_t size = 0;
	size_t offset = 0;
	unsigned int i;

	fw.job = job;
	fw.dev = dev;
	fw.reloc = job->relocs;
	fw.num_relocs = job->num_relocs;
	fw.class = job->class;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		size += g->words * sizeof(u32);
	}

	/*
	 * Try a non-blocking allocation from the higher-priority pools
	 * first, since waiting for the allocation here is a major
	 * performance hit.
	 */
	job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
					       GFP_NOWAIT);

	/* the higher-priority allocation failed, try a generic blocking one */
	if (!job->gather_copy_mapped)
		job->gather_copy_mapped = dma_alloc_wc(host, size,
						       &job->gather_copy,
						       GFP_KERNEL);
	if (!job->gather_copy_mapped)
		return -ENOMEM;

	job->gather_copy_size = size;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;
		void *gather;

		if (job->cmds[i].is_wait)
			continue;
		g = &job->cmds[i].gather;

		/* Copy the gather */
		gather = host1x_bo_mmap(g->bo);
		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
		       g->words * sizeof(u32));
		host1x_bo_munmap(g->bo, gather);

		/* Store the location in the buffer */
		g->base = job->gather_copy;
		g->offset = offset;

		/* Validate the job */
		if (validate(&fw, g))
			return -EINVAL;

		offset += g->words * sizeof(u32);
	}

	/* No relocs should remain at this point */
	if (fw.num_relocs)
		return -EINVAL;

	return 0;
}

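/*
 * Prepare a job for submission: pin all referenced buffers, copy and
 * validate the gathers if the firewall is enabled, then patch every
 * gather's relocations with the final DMA addresses. Gathers sharing a
 * buffer object are patched only once.
 */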
int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
	int err;
	unsigned int i, j;
	struct host1x *host = dev_get_drvdata(dev->parent);

	/* pin memory */
	err = pin_job(host, job);
	if (err)
		goto out;

	if (job->enable_firewall) {
		err = copy_gathers(host->dev, job, dev);
		if (err)
			goto out;
	}

	/* patch gathers */
	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;

		if (job->cmds[i].is_wait)
			continue;
		g = &job->cmds[i].gather;

		/* process each gather mem only once */
		if (g->handled)
			continue;

		/* copy_gathers() sets a gather's base when the firewall is enabled */
		if (!job->enable_firewall)
			g->base = job->gather_addr_phys[i];

		for (j = i + 1; j < job->num_cmds; j++) {
			if (!job->cmds[j].is_wait &&
			    job->cmds[j].gather.bo == g->bo) {
				job->cmds[j].gather.handled = true;
				job->cmds[j].gather.base = g->base;
			}
		}

		err = do_relocs(job, g);
		if (err)
			break;
	}

out:
	if (err)
		host1x_job_unpin(job);
	wmb();

	return err;
}
EXPORT_SYMBOL(host1x_job_pin);

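/*
 * Undo the work done by pin_job()/host1x_job_pin(): tear down per-job
 * IOMMU mappings, release each pinned mapping and buffer reference, and
 * free the firewall's gather copy if one was allocated.
 */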
void host1x_job_unpin(struct host1x_job *job)
{
	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
	unsigned int i;

	for (i = 0; i < job->num_unpins; i++) {
		struct host1x_bo_mapping *map = job->unpins[i].map;
		struct host1x_bo *bo = map->bo;

		if (!job->enable_firewall && map->size && host->domain) {
			iommu_unmap(host->domain, job->addr_phys[i], map->size);
			free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
		}

		host1x_bo_unpin(map);
		host1x_bo_put(bo);
	}

	job->num_unpins = 0;

	if (job->gather_copy_size)
		dma_free_wc(host->dev, job->gather_copy_size,
			    job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);

/*
 * Debug routine used to dump job entries
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt->id);
	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
	dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
}