// SPDX-License-Identifier: GPL-2.0
/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
/* Copyright 2019 Collabora ltd. */
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/dma-resv.h>
#include <drm/gpu_scheduler.h>
#include <drm/panfrost_drm.h>

#include "panfrost_device.h"
#include "panfrost_devfreq.h"
#include "panfrost_job.h"
#include "panfrost_features.h"
#include "panfrost_issues.h"
#include "panfrost_gem.h"
#include "panfrost_regs.h"
#include "panfrost_gpu.h"
#include "panfrost_mmu.h"
#include "panfrost_dump.h"

#define JOB_TIMEOUT_MS 500

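/* MMIO accessors for the Job Manager register file. */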
#define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
#define job_read(dev, reg) readl(dev->iomem + (reg))

struct panfrost_queue_state {
	struct drm_gpu_scheduler sched;
	u64 fence_context;
	u64 emit_seqno;
};

struct panfrost_job_slot {
	struct panfrost_queue_state queue[NUM_JOB_SLOTS];
	spinlock_t job_lock;
	int irq;
};

static struct panfrost_job *
to_panfrost_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct panfrost_job, base);
}

struct panfrost_fence {
	struct dma_fence base;
	struct drm_device *dev;
	/* panfrost seqno for signaled() test */
	u64 seqno;
	int queue;
};

static inline struct panfrost_fence *
to_panfrost_fence(struct dma_fence *fence)
{
	return (struct panfrost_fence *)fence;
}

static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
{
	return "panfrost";
}

static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence)
{
	struct panfrost_fence *f = to_panfrost_fence(fence);

	switch (f->queue) {
	case 0:
		return "panfrost-js-0";
	case 1:
		return "panfrost-js-1";
	case 2:
		return "panfrost-js-2";
	default:
		return NULL;
	}
}

static const struct dma_fence_ops panfrost_fence_ops = {
	.get_driver_name = panfrost_fence_get_driver_name,
	.get_timeline_name = panfrost_fence_get_timeline_name,
};

static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num)
{
	struct panfrost_fence *fence;
	struct panfrost_job_slot *js = pfdev->js;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return ERR_PTR(-ENOMEM);

	fence->dev = pfdev->ddev;
	fence->queue = js_num;
	fence->seqno = ++js->queue[js_num].emit_seqno;
	dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock,
		       js->queue[js_num].fence_context, fence->seqno);

	return &fence->base;
}

int panfrost_job_get_slot(struct panfrost_job *job)
{
	/* JS0: fragment jobs.
	 * JS1: vertex/tiler jobs
	 * JS2: compute jobs
	 */
	if (job->requirements & PANFROST_JD_REQ_FS)
		return 0;

	/* Not exposed to userspace yet */
#if 0
	if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
		if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
		    (job->pfdev->features.nr_core_groups == 2))
			return 2;
		if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
			return 2;
	}
#endif
	return 1;
}

static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
					u32 requirements,
					int js)
{
	u64 affinity;

	/*
	 * Use all cores for now.
	 * Eventually we may need to support tiler only jobs and h/w with
	 * multiple (2) coherent core groups
	 */
	affinity = pfdev->features.shader_present;

	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(affinity));
	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity));
}

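/* With jobchain disambiguation, the HW tags each queued job with an even/odd
 * chain flag so that a specific chain can be hard-stopped later. Deriving the
 * flag from the done fence seqno guarantees the two jobs queued on a slot
 * never carry the same flag.
 */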
static u32
panfrost_get_job_chain_flag(const struct panfrost_job *job)
{
	struct panfrost_fence *f = to_panfrost_fence(job->done_fence);

	if (!panfrost_has_hw_feature(job->pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
		return 0;

	return (f->seqno & 1) ? JS_CONFIG_JOB_CHAIN_FLAG : 0;
}

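/* pfdev->jobs[slot] mirrors the two-deep HW queue (the running job plus the
 * one staged in the _NEXT registers): pop the head and shift the pending job
 * into its place. Called with job_lock held.
 */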
static struct panfrost_job *
panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
{
	struct panfrost_job *job = pfdev->jobs[slot][0];

	WARN_ON(!job);
	if (job->is_profiled) {
		if (job->engine_usage) {
			job->engine_usage->elapsed_ns[slot] +=
				ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
			job->engine_usage->cycles[slot] +=
				panfrost_cycle_counter_read(pfdev) - job->start_cycles;
		}
		panfrost_cycle_counter_put(job->pfdev);
	}

	pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
	pfdev->jobs[slot][1] = NULL;

	return job;
}

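/* Queue a job on a slot and return the subslot it landed in: 0 for the
 * currently running job, 1 for the one staged in the _NEXT registers. Two
 * queued jobs must carry different chain flags, otherwise a hard-stop could
 * kill the wrong one.
 */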
static unsigned int
panfrost_enqueue_job(struct panfrost_device *pfdev, int slot,
		     struct panfrost_job *job)
{
	if (WARN_ON(!job))
		return 0;

	if (!pfdev->jobs[slot][0]) {
		pfdev->jobs[slot][0] = job;
		return 0;
	}

	WARN_ON(pfdev->jobs[slot][1]);
	pfdev->jobs[slot][1] = job;
	WARN_ON(panfrost_get_job_chain_flag(job) ==
		panfrost_get_job_chain_flag(pfdev->jobs[slot][0]));
	return 1;
}

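/* Program the slot registers and kick off execution of a job chain. */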
static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
{
	struct panfrost_device *pfdev = job->pfdev;
	unsigned int subslot;
	u32 cfg;
	u64 jc_head = job->jc;
	int ret;

	panfrost_devfreq_record_busy(&pfdev->pfdevfreq);

	ret = pm_runtime_get_sync(pfdev->dev);
	if (ret < 0)
		return;

	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js))))
		return;

	cfg = panfrost_mmu_as_get(pfdev, job->mmu);

	job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head));
	job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head));

	panfrost_job_write_affinity(pfdev, job->requirements, js);

	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
	 * start */
	cfg |= JS_CONFIG_THREAD_PRI(8) |
		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
		panfrost_get_job_chain_flag(job);

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;

	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649))
		cfg |= JS_CONFIG_START_MMU;

	job_write(pfdev, JS_CONFIG_NEXT(js), cfg);

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);

	/* GO ! */

	spin_lock(&pfdev->js->job_lock);
	subslot = panfrost_enqueue_job(pfdev, js, job);
	/* Don't queue the job if a reset is in progress */
	if (!atomic_read(&pfdev->reset.pending)) {
		if (atomic_read(&pfdev->profile_mode)) {
			panfrost_cycle_counter_get(pfdev);
			job->is_profiled = true;
			job->start_time = ktime_get();
			job->start_cycles = panfrost_cycle_counter_read(pfdev);
		}

		job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
		dev_dbg(pfdev->dev,
			"JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
			job, js, subslot, jc_head, cfg & 0xf);
	}
	spin_unlock(&pfdev->js->job_lock);
}

static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct drm_sched_job *job)
{
	int i, ret;

	for (i = 0; i < bo_count; i++) {
		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
		if (ret)
			return ret;

		/* panfrost always uses write mode in its current uapi */
		ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
							      true);
		if (ret)
			return ret;
	}

	return 0;
}

static void panfrost_attach_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct dma_fence *fence)
{
	int i;

	for (i = 0; i < bo_count; i++)
		dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
}

int panfrost_job_push(struct panfrost_job *job)
{
	struct panfrost_device *pfdev = job->pfdev;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
					&acquire_ctx);
	if (ret)
		return ret;

	mutex_lock(&pfdev->sched_lock);
	drm_sched_job_arm(&job->base);

	job->render_done_fence = dma_fence_get(&job->base.s_fence->finished);

	ret = panfrost_acquire_object_fences(job->bos, job->bo_count,
					     &job->base);
	if (ret) {
		mutex_unlock(&pfdev->sched_lock);
		goto unlock;
	}

	kref_get(&job->refcount); /* put by scheduler job completion */

	drm_sched_entity_push_job(&job->base);

	mutex_unlock(&pfdev->sched_lock);

	panfrost_attach_object_fences(job->bos, job->bo_count,
				      job->render_done_fence);

unlock:
	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);

	return ret;
}

static void panfrost_job_cleanup(struct kref *ref)
{
	struct panfrost_job *job = container_of(ref, struct panfrost_job,
						refcount);
	unsigned int i;

	dma_fence_put(job->done_fence);
	dma_fence_put(job->render_done_fence);

	if (job->mappings) {
		for (i = 0; i < job->bo_count; i++) {
			if (!job->mappings[i])
				break;

			atomic_dec(&job->mappings[i]->obj->gpu_usecount);
			panfrost_gem_mapping_put(job->mappings[i]);
		}
		kvfree(job->mappings);
	}

	if (job->bos) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bos[i]);

		kvfree(job->bos);
	}

	kfree(job);
}

void panfrost_job_put(struct panfrost_job *job)
{
	kref_put(&job->refcount, panfrost_job_cleanup);
}

static void panfrost_job_free(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);

	drm_sched_job_cleanup(sched_job);

	panfrost_job_put(job);
}

static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int slot = panfrost_job_get_slot(job);
	struct dma_fence *fence = NULL;

	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	/* Nothing to execute: can happen if the job has finished while
	 * we were resetting the GPU.
	 */
	if (!job->jc)
		return NULL;

	fence = panfrost_fence_create(pfdev, slot);
	if (IS_ERR(fence))
		return fence;

	if (job->done_fence)
		dma_fence_put(job->done_fence);
	job->done_fence = dma_fence_get(fence);

	panfrost_job_hw_submit(job, slot);

	return fence;
}

void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
{
	int j;
	u32 irq_mask = 0;

	clear_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		irq_mask |= MK_JS_MASK(j);
	}

	job_write(pfdev, JOB_INT_CLEAR, irq_mask);
	job_write(pfdev, JOB_INT_MASK, irq_mask);
}

void panfrost_job_suspend_irq(struct panfrost_device *pfdev)
{
	set_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);

	job_write(pfdev, JOB_INT_MASK, 0);
	synchronize_irq(pfdev->js->irq);
}

static void panfrost_job_handle_err(struct panfrost_device *pfdev,
				    struct panfrost_job *job,
				    unsigned int js)
{
	u32 js_status = job_read(pfdev, JS_STATUS(js));
	const char *exception_name = panfrost_exception_name(js_status);
	bool signal_fence = true;

	if (!panfrost_exception_is_fault(js_status)) {
		dev_dbg(pfdev->dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x",
			js, exception_name,
			job_read(pfdev, JS_HEAD_LO(js)),
			job_read(pfdev, JS_TAIL_LO(js)));
	} else {
		dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
			js, exception_name,
			job_read(pfdev, JS_HEAD_LO(js)),
			job_read(pfdev, JS_TAIL_LO(js)));
	}

	if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) {
		/* Update the job head so we can resume */
		job->jc = job_read(pfdev, JS_TAIL_LO(js)) |
			  ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32);

		/* The job will be resumed, don't signal the fence */
		signal_fence = false;
	} else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) {
		/* Job has been hard-stopped, flag it as canceled */
		dma_fence_set_error(job->done_fence, -ECANCELED);
		job->jc = 0;
	} else if (panfrost_exception_is_fault(js_status)) {
		/* We might want to provide finer-grained error code based on
		 * the exception type, but unconditionally setting to EINVAL
		 * is good enough for now.
		 */
		dma_fence_set_error(job->done_fence, -EINVAL);
		job->jc = 0;
	}

	panfrost_mmu_as_put(pfdev, job->mmu);
	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);

	if (signal_fence)
		dma_fence_signal_locked(job->done_fence);

	pm_runtime_put_autosuspend(pfdev->dev);

	if (panfrost_exception_needs_reset(pfdev, js_status)) {
		atomic_set(&pfdev->reset.pending, 1);
		drm_sched_fault(&pfdev->js->queue[js].sched);
	}
}

static void panfrost_job_handle_done(struct panfrost_device *pfdev,
				     struct panfrost_job *job)
{
	/* Set ->jc to 0 to avoid re-submitting an already finished job (can
	 * happen when we receive the DONE interrupt while doing a GPU reset).
	 */
	job->jc = 0;
	panfrost_mmu_as_put(pfdev, job->mmu);
	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);

	dma_fence_signal_locked(job->done_fence);
	pm_runtime_put_autosuspend(pfdev->dev);
}

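/* Process one batch of job IRQ status bits. Called with job_lock held, from
 * the threaded IRQ handler or the reset path.
 */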
static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status)
{
	struct panfrost_job *done[NUM_JOB_SLOTS][2] = {};
	struct panfrost_job *failed[NUM_JOB_SLOTS] = {};
	u32 js_state = 0, js_events = 0;
	unsigned int i, j;

	/* First we collect all failed/done jobs. */
	while (status) {
		u32 js_state_mask = 0;

		for (j = 0; j < NUM_JOB_SLOTS; j++) {
			if (status & MK_JS_MASK(j))
				js_state_mask |= MK_JS_MASK(j);

			if (status & JOB_INT_MASK_DONE(j)) {
				if (done[j][0])
					done[j][1] = panfrost_dequeue_job(pfdev, j);
				else
					done[j][0] = panfrost_dequeue_job(pfdev, j);
			}

			if (status & JOB_INT_MASK_ERR(j)) {
				/* Cancel the next submission. Will be submitted
				 * after we're done handling this failure if
				 * there's no reset pending.
				 */
				job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
				failed[j] = panfrost_dequeue_job(pfdev, j);
			}
		}

		/* JS_STATE is sampled when JOB_INT_CLEAR is written.
		 * For each BIT(slot) or BIT(slot + 16) bit written to
		 * JOB_INT_CLEAR, the corresponding bits in JS_STATE
		 * (BIT(slot) and BIT(slot + 16)) are updated, but this
		 * is racy. If we only have one job done at the time we
		 * read JOB_INT_RAWSTAT but the second job fails before we
		 * clear the status, we end up with a status containing
		 * only the DONE bit and consider both jobs as DONE since
		 * JS_STATE reports both NEXT and CURRENT as inactive.
		 * To prevent that, let's repeat this clear+read steps
		 * until status is 0.
		 */
		job_write(pfdev, JOB_INT_CLEAR, status);
		js_state &= ~js_state_mask;
		js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask;
		js_events |= status;
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}

	/* Then we handle the dequeued jobs. */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (failed[j]) {
			panfrost_job_handle_err(pfdev, failed[j], j);
		} else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) {
			/* When the current job doesn't fail, the JM dequeues
			 * the next job without waiting for an ACK, this means
			 * we can have 2 jobs dequeued and only catch the
			 * interrupt when the second one is done. If both slots
			 * are inactive, but one job remains in pfdev->jobs[j],
			 * consider it done. Of course that doesn't apply if a
			 * failure happened since we cancelled execution of the
			 * job in _NEXT (see above).
			 */
			if (WARN_ON(!done[j][0]))
				done[j][0] = panfrost_dequeue_job(pfdev, j);
			else
				done[j][1] = panfrost_dequeue_job(pfdev, j);
		}

		for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++)
			panfrost_job_handle_done(pfdev, done[j][i]);
	}

	/* And finally we requeue jobs that were waiting in the second slot
	 * and have been stopped if we detected a failure on the first slot.
	 */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (!failed[j] || !pfdev->jobs[j][0])
			continue;

		if (pfdev->jobs[j][0]->jc == 0) {
			/* The job was cancelled, signal the fence now */
			struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j);

			dma_fence_set_error(canceled->done_fence, -ECANCELED);
			panfrost_job_handle_done(pfdev, canceled);
		} else if (!atomic_read(&pfdev->reset.pending)) {
			/* Requeue the job we removed if no reset is pending */
			job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START);
		}
	}
}

static void panfrost_job_handle_irqs(struct panfrost_device *pfdev)
{
	u32 status = job_read(pfdev, JOB_INT_RAWSTAT);

	while (status) {
		pm_runtime_mark_last_busy(pfdev->dev);

		spin_lock(&pfdev->js->job_lock);
		panfrost_job_handle_irq(pfdev, status);
		spin_unlock(&pfdev->js->job_lock);
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}
}

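/* Helper for the soft-stop poll loop in panfrost_reset(): report which slots
 * are still active, dropping slots with fresh RAWSTAT events from the mask
 * since those will be handled as regular job interrupts.
 */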
static u32 panfrost_active_slots(struct panfrost_device *pfdev,
				 u32 *js_state_mask, u32 js_state)
{
	u32 rawstat;

	if (!(js_state & *js_state_mask))
		return 0;

	rawstat = job_read(pfdev, JOB_INT_RAWSTAT);
	if (rawstat) {
		unsigned int i;

		for (i = 0; i < NUM_JOB_SLOTS; i++) {
			if (rawstat & MK_JS_MASK(i))
				*js_state_mask &= ~MK_JS_MASK(i);
		}
	}

	return js_state & *js_state_mask;
}

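/* Common reset path, shared by the scheduler timeout handler and the reset
 * worker. Expects reset.pending to be set by the caller.
 */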
static void
panfrost_reset(struct panfrost_device *pfdev,
	       struct drm_sched_job *bad)
{
	u32 js_state, js_state_mask = 0xffffffff;
	unsigned int i, j;
	bool cookie;
	int ret;

	if (!atomic_read(&pfdev->reset.pending))
		return;

	/* Stop the schedulers.
	 *
	 * FIXME: We temporarily get out of the dma_fence_signalling section
	 * because the cleanup path generates lockdep splats when taking locks
	 * to release job resources. We should rework the code to follow this
	 * pattern:
	 *
	 *	try_lock
	 *	if (locked)
	 *		release
	 *	else
	 *		schedule_work_to_release_later
	 */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_stop(&pfdev->js->queue[i].sched, bad);

	cookie = dma_fence_begin_signalling();

	if (bad)
		drm_sched_increase_karma(bad);

	/* Mask job interrupts and synchronize to make sure we won't be
	 * interrupted during our reset.
	 */
	job_write(pfdev, JOB_INT_MASK, 0);
	synchronize_irq(pfdev->js->irq);

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* Cancel the next job and soft-stop the running job. */
		job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
		job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP);
	}

	/* Wait at most 10ms for soft-stops to complete */
	ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state,
				 !panfrost_active_slots(pfdev, &js_state_mask, js_state),
				 10, 10000);

	if (ret)
		dev_err(pfdev->dev, "Soft-stop failed\n");

	/* Handle the remaining interrupts before we reset. */
	panfrost_job_handle_irqs(pfdev);

	/* Remaining interrupts have been handled, but we might still have
	 * stuck jobs. Let's make sure the PM counters stay balanced by
	 * manually calling pm_runtime_put_noidle() and
	 * panfrost_devfreq_record_idle() for each stuck job.
	 * Let's also make sure the cycle counting register's refcnt is
	 * kept balanced to prevent it from running forever.
	 */
	spin_lock(&pfdev->js->job_lock);
	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) {
			if (pfdev->jobs[i][j]->is_profiled)
				panfrost_cycle_counter_put(pfdev->jobs[i][j]->pfdev);
			pm_runtime_put_noidle(pfdev->dev);
			panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
		}
	}
	memset(pfdev->jobs, 0, sizeof(pfdev->jobs));
	spin_unlock(&pfdev->js->job_lock);

	/* Proceed with reset now. */
	panfrost_device_reset(pfdev);

	/* panfrost_device_reset() unmasks job interrupts, but we want to
	 * keep them masked a bit longer.
	 */
	job_write(pfdev, JOB_INT_MASK, 0);

	/* GPU has been reset, we can clear the reset pending bit. */
	atomic_set(&pfdev->reset.pending, 0);

	/* Now resubmit jobs that were previously queued but didn't have a
	 * chance to finish.
	 * FIXME: We temporarily get out of the DMA fence signalling section
	 * while resubmitting jobs because the job submission logic will
	 * allocate memory with the GFP_KERNEL flag which can trigger memory
	 * reclaim and exposes a lock ordering issue.
	 */
	dma_fence_end_signalling(cookie);
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);
	cookie = dma_fence_begin_signalling();

	/* Restart the schedulers */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_start(&pfdev->js->queue[i].sched, true);

	/* Re-enable job interrupts now that everything has been restarted. */
	job_write(pfdev, JOB_INT_MASK,
		  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
		  GENMASK(NUM_JOB_SLOTS - 1, 0));

	dma_fence_end_signalling(cookie);
}

static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
						     *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int js = panfrost_job_get_slot(job);

	/*
	 * If the GPU managed to complete this job's fence, the timeout is
	 * spurious. Bail out.
	 */
	if (dma_fence_is_signaled(job->done_fence))
		return DRM_GPU_SCHED_STAT_NOMINAL;

	/*
	 * Panfrost IRQ handler may take a long time to process an interrupt
	 * if there is another IRQ handler hogging the processing.
	 * For example, the HDMI encoder driver might be stuck in the IRQ
	 * handler for a significant time in the case of a bad cable connection.
	 * In order to catch such cases and not report spurious Panfrost
	 * job timeouts, synchronize the IRQ handler and re-check the fence
	 * status.
	 */
	synchronize_irq(pfdev->js->irq);

	if (dma_fence_is_signaled(job->done_fence)) {
		dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n");
		return DRM_GPU_SCHED_STAT_NOMINAL;
	}

	dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
		js,
		job_read(pfdev, JS_CONFIG(js)),
		job_read(pfdev, JS_STATUS(js)),
		job_read(pfdev, JS_HEAD_LO(js)),
		job_read(pfdev, JS_TAIL_LO(js)),
		sched_job);

	panfrost_core_dump(job);

	atomic_set(&pfdev->reset.pending, 1);
	panfrost_reset(pfdev, sched_job);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void panfrost_reset_work(struct work_struct *work)
{
	struct panfrost_device *pfdev;

	pfdev = container_of(work, struct panfrost_device, reset.work);
	panfrost_reset(pfdev, NULL);
}

static const struct drm_sched_backend_ops panfrost_sched_ops = {
	.run_job = panfrost_job_run,
	.timedout_job = panfrost_job_timedout,
	.free_job = panfrost_job_free
};

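/* Job interrupts are handled in two stages: the hard IRQ handler masks job
 * interrupts and wakes this thread, which does the actual processing and
 * re-enables them when done.
 */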
static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data)
{
	struct panfrost_device *pfdev = data;

	panfrost_job_handle_irqs(pfdev);

	/* Enable interrupts only if we're not about to get suspended */
	if (!test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
		job_write(pfdev, JOB_INT_MASK,
			  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
			  GENMASK(NUM_JOB_SLOTS - 1, 0));

	return IRQ_HANDLED;
}

static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
{
	struct panfrost_device *pfdev = data;
	u32 status;

	if (test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
		return IRQ_NONE;

	status = job_read(pfdev, JOB_INT_STAT);
	if (!status)
		return IRQ_NONE;

	job_write(pfdev, JOB_INT_MASK, 0);
	return IRQ_WAKE_THREAD;
}

int panfrost_job_init(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js;
	unsigned int nentries = 2;
	int ret, j;

	/* All GPUs have two entries per queue, but without jobchain
	 * disambiguation stopping the right job in the close path is tricky,
	 * so let's just advertise one entry in that case.
	 */
	if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
		nentries = 1;

	pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL);
	if (!js)
		return -ENOMEM;

	INIT_WORK(&pfdev->reset.work, panfrost_reset_work);
	spin_lock_init(&js->job_lock);

	js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
	if (js->irq < 0)
		return js->irq;

	ret = devm_request_threaded_irq(pfdev->dev, js->irq,
					panfrost_job_irq_handler,
					panfrost_job_irq_handler_thread,
					IRQF_SHARED, KBUILD_MODNAME "-job",
					pfdev);
	if (ret) {
		dev_err(pfdev->dev, "failed to request job irq");
		return ret;
	}

	pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0);
	if (!pfdev->reset.wq)
		return -ENOMEM;

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		js->queue[j].fence_context = dma_fence_context_alloc(1);

		ret = drm_sched_init(&js->queue[j].sched,
				     &panfrost_sched_ops, NULL,
				     DRM_SCHED_PRIORITY_COUNT,
				     nentries, 0,
				     msecs_to_jiffies(JOB_TIMEOUT_MS),
				     pfdev->reset.wq,
				     NULL, "pan_js", pfdev->dev);
		if (ret) {
			dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
			goto err_sched;
		}
	}

	panfrost_job_enable_interrupts(pfdev);

	return 0;

err_sched:
	for (j--; j >= 0; j--)
		drm_sched_fini(&js->queue[j].sched);

	destroy_workqueue(pfdev->reset.wq);
	return ret;
}

void panfrost_job_fini(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int j;

	job_write(pfdev, JOB_INT_MASK, 0);

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		drm_sched_fini(&js->queue[j].sched);
	}

	cancel_work_sync(&pfdev->reset.work);
	destroy_workqueue(pfdev->reset.wq);
}

int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
{
	struct panfrost_device *pfdev = panfrost_priv->pfdev;
	struct panfrost_job_slot *js = pfdev->js;
	struct drm_gpu_scheduler *sched;
	int ret, i;

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		sched = &js->queue[i].sched;
		ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
					    DRM_SCHED_PRIORITY_NORMAL, &sched,
					    1, NULL);
		if (WARN_ON(ret))
			return ret;
	}
	return 0;
}

void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
{
	struct panfrost_device *pfdev = panfrost_priv->pfdev;
	int i;

	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);

	/* Kill in-flight jobs */
	spin_lock(&pfdev->js->job_lock);
	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i];
		int j;

		for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
			struct panfrost_job *job = pfdev->jobs[i][j];
			u32 cmd;

			if (!job || job->base.entity != entity)
				continue;

			if (j == 1) {
				/* Try to cancel the job before it starts */
				job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
				/* Reset the job head so it doesn't get restarted if
				 * the job in the first slot failed.
				 */
				job->jc = 0;
			}

			if (panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
				cmd = panfrost_get_job_chain_flag(job) ?
				      JS_COMMAND_HARD_STOP_1 :
				      JS_COMMAND_HARD_STOP_0;
			} else {
				cmd = JS_COMMAND_HARD_STOP;
			}

			job_write(pfdev, JS_COMMAND(i), cmd);

			/* Jobs can outlive their file context */
			job->engine_usage = NULL;
		}
	}
	spin_unlock(&pfdev->js->job_lock);
}

int panfrost_job_is_idle(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int i;

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* If there are any jobs in the HW queue, we're not idle */
		if (atomic_read(&js->queue[i].sched.credit_count))
			return false;
	}

	return true;
}