// SPDX-License-Identifier: GPL-2.0
/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
/* Copyright 2019 Collabora ltd. */
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/dma-resv.h>
#include <drm/gpu_scheduler.h>
#include <drm/panfrost_drm.h>

#include "panfrost_device.h"
#include "panfrost_devfreq.h"
#include "panfrost_job.h"
#include "panfrost_features.h"
#include "panfrost_issues.h"
#include "panfrost_gem.h"
#include "panfrost_regs.h"
#include "panfrost_gpu.h"
#include "panfrost_mmu.h"
#include "panfrost_dump.h"

#define JOB_TIMEOUT_MS 500

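/* MMIO accessors for the Job Manager register file. */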
#define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
#define job_read(dev, reg) readl(dev->iomem + (reg))

struct panfrost_queue_state {
	struct drm_gpu_scheduler sched;
	u64 fence_context;
	u64 emit_seqno;
};

struct panfrost_job_slot {
	struct panfrost_queue_state queue[NUM_JOB_SLOTS];
	spinlock_t job_lock;
	int irq;
};

static struct panfrost_job *
to_panfrost_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct panfrost_job, base);
}

struct panfrost_fence {
	struct dma_fence base;
	struct drm_device *dev;
	/* panfrost seqno for signaled() test */
	u64 seqno;
	int queue;
};

static inline struct panfrost_fence *
to_panfrost_fence(struct dma_fence *fence)
{
	return (struct panfrost_fence *)fence;
}

static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
{
	return "panfrost";
}

static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence)
{
	struct panfrost_fence *f = to_panfrost_fence(fence);

	switch (f->queue) {
	case 0:
		return "panfrost-js-0";
	case 1:
		return "panfrost-js-1";
	case 2:
		return "panfrost-js-2";
	default:
		return NULL;
	}
}

static const struct dma_fence_ops panfrost_fence_ops = {
	.get_driver_name = panfrost_fence_get_driver_name,
	.get_timeline_name = panfrost_fence_get_timeline_name,
};

static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num)
{
	struct panfrost_fence *fence;
	struct panfrost_job_slot *js = pfdev->js;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return ERR_PTR(-ENOMEM);

	fence->dev = pfdev->ddev;
	fence->queue = js_num;
	fence->seqno = ++js->queue[js_num].emit_seqno;
	dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock,
		       js->queue[js_num].fence_context, fence->seqno);

	return &fence->base;
}

int panfrost_job_get_slot(struct panfrost_job *job)
{
	/* JS0: fragment jobs.
	 * JS1: vertex/tiler jobs
	 * JS2: compute jobs
	 */
	if (job->requirements & PANFROST_JD_REQ_FS)
		return 0;

	/* Not exposed to userspace yet */
#if 0
	if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
		if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
		    (job->pfdev->features.nr_core_groups == 2))
			return 2;
		if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
			return 2;
	}
#endif
	return 1;
}

static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
					u32 requirements,
					int js)
{
	u64 affinity;

	/*
	 * Use all cores for now.
	 * Eventually we may need to support tiler only jobs and h/w with
	 * multiple (2) coherent core groups
	 */
	affinity = pfdev->features.shader_present;

	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(affinity));
	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity));
}

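/* With jobchain disambiguation, the HW tags each queued job with an even/odd
 * chain flag so that a specific chain can be hard-stopped later. Deriving the
 * flag from the done fence seqno guarantees the two jobs queued on a slot
 * never carry the same flag.
 */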
static u32
panfrost_get_job_chain_flag(const struct panfrost_job *job)
{
	struct panfrost_fence *f = to_panfrost_fence(job->done_fence);

	if (!panfrost_has_hw_feature(job->pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
		return 0;

	return (f->seqno & 1) ? JS_CONFIG_JOB_CHAIN_FLAG : 0;
}

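/* pfdev->jobs[slot] mirrors the two-deep HW queue (the running job plus the
 * one staged in the _NEXT registers): pop the head and shift the pending job
 * into its place. Called with job_lock held.
 */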
static struct panfrost_job *
panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
{
	struct panfrost_job *job = pfdev->jobs[slot][0];

	WARN_ON(!job);
	if (job->is_profiled) {
		if (job->engine_usage) {
			job->engine_usage->elapsed_ns[slot] +=
				ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
			job->engine_usage->cycles[slot] +=
				panfrost_cycle_counter_read(pfdev) - job->start_cycles;
		}
		panfrost_cycle_counter_put(job->pfdev);
	}

	pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
	pfdev->jobs[slot][1] = NULL;

	return job;
}

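/* Queue a job on a slot and return the subslot it landed in: 0 for the
 * currently running job, 1 for the one staged in the _NEXT registers. Two
 * queued jobs must carry different chain flags, otherwise a hard-stop could
 * kill the wrong one.
 */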
static unsigned int
panfrost_enqueue_job(struct panfrost_device *pfdev, int slot,
		     struct panfrost_job *job)
{
	if (WARN_ON(!job))
		return 0;

	if (!pfdev->jobs[slot][0]) {
		pfdev->jobs[slot][0] = job;
		return 0;
	}

	WARN_ON(pfdev->jobs[slot][1]);
	pfdev->jobs[slot][1] = job;
	WARN_ON(panfrost_get_job_chain_flag(job) ==
		panfrost_get_job_chain_flag(pfdev->jobs[slot][0]));
	return 1;
}

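/* Program the slot registers and kick off execution of a job chain. */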
static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
{
	struct panfrost_device *pfdev = job->pfdev;
	unsigned int subslot;
	u32 cfg;
	u64 jc_head = job->jc;
	int ret;

	panfrost_devfreq_record_busy(&pfdev->pfdevfreq);

	ret = pm_runtime_get_sync(pfdev->dev);
	if (ret < 0)
		return;

	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js))))
		return;

	cfg = panfrost_mmu_as_get(pfdev, job->mmu);

	job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head));
	job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head));

	panfrost_job_write_affinity(pfdev, job->requirements, js);

	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
	 * start */
	cfg |= JS_CONFIG_THREAD_PRI(8) |
		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
		panfrost_get_job_chain_flag(job);

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;

	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649))
		cfg |= JS_CONFIG_START_MMU;

	job_write(pfdev, JS_CONFIG_NEXT(js), cfg);

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);

	/* GO ! */

	spin_lock(&pfdev->js->job_lock);
	subslot = panfrost_enqueue_job(pfdev, js, job);
	/* Don't queue the job if a reset is in progress */
	if (!atomic_read(&pfdev->reset.pending)) {
		if (atomic_read(&pfdev->profile_mode)) {
			panfrost_cycle_counter_get(pfdev);
			job->is_profiled = true;
			job->start_time = ktime_get();
			job->start_cycles = panfrost_cycle_counter_read(pfdev);
		}

		job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
		dev_dbg(pfdev->dev,
			"JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
			job, js, subslot, jc_head, cfg & 0xf);
	}
	spin_unlock(&pfdev->js->job_lock);
}

static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct drm_sched_job *job)
{
	int i, ret;

	for (i = 0; i < bo_count; i++) {
		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
		if (ret)
			return ret;

		/* panfrost always uses write mode in its current uapi */
		ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
							      true);
		if (ret)
			return ret;
	}

	return 0;
}

static void panfrost_attach_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct dma_fence *fence)
{
	int i;

	for (i = 0; i < bo_count; i++)
		dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
}

int panfrost_job_push(struct panfrost_job *job)
{
	struct panfrost_device *pfdev = job->pfdev;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
					&acquire_ctx);
	if (ret)
		return ret;

	mutex_lock(&pfdev->sched_lock);
	drm_sched_job_arm(&job->base);

	job->render_done_fence = dma_fence_get(&job->base.s_fence->finished);

	ret = panfrost_acquire_object_fences(job->bos, job->bo_count,
					     &job->base);
	if (ret) {
		mutex_unlock(&pfdev->sched_lock);
		goto unlock;
	}

	kref_get(&job->refcount); /* put by scheduler job completion */

	drm_sched_entity_push_job(&job->base);

	mutex_unlock(&pfdev->sched_lock);

	panfrost_attach_object_fences(job->bos, job->bo_count,
				      job->render_done_fence);

unlock:
	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);

	return ret;
}

static void panfrost_job_cleanup(struct kref *ref)
{
	struct panfrost_job *job = container_of(ref, struct panfrost_job,
						refcount);
	unsigned int i;

	dma_fence_put(job->done_fence);
	dma_fence_put(job->render_done_fence);

	if (job->mappings) {
		for (i = 0; i < job->bo_count; i++) {
			if (!job->mappings[i])
				break;

			atomic_dec(&job->mappings[i]->obj->gpu_usecount);
			panfrost_gem_mapping_put(job->mappings[i]);
		}
		kvfree(job->mappings);
	}

	if (job->bos) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bos[i]);

		kvfree(job->bos);
	}

	kfree(job);
}

void panfrost_job_put(struct panfrost_job *job)
{
	kref_put(&job->refcount, panfrost_job_cleanup);
}

static void panfrost_job_free(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);

	drm_sched_job_cleanup(sched_job);

	panfrost_job_put(job);
}

static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int slot = panfrost_job_get_slot(job);
	struct dma_fence *fence = NULL;

	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	/* Nothing to execute: can happen if the job has finished while
	 * we were resetting the GPU.
	 */
	if (!job->jc)
		return NULL;

	fence = panfrost_fence_create(pfdev, slot);
	if (IS_ERR(fence))
		return fence;

	if (job->done_fence)
		dma_fence_put(job->done_fence);
	job->done_fence = dma_fence_get(fence);

	panfrost_job_hw_submit(job, slot);

	return fence;
}

void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
{
	int j;
	u32 irq_mask = 0;

	clear_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		irq_mask |= MK_JS_MASK(j);
	}

	job_write(pfdev, JOB_INT_CLEAR, irq_mask);
	job_write(pfdev, JOB_INT_MASK, irq_mask);
}

void panfrost_job_suspend_irq(struct panfrost_device *pfdev)
{
	set_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);

	job_write(pfdev, JOB_INT_MASK, 0);
	synchronize_irq(pfdev->js->irq);
}

static void panfrost_job_handle_err(struct panfrost_device *pfdev,
				    struct panfrost_job *job,
				    unsigned int js)
{
	u32 js_status = job_read(pfdev, JS_STATUS(js));
	const char *exception_name = panfrost_exception_name(js_status);
	bool signal_fence = true;

	if (!panfrost_exception_is_fault(js_status)) {
		dev_dbg(pfdev->dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x",
			js, exception_name,
			job_read(pfdev, JS_HEAD_LO(js)),
			job_read(pfdev, JS_TAIL_LO(js)));
	} else {
		dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
			js, exception_name,
			job_read(pfdev, JS_HEAD_LO(js)),
			job_read(pfdev, JS_TAIL_LO(js)));
	}

	if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) {
		/* Update the job head so we can resume */
		job->jc = job_read(pfdev, JS_TAIL_LO(js)) |
			  ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32);

		/* The job will be resumed, don't signal the fence */
		signal_fence = false;
	} else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) {
		/* Job has been hard-stopped, flag it as canceled */
		dma_fence_set_error(job->done_fence, -ECANCELED);
		job->jc = 0;
	} else if (panfrost_exception_is_fault(js_status)) {
		/* We might want to provide finer-grained error code based on
		 * the exception type, but unconditionally setting to EINVAL
		 * is good enough for now.
		 */
		dma_fence_set_error(job->done_fence, -EINVAL);
		job->jc = 0;
	}

	panfrost_mmu_as_put(pfdev, job->mmu);
	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);

	if (signal_fence)
		dma_fence_signal_locked(job->done_fence);

	pm_runtime_put_autosuspend(pfdev->dev);

	if (panfrost_exception_needs_reset(pfdev, js_status)) {
		atomic_set(&pfdev->reset.pending, 1);
		drm_sched_fault(&pfdev->js->queue[js].sched);
	}
}

static void panfrost_job_handle_done(struct panfrost_device *pfdev,
				     struct panfrost_job *job)
{
	/* Set ->jc to 0 to avoid re-submitting an already finished job (can
	 * happen when we receive the DONE interrupt while doing a GPU reset).
	 */
	job->jc = 0;
	panfrost_mmu_as_put(pfdev, job->mmu);
	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);

	dma_fence_signal_locked(job->done_fence);
	pm_runtime_put_autosuspend(pfdev->dev);
}

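/* Process one batch of job IRQ status bits. Called with job_lock held, from
 * the threaded IRQ handler or the reset path.
 */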
static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status)
{
	struct panfrost_job *done[NUM_JOB_SLOTS][2] = {};
	struct panfrost_job *failed[NUM_JOB_SLOTS] = {};
	u32 js_state = 0, js_events = 0;
	unsigned int i, j;

	/* First we collect all failed/done jobs. */
	while (status) {
		u32 js_state_mask = 0;

		for (j = 0; j < NUM_JOB_SLOTS; j++) {
			if (status & MK_JS_MASK(j))
				js_state_mask |= MK_JS_MASK(j);

			if (status & JOB_INT_MASK_DONE(j)) {
				if (done[j][0])
					done[j][1] = panfrost_dequeue_job(pfdev, j);
				else
					done[j][0] = panfrost_dequeue_job(pfdev, j);
			}

			if (status & JOB_INT_MASK_ERR(j)) {
				/* Cancel the next submission. Will be submitted
				 * after we're done handling this failure if
				 * there's no reset pending.
				 */
				job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
				failed[j] = panfrost_dequeue_job(pfdev, j);
			}
		}

		/* JS_STATE is sampled when JOB_INT_CLEAR is written.
		 * For each BIT(slot) or BIT(slot + 16) bit written to
		 * JOB_INT_CLEAR, the corresponding bits in JS_STATE
		 * (BIT(slot) and BIT(slot + 16)) are updated, but this
		 * is racy. If we only have one job done at the time we
		 * read JOB_INT_RAWSTAT but the second job fails before we
		 * clear the status, we end up with a status containing
		 * only the DONE bit and consider both jobs as DONE since
		 * JS_STATE reports both NEXT and CURRENT as inactive.
		 * To prevent that, let's repeat this clear+read steps
		 * until status is 0.
		 */
		job_write(pfdev, JOB_INT_CLEAR, status);
		js_state &= ~js_state_mask;
		js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask;
		js_events |= status;
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}

	/* Then we handle the dequeued jobs. */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (failed[j]) {
			panfrost_job_handle_err(pfdev, failed[j], j);
		} else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) {
			/* When the current job doesn't fail, the JM dequeues
			 * the next job without waiting for an ACK, this means
			 * we can have 2 jobs dequeued and only catch the
			 * interrupt when the second one is done. If both slots
			 * are inactive, but one job remains in pfdev->jobs[j],
			 * consider it done. Of course that doesn't apply if a
			 * failure happened since we cancelled execution of the
			 * job in _NEXT (see above).
			 */
			if (WARN_ON(!done[j][0]))
				done[j][0] = panfrost_dequeue_job(pfdev, j);
			else
				done[j][1] = panfrost_dequeue_job(pfdev, j);
		}

		for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++)
			panfrost_job_handle_done(pfdev, done[j][i]);
	}

	/* And finally we requeue jobs that were waiting in the second slot
	 * and have been stopped if we detected a failure on the first slot.
	 */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (!failed[j] || !pfdev->jobs[j][0])
			continue;

		if (pfdev->jobs[j][0]->jc == 0) {
			/* The job was cancelled, signal the fence now */
			struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j);

			dma_fence_set_error(canceled->done_fence, -ECANCELED);
			panfrost_job_handle_done(pfdev, canceled);
		} else if (!atomic_read(&pfdev->reset.pending)) {
			/* Requeue the job we removed if no reset is pending */
			job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START);
		}
	}
}

static void panfrost_job_handle_irqs(struct panfrost_device *pfdev)
{
	u32 status = job_read(pfdev, JOB_INT_RAWSTAT);

	while (status) {
		pm_runtime_mark_last_busy(pfdev->dev);

		spin_lock(&pfdev->js->job_lock);
		panfrost_job_handle_irq(pfdev, status);
		spin_unlock(&pfdev->js->job_lock);
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}
}

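/* Helper for the soft-stop poll loop in panfrost_reset(): report which slots
 * are still active, dropping slots with fresh RAWSTAT events from the mask
 * since those will be handled as regular job interrupts.
 */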
static u32 panfrost_active_slots(struct panfrost_device *pfdev,
				 u32 *js_state_mask, u32 js_state)
{
	u32 rawstat;

	if (!(js_state & *js_state_mask))
		return 0;

	rawstat = job_read(pfdev, JOB_INT_RAWSTAT);
	if (rawstat) {
		unsigned int i;

		for (i = 0; i < NUM_JOB_SLOTS; i++) {
			if (rawstat & MK_JS_MASK(i))
				*js_state_mask &= ~MK_JS_MASK(i);
		}
	}

	return js_state & *js_state_mask;
}

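/* Common reset path, shared by the scheduler timeout handler and the reset
 * worker. Expects reset.pending to be set by the caller.
 */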
static void
panfrost_reset(struct panfrost_device *pfdev,
	       struct drm_sched_job *bad)
{
	u32 js_state, js_state_mask = 0xffffffff;
	unsigned int i, j;
	bool cookie;
	int ret;

	if (!atomic_read(&pfdev->reset.pending))
		return;

	/* Stop the schedulers.
	 *
	 * FIXME: We temporarily get out of the dma_fence_signalling section
	 * because the cleanup path generates lockdep splats when taking locks
	 * to release job resources. We should rework the code to follow this
	 * pattern:
	 *
	 *	try_lock
	 *	if (locked)
	 *		release
	 *	else
	 *		schedule_work_to_release_later
	 */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_stop(&pfdev->js->queue[i].sched, bad);

	cookie = dma_fence_begin_signalling();

	if (bad)
		drm_sched_increase_karma(bad);

	/* Mask job interrupts and synchronize to make sure we won't be
	 * interrupted during our reset.
	 */
	job_write(pfdev, JOB_INT_MASK, 0);
	synchronize_irq(pfdev->js->irq);

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* Cancel the next job and soft-stop the running job. */
		job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
		job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP);
	}

	/* Wait at most 10ms for soft-stops to complete */
	ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state,
				 !panfrost_active_slots(pfdev, &js_state_mask, js_state),
				 10, 10000);

	if (ret)
		dev_err(pfdev->dev, "Soft-stop failed\n");

	/* Handle the remaining interrupts before we reset. */
	panfrost_job_handle_irqs(pfdev);

	/* Remaining interrupts have been handled, but we might still have
	 * stuck jobs. Let's make sure the PM counters stay balanced by
	 * manually calling pm_runtime_put_noidle() and
	 * panfrost_devfreq_record_idle() for each stuck job.
	 * Let's also make sure the cycle counting register's refcnt is
	 * kept balanced to prevent it from running forever.
	 */
	spin_lock(&pfdev->js->job_lock);
	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) {
			if (pfdev->jobs[i][j]->is_profiled)
				panfrost_cycle_counter_put(pfdev->jobs[i][j]->pfdev);
			pm_runtime_put_noidle(pfdev->dev);
			panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
		}
	}
	memset(pfdev->jobs, 0, sizeof(pfdev->jobs));
	spin_unlock(&pfdev->js->job_lock);

	/* Proceed with reset now. */
	panfrost_device_reset(pfdev);

	/* panfrost_device_reset() unmasks job interrupts, but we want to
	 * keep them masked a bit longer.
	 */
	job_write(pfdev, JOB_INT_MASK, 0);

	/* GPU has been reset, we can clear the reset pending bit. */
	atomic_set(&pfdev->reset.pending, 0);

	/* Now resubmit jobs that were previously queued but didn't have a
	 * chance to finish.
	 * FIXME: We temporarily get out of the DMA fence signalling section
	 * while resubmitting jobs because the job submission logic will
	 * allocate memory with the GFP_KERNEL flag which can trigger memory
	 * reclaim and exposes a lock ordering issue.
	 */
	dma_fence_end_signalling(cookie);
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);
	cookie = dma_fence_begin_signalling();

	/* Restart the schedulers */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_start(&pfdev->js->queue[i].sched, true);

	/* Re-enable job interrupts now that everything has been restarted. */
	job_write(pfdev, JOB_INT_MASK,
		  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
		  GENMASK(NUM_JOB_SLOTS - 1, 0));

	dma_fence_end_signalling(cookie);
}

static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
						     *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int js = panfrost_job_get_slot(job);

	/*
	 * If the GPU managed to complete this job's fence, the timeout is
	 * spurious. Bail out.
	 */
	if (dma_fence_is_signaled(job->done_fence))
		return DRM_GPU_SCHED_STAT_NOMINAL;

	/*
	 * Panfrost IRQ handler may take a long time to process an interrupt
	 * if there is another IRQ handler hogging the processing.
	 * For example, the HDMI encoder driver might be stuck in the IRQ
	 * handler for a significant time in the case of a bad cable connection.
	 * In order to catch such cases and not report spurious Panfrost
	 * job timeouts, synchronize the IRQ handler and re-check the fence
	 * status.
	 */
	synchronize_irq(pfdev->js->irq);

	if (dma_fence_is_signaled(job->done_fence)) {
		dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n");
		return DRM_GPU_SCHED_STAT_NOMINAL;
	}

	dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
		js,
		job_read(pfdev, JS_CONFIG(js)),
		job_read(pfdev, JS_STATUS(js)),
		job_read(pfdev, JS_HEAD_LO(js)),
		job_read(pfdev, JS_TAIL_LO(js)),
		sched_job);

	panfrost_core_dump(job);

	atomic_set(&pfdev->reset.pending, 1);
	panfrost_reset(pfdev, sched_job);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void panfrost_reset_work(struct work_struct *work)
{
	struct panfrost_device *pfdev;

	pfdev = container_of(work, struct panfrost_device, reset.work);
	panfrost_reset(pfdev, NULL);
}

static const struct drm_sched_backend_ops panfrost_sched_ops = {
	.run_job = panfrost_job_run,
	.timedout_job = panfrost_job_timedout,
	.free_job = panfrost_job_free
};

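/* Job interrupts are handled in two stages: the hard IRQ handler masks job
 * interrupts and wakes this thread, which does the actual processing and
 * re-enables them when done.
 */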
static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data)
{
	struct panfrost_device *pfdev = data;

	panfrost_job_handle_irqs(pfdev);

	/* Enable interrupts only if we're not about to get suspended */
	if (!test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
		job_write(pfdev, JOB_INT_MASK,
			  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
			  GENMASK(NUM_JOB_SLOTS - 1, 0));

	return IRQ_HANDLED;
}

static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
{
	struct panfrost_device *pfdev = data;
	u32 status;

	if (test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
		return IRQ_NONE;

	status = job_read(pfdev, JOB_INT_STAT);
	if (!status)
		return IRQ_NONE;

	job_write(pfdev, JOB_INT_MASK, 0);
	return IRQ_WAKE_THREAD;
}

int panfrost_job_init(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js;
	unsigned int nentries = 2;
	int ret, j;

	/* All GPUs have two entries per queue, but without jobchain
	 * disambiguation stopping the right job in the close path is tricky,
	 * so let's just advertise one entry in that case.
	 */
	if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
		nentries = 1;

	pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL);
	if (!js)
		return -ENOMEM;

	INIT_WORK(&pfdev->reset.work, panfrost_reset_work);
	spin_lock_init(&js->job_lock);

	js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
	if (js->irq < 0)
		return js->irq;

	ret = devm_request_threaded_irq(pfdev->dev, js->irq,
					panfrost_job_irq_handler,
					panfrost_job_irq_handler_thread,
					IRQF_SHARED, KBUILD_MODNAME "-job",
					pfdev);
	if (ret) {
		dev_err(pfdev->dev, "failed to request job irq");
		return ret;
	}

	pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0);
	if (!pfdev->reset.wq)
		return -ENOMEM;

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		js->queue[j].fence_context = dma_fence_context_alloc(1);

		ret = drm_sched_init(&js->queue[j].sched,
				     &panfrost_sched_ops, NULL,
				     DRM_SCHED_PRIORITY_COUNT,
				     nentries, 0,
				     msecs_to_jiffies(JOB_TIMEOUT_MS),
				     pfdev->reset.wq,
				     NULL, "pan_js", pfdev->dev);
		if (ret) {
			dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
			goto err_sched;
		}
	}

	panfrost_job_enable_interrupts(pfdev);

	return 0;

err_sched:
	for (j--; j >= 0; j--)
		drm_sched_fini(&js->queue[j].sched);

	destroy_workqueue(pfdev->reset.wq);
	return ret;
}

void panfrost_job_fini(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int j;

	job_write(pfdev, JOB_INT_MASK, 0);

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		drm_sched_fini(&js->queue[j].sched);
	}

	cancel_work_sync(&pfdev->reset.work);
	destroy_workqueue(pfdev->reset.wq);
}

int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
{
	struct panfrost_device *pfdev = panfrost_priv->pfdev;
	struct panfrost_job_slot *js = pfdev->js;
	struct drm_gpu_scheduler *sched;
	int ret, i;

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		sched = &js->queue[i].sched;
		ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
					    DRM_SCHED_PRIORITY_NORMAL, &sched,
					    1, NULL);
		if (WARN_ON(ret))
			return ret;
	}
	return 0;
}

void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
{
	struct panfrost_device *pfdev = panfrost_priv->pfdev;
	int i;

	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);

	/* Kill in-flight jobs */
	spin_lock(&pfdev->js->job_lock);
	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i];
		int j;

		for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
			struct panfrost_job *job = pfdev->jobs[i][j];
			u32 cmd;

			if (!job || job->base.entity != entity)
				continue;

			if (j == 1) {
				/* Try to cancel the job before it starts */
				job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
				/* Reset the job head so it doesn't get restarted if
				 * the job in the first slot failed.
				 */
				job->jc = 0;
			}

			if (panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
				cmd = panfrost_get_job_chain_flag(job) ?
				      JS_COMMAND_HARD_STOP_1 :
				      JS_COMMAND_HARD_STOP_0;
			} else {
				cmd = JS_COMMAND_HARD_STOP;
			}

			job_write(pfdev, JS_COMMAND(i), cmd);

			/* Jobs can outlive their file context */
			job->engine_usage = NULL;
		}
	}
	spin_unlock(&pfdev->js->job_lock);
}

int panfrost_job_is_idle(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int i;

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* If there are any jobs in the HW queue, we're not idle */
		if (atomic_read(&js->queue[i].sched.credit_count))
			return false;
	}

	return true;
}