gpu_scheduler.h source code [linux/include/drm/gpu_scheduler.h]

1	/*
2	* Copyright 2015 Advanced Micro Devices, Inc.
3	*
4	* Permission is hereby granted, free of charge, to any person obtaining a
5	* copy of this software and associated documentation files (the "Software"),
6	* to deal in the Software without restriction, including without limitation
7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8	* and/or sell copies of the Software, and to permit persons to whom the
9	* Software is furnished to do so, subject to the following conditions:
10	*
11	* The above copyright notice and this permission notice shall be included in
12	* all copies or substantial portions of the Software.
13	*
14	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17	* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20	* OTHER DEALINGS IN THE SOFTWARE.
21	*
22	*/
23
24	#ifndef _DRM_GPU_SCHEDULER_H_
25	#define _DRM_GPU_SCHEDULER_H_
26
27	#include <drm/spsc_queue.h>
28	#include <linux/dma-fence.h>
29	#include <linux/completion.h>
30	#include <linux/xarray.h>
31	#include <linux/workqueue.h>
32
/*
 * 1000 ms expressed in jiffies.
 *
 * NOTE(review): going by the name, presumably the longest time to wait for a
 * scheduler entity's queue to become empty - confirm against the users.
 */
#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY	msecs_to_jiffies(1000)
34
/**
 * DRM_SCHED_FENCE_DONT_PIPELINE - Prevent dependency pipelining
 *
 * Setting this flag on a scheduler fence prevents pipelining of jobs depending
 * on this fence. In other words we always insert a full CPU round trip before
 * dependent jobs are pushed to the hw queue.
 */
#define DRM_SCHED_FENCE_DONT_PIPELINE	DMA_FENCE_FLAG_USER_BITS
43
/**
 * DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT - A fence deadline hint has been set
 *
 * Because a deadline hint can be set before the backing hw fence is created,
 * we need to keep track of whether a deadline has already been set.
 */
#define DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT	(DMA_FENCE_FLAG_USER_BITS + 1)
52
/*
 * Forward declarations for types that this header only uses as pointer or
 * by-value prototype parameters, or that are defined further down.
 */
enum dma_resv_usage;
struct dma_resv;
struct drm_gem_object;

struct drm_gpu_scheduler;
struct drm_sched_rq;

struct drm_file;
61
/* These are often used as an (initial) index
 * to an array, and as such should start at 0.
 */
enum drm_sched_priority {
	DRM_SCHED_PRIORITY_KERNEL,
	DRM_SCHED_PRIORITY_HIGH,
	DRM_SCHED_PRIORITY_NORMAL,
	DRM_SCHED_PRIORITY_LOW,

	/* Number of priority levels above; not a priority itself. */
	DRM_SCHED_PRIORITY_COUNT
};
73
/* Used to choose between FIFO and RR job scheduling */
extern int drm_sched_policy;

/* Values for drm_sched_policy. */
#define DRM_SCHED_POLICY_RR	0
#define DRM_SCHED_POLICY_FIFO	1
79
80	/**
81	* struct drm_sched_entity - A wrapper around a job queue (typically
82	* attached to the DRM file_priv).
83	*
84	* Entities will emit jobs in order to their corresponding hardware
85	* ring, and the scheduler will alternate between entities based on
86	* scheduling policy.
87	*/
88	struct drm_sched_entity {
89	/**
90	* @list:
91	*
92	* Used to append this struct to the list of entities in the runqueue
93	* @rq under &drm_sched_rq.entities.
94	*
95	* Protected by &drm_sched_rq.lock of @rq.
96	*/
97	struct list_head list;
98
99	/**
100	* @rq:
101	*
102	* Runqueue on which this entity is currently scheduled.
103	*
104	* FIXME: Locking is very unclear for this. Writers are protected by
105	* @rq_lock, but readers are generally lockless and seem to just race
106	* with not even a READ_ONCE.
107	*/
108	struct drm_sched_rq *rq;
109
110	/**
111	* @sched_list:
112	*
113	* A list of schedulers (struct drm_gpu_scheduler). Jobs from this entity can
114	* be scheduled on any scheduler on this list.
115	*
116	* This can be modified by calling drm_sched_entity_modify_sched().
117	* Locking is entirely up to the driver, see the above function for more
118	* details.
119	*
120	* This will be set to NULL if &num_sched_list equals 1 and @rq has been
121	* set already.
122	*
123	* FIXME: This means priority changes through
124	* drm_sched_entity_set_priority() will be lost henceforth in this case.
125	*/
126	struct drm_gpu_scheduler **sched_list;
127
128	/**
129	* @num_sched_list:
130	*
131	* Number of drm_gpu_schedulers in the @sched_list.
132	*/
133	unsigned int num_sched_list;
134
135	/**
136	* @priority:
137	*
138	* Priority of the entity. This can be modified by calling
139	* drm_sched_entity_set_priority(). Protected by &rq_lock.
140	*/
141	enum drm_sched_priority priority;
142
143	/**
144	* @rq_lock:
145	*
146	* Lock to modify the runqueue to which this entity belongs.
147	*/
148	spinlock_t rq_lock;
149
150	/**
151	* @job_queue: the list of jobs of this entity.
152	*/
153	struct spsc_queue job_queue;
154
155	/**
156	* @fence_seq:
157	*
158	* A linearly increasing seqno incremented with each new
159	* &drm_sched_fence which is part of the entity.
160	*
161	* FIXME: Callers of drm_sched_job_arm() need to ensure correct locking,
162	* this doesn't need to be atomic.
163	*/
164	atomic_t fence_seq;
165
166	/**
167	* @fence_context:
168	*
169	* A unique context for all the fences which belong to this entity. The
170	* &drm_sched_fence.scheduled uses the fence_context but
171	* &drm_sched_fence.finished uses fence_context + 1.
172	*/
173	uint64_t fence_context;
174
175	/**
176	* @dependency:
177	*
178	* The dependency fence of the job which is on the top of the job queue.
179	*/
180	struct dma_fence *dependency;
181
182	/**
183	* @cb:
184	*
185	* Callback for the dependency fence above.
186	*/
187	struct dma_fence_cb cb;
188
189	/**
190	* @guilty:
191	*
192	* Points to entities' guilty.
193	*/
194	atomic_t *guilty;
195
196	/**
197	* @last_scheduled:
198	*
199	* Points to the finished fence of the last scheduled job. Only written
200	* by the scheduler thread, can be accessed locklessly from
201	* drm_sched_job_arm() iff the queue is empty.
202	*/
203	struct dma_fence __rcu *last_scheduled;
204
205	/**
206	* @last_user: last group leader pushing a job into the entity.
207	*/
208	struct task_struct *last_user;
209
210	/**
211	* @stopped:
212	*
213	* Marks the enity as removed from rq and destined for
214	* termination. This is set by calling drm_sched_entity_flush() and by
215	* drm_sched_fini().
216	*/
217	bool stopped;
218
219	/**
220	* @entity_idle:
221	*
222	* Signals when entity is not in use, used to sequence entity cleanup in
223	* drm_sched_entity_fini().
224	*/
225	struct completion entity_idle;
226
227	/**
228	* @oldest_job_waiting:
229	*
230	* Marks earliest job waiting in SW queue
231	*/
232	ktime_t oldest_job_waiting;
233
234	/**
235	* @rb_tree_node:
236	*
237	* The node used to insert this entity into time based priority queue
238	*/
239	struct rb_node rb_tree_node;
240
241	};
242
243	/**
244	* struct drm_sched_rq - queue of entities to be scheduled.
245	*
246	* @lock: to modify the entities list.
247	* @sched: the scheduler to which this rq belongs to.
248	* @entities: list of the entities to be scheduled.
249	* @current_entity: the entity which is to be scheduled.
250	* @rb_tree_root: root of time based priory queue of entities for FIFO scheduling
251	*
252	* Run queue is a set of entities scheduling command submissions for
253	* one specific ring. It implements the scheduling policy that selects
254	* the next entity to emit commands from.
255	*/
256	struct drm_sched_rq {
257	spinlock_t lock;
258	struct drm_gpu_scheduler *sched;
259	struct list_head entities;
260	struct drm_sched_entity *current_entity;
261	struct rb_root_cached rb_tree_root;
262	};
263
264	/**
265	* struct drm_sched_fence - fences corresponding to the scheduling of a job.
266	*/
267	struct drm_sched_fence {
268	/**
269	* @scheduled: this fence is what will be signaled by the scheduler
270	* when the job is scheduled.
271	*/
272	struct dma_fence scheduled;
273
274	/**
275	* @finished: this fence is what will be signaled by the scheduler
276	* when the job is completed.
277	*
278	* When setting up an out fence for the job, you should use
279	* this, since it's available immediately upon
280	* drm_sched_job_init(), and the fence returned by the driver
281	* from run_job() won't be created until the dependencies have
282	* resolved.
283	*/
284	struct dma_fence finished;
285
286	/**
287	* @deadline: deadline set on &drm_sched_fence.finished which
288	* potentially needs to be propagated to &drm_sched_fence.parent
289	*/
290	ktime_t deadline;
291
292	/**
293	* @parent: the fence returned by &drm_sched_backend_ops.run_job
294	* when scheduling the job on hardware. We signal the
295	* &drm_sched_fence.finished fence once parent is signalled.
296	*/
297	struct dma_fence *parent;
298	/**
299	* @sched: the scheduler instance to which the job having this struct
300	* belongs to.
301	*/
302	struct drm_gpu_scheduler *sched;
303	/**
304	* @lock: the lock used by the scheduled and the finished fences.
305	*/
306	spinlock_t lock;
307	/**
308	* @owner: job owner for debugging
309	*/
310	void *owner;
311	};
312
/*
 * Map a &struct dma_fence pointer to a &struct drm_sched_fence pointer.
 *
 * NOTE(review): going by the name, presumably yields the scheduler fence that
 * embeds @f as its scheduled or finished fence - confirm the behaviour for
 * fences that are not scheduler fences against the implementation.
 */
struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
314
315	/**
316	* struct drm_sched_job - A job to be run by an entity.
317	*
318	* @queue_node: used to append this struct to the queue of jobs in an entity.
319	* @list: a job participates in a "pending" and "done" lists.
320	* @sched: the scheduler instance on which this job is scheduled.
321	* @s_fence: contains the fences for the scheduling of job.
322	* @finish_cb: the callback for the finished fence.
323	* @credits: the number of credits this job contributes to the scheduler
324	* @work: Helper to reschdeule job kill to different context.
325	* @id: a unique id assigned to each job scheduled on the scheduler.
326	* @karma: increment on every hang caused by this job. If this exceeds the hang
327	* limit of the scheduler then the job is marked guilty and will not
328	* be scheduled further.
329	* @s_priority: the priority of the job.
330	* @entity: the entity to which this job belongs.
331	* @cb: the callback for the parent fence in s_fence.
332	*
333	* A job is created by the driver using drm_sched_job_init(), and
334	* should call drm_sched_entity_push_job() once it wants the scheduler
335	* to schedule the job.
336	*/
337	struct drm_sched_job {
338	struct spsc_node queue_node;
339	struct list_head list;
340	struct drm_gpu_scheduler *sched;
341	struct drm_sched_fence *s_fence;
342
343	u32 credits;
344
345	/*
346	* work is used only after finish_cb has been used and will not be
347	* accessed anymore.
348	*/
349	union {
350	struct dma_fence_cb finish_cb;
351	struct work_struct work;
352	};
353
354	uint64_t id;
355	atomic_t karma;
356	enum drm_sched_priority s_priority;
357	struct drm_sched_entity *entity;
358	struct dma_fence_cb cb;
359	/**
360	* @dependencies:
361	*
362	* Contains the dependencies as struct dma_fence for this job, see
363	* drm_sched_job_add_dependency() and
364	* drm_sched_job_add_implicit_dependencies().
365	*/
366	struct xarray dependencies;
367
368	/* @last_dependency: tracks @dependencies as they signal /
369	unsigned long last_dependency;
370
371	/**
372	* @submit_ts:
373	*
374	* When the job was pushed into the entity queue.
375	*/
376	ktime_t submit_ts;
377	};
378
379	static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
380	int threshold)
381	{
382	return s_job && atomic_inc_return(v: &s_job->karma) > threshold;
383	}
384
/*
 * Status codes returned by &drm_sched_backend_ops.timedout_job; see that
 * callback's documentation for when NOMINAL and ENODEV are to be returned.
 */
enum drm_gpu_sched_stat {
	DRM_GPU_SCHED_STAT_NONE, /* Reserve 0 */
	DRM_GPU_SCHED_STAT_NOMINAL,
	DRM_GPU_SCHED_STAT_ENODEV,
};
390
391	/**
392	* struct drm_sched_backend_ops - Define the backend operations
393	* called by the scheduler
394	*
395	* These functions should be implemented in the driver side.
396	*/
397	struct drm_sched_backend_ops {
398	/**
399	* @prepare_job:
400	*
401	* Called when the scheduler is considering scheduling this job next, to
402	* get another struct dma_fence for this job to block on. Once it
403	* returns NULL, run_job() may be called.
404	*
405	* Can be NULL if no additional preparation to the dependencies are
406	* necessary. Skipped when jobs are killed instead of run.
407	*/
408	struct dma_fence (prepare_job)(struct drm_sched_job *sched_job,
409	struct drm_sched_entity *s_entity);
410
411	/**
412	* @run_job: Called to execute the job once all of the dependencies
413	* have been resolved. This may be called multiple times, if
414	* timedout_job() has happened and drm_sched_job_recovery()
415	* decides to try it again.
416	*/
417	struct dma_fence (run_job)(struct drm_sched_job *sched_job);
418
419	/**
420	* @timedout_job: Called when a job has taken too long to execute,
421	* to trigger GPU recovery.
422	*
423	* This method is called in a workqueue context.
424	*
425	* Drivers typically issue a reset to recover from GPU hangs, and this
426	* procedure usually follows the following workflow:
427	*
428	* 1. Stop the scheduler using drm_sched_stop(). This will park the
429	* scheduler thread and cancel the timeout work, guaranteeing that
430	* nothing is queued while we reset the hardware queue
431	* 2. Try to gracefully stop non-faulty jobs (optional)
432	* 3. Issue a GPU reset (driver-specific)
433	* 4. Re-submit jobs using drm_sched_resubmit_jobs()
434	* 5. Restart the scheduler using drm_sched_start(). At that point, new
435	* jobs can be queued, and the scheduler thread is unblocked
436	*
437	* Note that some GPUs have distinct hardware queues but need to reset
438	* the GPU globally, which requires extra synchronization between the
439	* timeout handler of the different &drm_gpu_scheduler. One way to
440	* achieve this synchronization is to create an ordered workqueue
441	* (using alloc_ordered_workqueue()) at the driver level, and pass this
442	* queue to drm_sched_init(), to guarantee that timeout handlers are
443	* executed sequentially. The above workflow needs to be slightly
444	* adjusted in that case:
445	*
446	* 1. Stop all schedulers impacted by the reset using drm_sched_stop()
447	* 2. Try to gracefully stop non-faulty jobs on all queues impacted by
448	* the reset (optional)
449	* 3. Issue a GPU reset on all faulty queues (driver-specific)
450	* 4. Re-submit jobs on all schedulers impacted by the reset using
451	* drm_sched_resubmit_jobs()
452	* 5. Restart all schedulers that were stopped in step #1 using
453	* drm_sched_start()
454	*
455	* Return DRM_GPU_SCHED_STAT_NOMINAL, when all is normal,
456	* and the underlying driver has started or completed recovery.
457	*
458	* Return DRM_GPU_SCHED_STAT_ENODEV, if the device is no longer
459	* available, i.e. has been unplugged.
460	*/
461	enum drm_gpu_sched_stat (timedout_job)(struct* drm_sched_job *sched_job);
462
463	/**
464	* @free_job: Called once the job's finished fence has been signaled
465	* and it's time to clean it up.
466	*/
467	void (free_job)(struct* drm_sched_job *sched_job);
468
469	/**
470	* @update_job_credits: Called when the scheduler is considering this
471	* job for execution.
472	*
473	* This callback returns the number of credits the job would take if
474	* pushed to the hardware. Drivers may use this to dynamically update
475	* the job's credit count. For instance, deduct the number of credits
476	* for already signalled native fences.
477	*
478	* This callback is optional.
479	*/
480	u32 (update_job_credits)(struct* drm_sched_job *sched_job);
481	};
482
483	/**
484	* struct drm_gpu_scheduler - scheduler instance-specific data
485	*
486	* @ops: backend operations provided by the driver.
487	* @credit_limit: the credit limit of this scheduler
488	* @credit_count: the current credit count of this scheduler
489	* @timeout: the time after which a job is removed from the scheduler.
490	* @name: name of the ring for which this scheduler is being used.
491	* @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT,
492	* as there's usually one run-queue per priority, but could be less.
493	* @sched_rq: An allocated array of run-queues of size @num_rqs;
494	* @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
495	* waits on this wait queue until all the scheduled jobs are
496	* finished.
497	* @job_id_count: used to assign unique id to the each job.
498	* @submit_wq: workqueue used to queue @work_run_job and @work_free_job
499	* @timeout_wq: workqueue used to queue @work_tdr
500	* @work_run_job: work which calls run_job op of each scheduler.
501	* @work_free_job: work which calls free_job op of each scheduler.
502	* @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
503	* timeout interval is over.
504	* @pending_list: the list of jobs which are currently in the job queue.
505	* @job_list_lock: lock to protect the pending_list.
506	* @hang_limit: once the hangs by a job crosses this limit then it is marked
507	* guilty and it will no longer be considered for scheduling.
508	* @score: score to help loadbalancer pick a idle sched
509	* @_score: score used when the driver doesn't provide one
510	* @ready: marks if the underlying HW is ready to work
511	* @free_guilty: A hit to time out handler to free the guilty job.
512	* @pause_submit: pause queuing of @work_run_job on @submit_wq
513	* @own_submit_wq: scheduler owns allocation of @submit_wq
514	* @dev: system &struct device
515	*
516	* One scheduler is implemented for each hardware ring.
517	*/
518	struct drm_gpu_scheduler {
519	const struct drm_sched_backend_ops *ops;
520	u32 credit_limit;
521	atomic_t credit_count;
522	long timeout;
523	const char *name;
524	u32 num_rqs;
525	struct drm_sched_rq **sched_rq;
526	wait_queue_head_t job_scheduled;
527	atomic64_t job_id_count;
528	struct workqueue_struct *submit_wq;
529	struct workqueue_struct *timeout_wq;
530	struct work_struct work_run_job;
531	struct work_struct work_free_job;
532	struct delayed_work work_tdr;
533	struct list_head pending_list;
534	spinlock_t job_list_lock;
535	int hang_limit;
536	atomic_t *score;
537	atomic_t _score;
538	bool ready;
539	bool free_guilty;
540	bool pause_submit;
541	bool own_submit_wq;
542	struct device *dev;
543	};
544
545	int drm_sched_init(struct drm_gpu_scheduler *sched,
546	const struct drm_sched_backend_ops *ops,
547	struct workqueue_struct *submit_wq,
548	u32 num_rqs, u32 credit_limit, unsigned int hang_limit,
549	long timeout, struct workqueue_struct *timeout_wq,
550	atomic_t score, const* char name, struct* device *dev);
551
552	void drm_sched_fini(struct drm_gpu_scheduler *sched);
553	int drm_sched_job_init(struct drm_sched_job *job,
554	struct drm_sched_entity *entity,
555	u32 credits, void *owner);
556	void drm_sched_job_arm(struct drm_sched_job *job);
557	int drm_sched_job_add_dependency(struct drm_sched_job *job,
558	struct dma_fence *fence);
559	int drm_sched_job_add_syncobj_dependency(struct drm_sched_job *job,
560	struct drm_file *file,
561	u32 handle,
562	u32 point);
563	int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job,
564	struct dma_resv *resv,
565	enum dma_resv_usage usage);
566	int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
567	struct drm_gem_object *obj,
568	bool write);
569
570
/*
 * Change the scheduler list of @entity. As stated at
 * &drm_sched_entity.sched_list, locking is entirely up to the driver.
 */
void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
				   struct drm_gpu_scheduler **sched_list,
				   unsigned int num_sched_list);
574
/*
 * Scheduler run-time control and hang handling.
 *
 * drm_sched_stop(), drm_sched_resubmit_jobs() and drm_sched_start() are the
 * building blocks of the reset workflow described at
 * &drm_sched_backend_ops.timedout_job.
 */
void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched);
void drm_sched_job_cleanup(struct drm_sched_job *job);
void drm_sched_wakeup(struct drm_gpu_scheduler *sched,
		      struct drm_sched_entity *entity);
bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched);
void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched);
void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched);
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery);
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
void drm_sched_increase_karma(struct drm_sched_job *bad);
void drm_sched_reset_karma(struct drm_sched_job *bad);
void drm_sched_increase_karma_ext(struct drm_sched_job *bad, int type);
bool drm_sched_dependency_optimized(struct dma_fence *fence,
				    struct drm_sched_entity *entity);
void drm_sched_fault(struct drm_gpu_scheduler *sched);
590
591	void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
592	struct drm_sched_entity *entity);
593	void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
594	struct drm_sched_entity *entity);
595
596	void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts);
597
598	int drm_sched_entity_init(struct drm_sched_entity *entity,
599	enum drm_sched_priority priority,
600	struct drm_gpu_scheduler **sched_list,
601	unsigned int num_sched_list,
602	atomic_t *guilty);
603	long drm_sched_entity_flush(struct drm_sched_entity entity, long* timeout);
604	void drm_sched_entity_fini(struct drm_sched_entity *entity);
605	void drm_sched_entity_destroy(struct drm_sched_entity *entity);
606	void drm_sched_entity_select_rq(struct drm_sched_entity *entity);
607	struct drm_sched_job drm_sched_entity_pop_job(struct* drm_sched_entity *entity);
608	void drm_sched_entity_push_job(struct drm_sched_job *sched_job);
609	void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
610	enum drm_sched_priority priority);
611	bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
612	int drm_sched_entity_error(struct drm_sched_entity *entity);
613
/*
 * Scheduler fence handling; see &struct drm_sched_fence for the meaning of
 * the scheduled, finished and parent fences.
 *
 * NOTE(review): presumably drm_sched_fence_scheduled() and
 * drm_sched_fence_finished() signal the respective embedded fence - confirm
 * against the implementation.
 */
struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *s_entity,
					      void *owner);
void drm_sched_fence_init(struct drm_sched_fence *fence,
			  struct drm_sched_entity *entity);
void drm_sched_fence_free(struct drm_sched_fence *fence);

void drm_sched_fence_scheduled(struct drm_sched_fence *fence,
			       struct dma_fence *parent);
void drm_sched_fence_finished(struct drm_sched_fence *fence, int result);
623
/*
 * Timeout suspension and scheduler selection.
 *
 * NOTE(review): presumably the value returned by drm_sched_suspend_timeout()
 * is what callers later hand back as @remaining to
 * drm_sched_resume_timeout(), and drm_sched_pick_best() chooses one scheduler
 * out of @sched_list (see &drm_gpu_scheduler.score) - confirm against the
 * implementation.
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched);
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
			      unsigned long remaining);
struct drm_gpu_scheduler *
drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
		    unsigned int num_sched_list);
630
631	#endif
632

source code of linux/include/drm/gpu_scheduler.h