// SPDX-License-Identifier: GPL-2.0
/*
 * Deadline Scheduling Class (SCHED_DEADLINE)
 *
 * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
 *
 * Tasks that periodically execute their instances for less than their
 * runtime won't miss any of their deadlines.
 * Tasks that are not periodic or sporadic, or that try to execute more
 * than their reserved bandwidth, will be slowed down (and may potentially
 * miss some of their deadlines), and won't affect any other task.
 *
 * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
 *                    Juri Lelli <juri.lelli@gmail.com>,
 *                    Michael Trimarchi <michael@amarulasolutions.com>,
 *                    Fabio Checconi <fchecconi@gmail.com>
 */
18
19#include <linux/cpuset.h>
20
21/*
22 * Default limits for DL period; on the top end we guard against small util
23 * tasks still getting ridiculously long effective runtimes, on the bottom end we
24 * guard against timer DoS.
25 */
26static unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
27static unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
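/*
 * For example, to allow admission of tasks with periods as short as 50us
 * (illustrative value), assuming CONFIG_SYSCTL is enabled so that the knobs
 * below are registered:
 *
 *	# echo 50 > /proc/sys/kernel/sched_deadline_period_min_us
 */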
28#ifdef CONFIG_SYSCTL
29static struct ctl_table sched_dl_sysctls[] = {
30 {
31 .procname = "sched_deadline_period_max_us",
32 .data = &sysctl_sched_dl_period_max,
33 .maxlen = sizeof(unsigned int),
34 .mode = 0644,
35 .proc_handler = proc_douintvec_minmax,
36 .extra1 = (void *)&sysctl_sched_dl_period_min,
37 },
38 {
39 .procname = "sched_deadline_period_min_us",
40 .data = &sysctl_sched_dl_period_min,
41 .maxlen = sizeof(unsigned int),
42 .mode = 0644,
43 .proc_handler = proc_douintvec_minmax,
44 .extra2 = (void *)&sysctl_sched_dl_period_max,
45 },
46 {}
47};
48
49static int __init sched_dl_sysctl_init(void)
50{
51 register_sysctl_init("kernel", sched_dl_sysctls);
52 return 0;
53}
54late_initcall(sched_dl_sysctl_init);
55#endif
56
57static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
58{
59 return container_of(dl_se, struct task_struct, dl);
60}
61
62static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
63{
64 return container_of(dl_rq, struct rq, dl);
65}
66
67static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
68{
69 struct task_struct *p = dl_task_of(dl_se);
70 struct rq *rq = task_rq(p);
71
72 return &rq->dl;
73}
74
75static inline int on_dl_rq(struct sched_dl_entity *dl_se)
76{
77 return !RB_EMPTY_NODE(&dl_se->rb_node);
78}
79
80#ifdef CONFIG_RT_MUTEXES
81static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
82{
83 return dl_se->pi_se;
84}
85
86static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
87{
88 return pi_of(dl_se) != dl_se;
89}
90#else
91static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
92{
93 return dl_se;
94}
95
96static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
97{
98 return false;
99}
100#endif
101
102#ifdef CONFIG_SMP
103static inline struct dl_bw *dl_bw_of(int i)
104{
105 RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
106 "sched RCU must be held");
107 return &cpu_rq(i)->rd->dl_bw;
108}
109
110static inline int dl_bw_cpus(int i)
111{
112 struct root_domain *rd = cpu_rq(i)->rd;
113 int cpus;
114
115 RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
116 "sched RCU must be held");
117
	if (cpumask_subset(rd->span, cpu_active_mask))
		return cpumask_weight(rd->span);
120
121 cpus = 0;
122
123 for_each_cpu_and(i, rd->span, cpu_active_mask)
124 cpus++;
125
126 return cpus;
127}
128
129static inline unsigned long __dl_bw_capacity(const struct cpumask *mask)
130{
131 unsigned long cap = 0;
132 int i;
133
134 for_each_cpu_and(i, mask, cpu_active_mask)
		cap += arch_scale_cpu_capacity(i);
136
137 return cap;
138}
139
140/*
141 * XXX Fix: If 'rq->rd == def_root_domain' perform AC against capacity
142 * of the CPU the task is running on rather rd's \Sum CPU capacity.
143 */
144static inline unsigned long dl_bw_capacity(int i)
145{
146 if (!sched_asym_cpucap_active() &&
147 arch_scale_cpu_capacity(cpu: i) == SCHED_CAPACITY_SCALE) {
148 return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
149 } else {
150 RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
151 "sched RCU must be held");
152
153 return __dl_bw_capacity(cpu_rq(i)->rd->span);
154 }
155}
156
157static inline bool dl_bw_visited(int cpu, u64 gen)
158{
159 struct root_domain *rd = cpu_rq(cpu)->rd;
160
161 if (rd->visit_gen == gen)
162 return true;
163
164 rd->visit_gen = gen;
165 return false;
166}
167
168static inline
169void __dl_update(struct dl_bw *dl_b, s64 bw)
170{
171 struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
172 int i;
173
174 RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
175 "sched RCU must be held");
176 for_each_cpu_and(i, rd->span, cpu_active_mask) {
177 struct rq *rq = cpu_rq(i);
178
179 rq->dl.extra_bw += bw;
180 }
181}
182#else
183static inline struct dl_bw *dl_bw_of(int i)
184{
185 return &cpu_rq(i)->dl.dl_bw;
186}
187
188static inline int dl_bw_cpus(int i)
189{
190 return 1;
191}
192
193static inline unsigned long dl_bw_capacity(int i)
194{
195 return SCHED_CAPACITY_SCALE;
196}
197
198static inline bool dl_bw_visited(int cpu, u64 gen)
199{
200 return false;
201}
202
203static inline
204void __dl_update(struct dl_bw *dl_b, s64 bw)
205{
206 struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
207
208 dl->extra_bw += bw;
209}
210#endif
211
212static inline
213void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
214{
215 dl_b->total_bw -= tsk_bw;
216 __dl_update(dl_b, bw: (s32)tsk_bw / cpus);
217}
218
219static inline
220void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
221{
222 dl_b->total_bw += tsk_bw;
223 __dl_update(dl_b, bw: -((s32)tsk_bw / cpus));
224}
225
226static inline bool
227__dl_overflow(struct dl_bw *dl_b, unsigned long cap, u64 old_bw, u64 new_bw)
228{
229 return dl_b->bw != -1 &&
230 cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
231}
232
233static inline
234void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
235{
236 u64 old = dl_rq->running_bw;
237
238 lockdep_assert_rq_held(rq: rq_of_dl_rq(dl_rq));
239 dl_rq->running_bw += dl_bw;
240 SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
241 SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
242 /* kick cpufreq (see the comment in kernel/sched/sched.h). */
243 cpufreq_update_util(rq: rq_of_dl_rq(dl_rq), flags: 0);
244}
245
246static inline
247void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
248{
249 u64 old = dl_rq->running_bw;
250
251 lockdep_assert_rq_held(rq: rq_of_dl_rq(dl_rq));
252 dl_rq->running_bw -= dl_bw;
253 SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
254 if (dl_rq->running_bw > old)
255 dl_rq->running_bw = 0;
256 /* kick cpufreq (see the comment in kernel/sched/sched.h). */
257 cpufreq_update_util(rq: rq_of_dl_rq(dl_rq), flags: 0);
258}
259
260static inline
261void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
262{
263 u64 old = dl_rq->this_bw;
264
265 lockdep_assert_rq_held(rq: rq_of_dl_rq(dl_rq));
266 dl_rq->this_bw += dl_bw;
267 SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
268}
269
270static inline
271void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
272{
273 u64 old = dl_rq->this_bw;
274
275 lockdep_assert_rq_held(rq: rq_of_dl_rq(dl_rq));
276 dl_rq->this_bw -= dl_bw;
277 SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
278 if (dl_rq->this_bw > old)
279 dl_rq->this_bw = 0;
280 SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
281}
282
283static inline
284void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
285{
286 if (!dl_entity_is_special(dl_se))
287 __add_rq_bw(dl_bw: dl_se->dl_bw, dl_rq);
288}
289
290static inline
291void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
292{
293 if (!dl_entity_is_special(dl_se))
294 __sub_rq_bw(dl_bw: dl_se->dl_bw, dl_rq);
295}
296
297static inline
298void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
299{
300 if (!dl_entity_is_special(dl_se))
301 __add_running_bw(dl_bw: dl_se->dl_bw, dl_rq);
302}
303
304static inline
305void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
306{
307 if (!dl_entity_is_special(dl_se))
308 __sub_running_bw(dl_bw: dl_se->dl_bw, dl_rq);
309}
310
311static void dl_change_utilization(struct task_struct *p, u64 new_bw)
312{
313 struct rq *rq;
314
315 WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
316
317 if (task_on_rq_queued(p))
318 return;
319
320 rq = task_rq(p);
	if (p->dl.dl_non_contending) {
		sub_running_bw(&p->dl, &rq->dl);
		p->dl.dl_non_contending = 0;
		/*
		 * If the timer handler is currently running and the
		 * timer cannot be canceled, inactive_task_timer()
		 * will see that dl_non_contending is not set, and
		 * will not touch the rq's active utilization,
		 * so we are still safe.
		 */
		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
			put_task_struct(p);
	}
	__sub_rq_bw(p->dl.dl_bw, &rq->dl);
	__add_rq_bw(new_bw, &rq->dl);
336}
337
338/*
339 * The utilization of a task cannot be immediately removed from
340 * the rq active utilization (running_bw) when the task blocks.
341 * Instead, we have to wait for the so called "0-lag time".
342 *
343 * If a task blocks before the "0-lag time", a timer (the inactive
344 * timer) is armed, and running_bw is decreased when the timer
345 * fires.
346 *
347 * If the task wakes up again before the inactive timer fires,
348 * the timer is canceled, whereas if the task wakes up after the
349 * inactive timer fired (and running_bw has been decreased) the
350 * task's utilization has to be added to running_bw again.
351 * A flag in the deadline scheduling entity (dl_non_contending)
352 * is used to avoid race conditions between the inactive timer handler
353 * and task wakeups.
354 *
355 * The following diagram shows how running_bw is updated. A task is
356 * "ACTIVE" when its utilization contributes to running_bw; an
357 * "ACTIVE contending" task is in the TASK_RUNNING state, while an
358 * "ACTIVE non contending" task is a blocked task for which the "0-lag time"
359 * has not passed yet. An "INACTIVE" task is a task for which the "0-lag"
360 * time already passed, which does not contribute to running_bw anymore.
361 * +------------------+
362 * wakeup | ACTIVE |
363 * +------------------>+ contending |
364 * | add_running_bw | |
365 * | +----+------+------+
366 * | | ^
367 * | dequeue | |
368 * +--------+-------+ | |
369 * | | t >= 0-lag | | wakeup
370 * | INACTIVE |<---------------+ |
371 * | | sub_running_bw | |
372 * +--------+-------+ | |
373 * ^ | |
374 * | t < 0-lag | |
375 * | | |
376 * | V |
377 * | +----+------+------+
378 * | sub_running_bw | ACTIVE |
379 * +-------------------+ |
380 * inactive timer | non contending |
381 * fired +------------------+
382 *
383 * The task_non_contending() function is invoked when a task
384 * blocks, and checks if the 0-lag time already passed or
385 * not (in the first case, it directly updates running_bw;
386 * in the second case, it arms the inactive timer).
387 *
388 * The task_contending() function is invoked when a task wakes
389 * up, and checks if the task is still in the "ACTIVE non contending"
390 * state or not (in the second case, it updates running_bw).
391 */
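/*
 * Worked example with illustrative numbers: a task with dl_runtime = 10ms
 * and dl_period = 100ms blocks with 4ms of runtime left and absolute
 * deadline d. Its "0-lag time" is
 *
 *	d - runtime * dl_period / dl_runtime = d - 4ms * 100/10 = d - 40ms
 *
 * If the task blocks before (d - 40ms), the inactive timer is armed to fire
 * at that instant and running_bw is only decreased then; if it blocks later,
 * running_bw is decreased right away (see task_non_contending() below).
 */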
392static void task_non_contending(struct task_struct *p)
393{
394 struct sched_dl_entity *dl_se = &p->dl;
395 struct hrtimer *timer = &dl_se->inactive_timer;
396 struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
397 struct rq *rq = rq_of_dl_rq(dl_rq);
398 s64 zerolag_time;
399
400 /*
401 * If this is a non-deadline task that has been boosted,
402 * do nothing
403 */
404 if (dl_se->dl_runtime == 0)
405 return;
406
407 if (dl_entity_is_special(dl_se))
408 return;
409
410 WARN_ON(dl_se->dl_non_contending);
411
412 zerolag_time = dl_se->deadline -
413 div64_long((dl_se->runtime * dl_se->dl_period),
414 dl_se->dl_runtime);
415
	/*
	 * Using relative times instead of the absolute "0-lag time"
	 * allows us to simplify the code.
	 */
420 zerolag_time -= rq_clock(rq);
421
422 /*
423 * If the "0-lag time" already passed, decrease the active
424 * utilization now, instead of starting a timer
425 */
	if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
		if (dl_task(p))
			sub_running_bw(dl_se, dl_rq);
		if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
			struct dl_bw *dl_b = dl_bw_of(task_cpu(p));

			if (READ_ONCE(p->__state) == TASK_DEAD)
				sub_rq_bw(&p->dl, &rq->dl);
			raw_spin_lock(&dl_b->lock);
			__dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
			raw_spin_unlock(&dl_b->lock);
			__dl_clear_params(p);
		}

		return;
	}

	dl_se->dl_non_contending = 1;
	get_task_struct(p);
	hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL_HARD);
446}
447
448static void task_contending(struct sched_dl_entity *dl_se, int flags)
449{
450 struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
451
452 /*
453 * If this is a non-deadline task that has been boosted,
454 * do nothing
455 */
456 if (dl_se->dl_runtime == 0)
457 return;
458
459 if (flags & ENQUEUE_MIGRATED)
460 add_rq_bw(dl_se, dl_rq);
461
	if (dl_se->dl_non_contending) {
		dl_se->dl_non_contending = 0;
		/*
		 * If the timer handler is currently running and the
		 * timer cannot be canceled, inactive_task_timer()
		 * will see that dl_non_contending is not set, and
		 * will not touch the rq's active utilization,
		 * so we are still safe.
		 */
		if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
			put_task_struct(dl_task_of(dl_se));
	} else {
		/*
		 * Since "dl_non_contending" is not set, the
		 * task's utilization has already been removed from
		 * the active utilization (either when the task blocked
		 * or when the "inactive timer" fired).
		 * So, add it back.
		 */
		add_running_bw(dl_se, dl_rq);
	}
483}
484
485static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
486{
487 struct sched_dl_entity *dl_se = &p->dl;
488
489 return rb_first_cached(&dl_rq->root) == &dl_se->rb_node;
490}
491
492static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
493
494void init_dl_bw(struct dl_bw *dl_b)
495{
496 raw_spin_lock_init(&dl_b->lock);
497 if (global_rt_runtime() == RUNTIME_INF)
498 dl_b->bw = -1;
499 else
500 dl_b->bw = to_ratio(period: global_rt_period(), runtime: global_rt_runtime());
501 dl_b->total_bw = 0;
502}
503
504void init_dl_rq(struct dl_rq *dl_rq)
505{
506 dl_rq->root = RB_ROOT_CACHED;
507
508#ifdef CONFIG_SMP
509 /* zero means no -deadline tasks */
510 dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
511
512 dl_rq->overloaded = 0;
513 dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED;
514#else
515 init_dl_bw(&dl_rq->dl_bw);
516#endif
517
518 dl_rq->running_bw = 0;
519 dl_rq->this_bw = 0;
520 init_dl_rq_bw_ratio(dl_rq);
521}
522
523#ifdef CONFIG_SMP
524
525static inline int dl_overloaded(struct rq *rq)
526{
527 return atomic_read(v: &rq->rd->dlo_count);
528}
529
530static inline void dl_set_overload(struct rq *rq)
531{
532 if (!rq->online)
533 return;
534
535 cpumask_set_cpu(cpu: rq->cpu, dstp: rq->rd->dlo_mask);
536 /*
537 * Must be visible before the overload count is
538 * set (as in sched_rt.c).
539 *
540 * Matched by the barrier in pull_dl_task().
541 */
542 smp_wmb();
543 atomic_inc(v: &rq->rd->dlo_count);
544}
545
546static inline void dl_clear_overload(struct rq *rq)
547{
548 if (!rq->online)
549 return;
550
551 atomic_dec(v: &rq->rd->dlo_count);
552 cpumask_clear_cpu(cpu: rq->cpu, dstp: rq->rd->dlo_mask);
553}
554
555#define __node_2_pdl(node) \
556 rb_entry((node), struct task_struct, pushable_dl_tasks)
557
558static inline bool __pushable_less(struct rb_node *a, const struct rb_node *b)
559{
560 return dl_entity_preempt(a: &__node_2_pdl(a)->dl, b: &__node_2_pdl(b)->dl);
561}
562
563static inline int has_pushable_dl_tasks(struct rq *rq)
564{
565 return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
566}
567
568/*
569 * The list of pushable -deadline task is not a plist, like in
570 * sched_rt.c, it is an rb-tree with tasks ordered by deadline.
571 */
572static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
573{
574 struct rb_node *leftmost;
575
576 WARN_ON_ONCE(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
577
578 leftmost = rb_add_cached(node: &p->pushable_dl_tasks,
579 tree: &rq->dl.pushable_dl_tasks_root,
580 less: __pushable_less);
581 if (leftmost)
582 rq->dl.earliest_dl.next = p->dl.deadline;
583
584 if (!rq->dl.overloaded) {
585 dl_set_overload(rq);
586 rq->dl.overloaded = 1;
587 }
588}
589
590static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
591{
592 struct dl_rq *dl_rq = &rq->dl;
593 struct rb_root_cached *root = &dl_rq->pushable_dl_tasks_root;
594 struct rb_node *leftmost;
595
596 if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
597 return;
598
599 leftmost = rb_erase_cached(node: &p->pushable_dl_tasks, root);
600 if (leftmost)
601 dl_rq->earliest_dl.next = __node_2_pdl(leftmost)->dl.deadline;
602
603 RB_CLEAR_NODE(&p->pushable_dl_tasks);
604
605 if (!has_pushable_dl_tasks(rq) && rq->dl.overloaded) {
606 dl_clear_overload(rq);
607 rq->dl.overloaded = 0;
608 }
609}
610
611static int push_dl_task(struct rq *rq);
612
613static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
614{
615 return rq->online && dl_task(p: prev);
616}
617
618static DEFINE_PER_CPU(struct balance_callback, dl_push_head);
619static DEFINE_PER_CPU(struct balance_callback, dl_pull_head);
620
621static void push_dl_tasks(struct rq *);
622static void pull_dl_task(struct rq *);
623
624static inline void deadline_queue_push_tasks(struct rq *rq)
625{
626 if (!has_pushable_dl_tasks(rq))
627 return;
628
629 queue_balance_callback(rq, head: &per_cpu(dl_push_head, rq->cpu), func: push_dl_tasks);
630}
631
632static inline void deadline_queue_pull_task(struct rq *rq)
633{
634 queue_balance_callback(rq, head: &per_cpu(dl_pull_head, rq->cpu), func: pull_dl_task);
635}
636
637static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
638
639static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
640{
641 struct rq *later_rq = NULL;
642 struct dl_bw *dl_b;
643
644 later_rq = find_lock_later_rq(task: p, rq);
645 if (!later_rq) {
646 int cpu;
647
648 /*
649 * If we cannot preempt any rq, fall back to pick any
650 * online CPU:
651 */
652 cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
653 if (cpu >= nr_cpu_ids) {
654 /*
655 * Failed to find any suitable CPU.
656 * The task will never come back!
657 */
658 WARN_ON_ONCE(dl_bandwidth_enabled());
659
660 /*
661 * If admission control is disabled we
662 * try a little harder to let the task
663 * run.
664 */
665 cpu = cpumask_any(cpu_active_mask);
666 }
667 later_rq = cpu_rq(cpu);
668 double_lock_balance(this_rq: rq, busiest: later_rq);
669 }
670
671 if (p->dl.dl_non_contending || p->dl.dl_throttled) {
672 /*
673 * Inactive timer is armed (or callback is running, but
674 * waiting for us to release rq locks). In any case, when it
675 * will fire (or continue), it will see running_bw of this
676 * task migrated to later_rq (and correctly handle it).
677 */
678 sub_running_bw(dl_se: &p->dl, dl_rq: &rq->dl);
679 sub_rq_bw(dl_se: &p->dl, dl_rq: &rq->dl);
680
681 add_rq_bw(dl_se: &p->dl, dl_rq: &later_rq->dl);
682 add_running_bw(dl_se: &p->dl, dl_rq: &later_rq->dl);
683 } else {
684 sub_rq_bw(dl_se: &p->dl, dl_rq: &rq->dl);
685 add_rq_bw(dl_se: &p->dl, dl_rq: &later_rq->dl);
686 }
687
688 /*
689 * And we finally need to fixup root_domain(s) bandwidth accounting,
690 * since p is still hanging out in the old (now moved to default) root
691 * domain.
692 */
693 dl_b = &rq->rd->dl_bw;
694 raw_spin_lock(&dl_b->lock);
695 __dl_sub(dl_b, tsk_bw: p->dl.dl_bw, cpus: cpumask_weight(srcp: rq->rd->span));
696 raw_spin_unlock(&dl_b->lock);
697
698 dl_b = &later_rq->rd->dl_bw;
699 raw_spin_lock(&dl_b->lock);
700 __dl_add(dl_b, tsk_bw: p->dl.dl_bw, cpus: cpumask_weight(srcp: later_rq->rd->span));
701 raw_spin_unlock(&dl_b->lock);
702
703 set_task_cpu(p, cpu: later_rq->cpu);
704 double_unlock_balance(this_rq: later_rq, busiest: rq);
705
706 return later_rq;
707}
708
709#else
710
711static inline
712void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
713{
714}
715
716static inline
717void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
718{
719}
720
721static inline
722void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
723{
724}
725
726static inline
727void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
728{
729}
730
731static inline void deadline_queue_push_tasks(struct rq *rq)
732{
733}
734
735static inline void deadline_queue_pull_task(struct rq *rq)
736{
737}
738#endif /* CONFIG_SMP */
739
740static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
741static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
742static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p, int flags);
743
744static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se,
745 struct rq *rq)
746{
747 /* for non-boosted task, pi_of(dl_se) == dl_se */
748 dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
749 dl_se->runtime = pi_of(dl_se)->dl_runtime;
750}
751
752/*
753 * We are being explicitly informed that a new instance is starting,
754 * and this means that:
755 * - the absolute deadline of the entity has to be placed at
756 * current time + relative deadline;
757 * - the runtime of the entity has to be set to the maximum value.
758 *
759 * The capability of specifying such event is useful whenever a -deadline
760 * entity wants to (try to!) synchronize its behaviour with the scheduler's
761 * one, and to (try to!) reconcile itself with its own scheduling
762 * parameters.
763 */
764static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
765{
766 struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
767 struct rq *rq = rq_of_dl_rq(dl_rq);
768
769 WARN_ON(is_dl_boosted(dl_se));
770 WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
771
772 /*
773 * We are racing with the deadline timer. So, do nothing because
774 * the deadline timer handler will take care of properly recharging
775 * the runtime and postponing the deadline
776 */
777 if (dl_se->dl_throttled)
778 return;
779
780 /*
781 * We use the regular wall clock time to set deadlines in the
782 * future; in fact, we must consider execution overheads (time
783 * spent on hardirq context, etc.).
784 */
785 replenish_dl_new_period(dl_se, rq);
786}
787
/*
 * Pure Earliest Deadline First (EDF) scheduling does not deal with the
 * possibility of an entity lasting more than what it declared, and thus
 * exhausting its runtime.
 *
 * Here we are interested in making runtime overrun possible, but we do
 * not want an entity which is misbehaving to affect the scheduling of all
 * other entities.
 * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
 * is used, in order to confine each entity within its own bandwidth.
 *
 * This function deals exactly with that, and ensures that when the runtime
 * of an entity is replenished, its deadline is also postponed. That ensures
 * the overrunning entity can't interfere with other entities in the system and
 * can't make them miss their deadlines. Reasons why this kind of overrun
 * could happen are, typically, an entity voluntarily trying to exceed its
 * runtime, or having underestimated it during sched_setattr().
 */
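/*
 * Worked example with illustrative numbers: with dl_runtime = 10ms and
 * dl_period = 100ms, an entity found with runtime = -25ms (a 25ms overrun)
 * has its deadline pushed 3 periods (300ms) into the future and ends up
 * with 5ms of runtime, because the loop below keeps adding
 * (dl_period, dl_runtime) pairs until the runtime becomes positive. On
 * average, the entity is thus still confined to its reserved bandwidth.
 */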
806static void replenish_dl_entity(struct sched_dl_entity *dl_se)
807{
808 struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
809 struct rq *rq = rq_of_dl_rq(dl_rq);
810
811 WARN_ON_ONCE(pi_of(dl_se)->dl_runtime <= 0);
812
813 /*
814 * This could be the case for a !-dl task that is boosted.
815 * Just go with full inherited parameters.
816 */
817 if (dl_se->dl_deadline == 0)
818 replenish_dl_new_period(dl_se, rq);
819
820 if (dl_se->dl_yielded && dl_se->runtime > 0)
821 dl_se->runtime = 0;
822
823 /*
824 * We keep moving the deadline away until we get some
825 * available runtime for the entity. This ensures correct
826 * handling of situations where the runtime overrun is
827 * arbitrary large.
828 */
829 while (dl_se->runtime <= 0) {
830 dl_se->deadline += pi_of(dl_se)->dl_period;
831 dl_se->runtime += pi_of(dl_se)->dl_runtime;
832 }
833
	/*
	 * At this point, the deadline really should be "in
	 * the future" with respect to rq->clock. If it's
	 * not, we are, for some reason, lagging too much!
	 * Anyway, after having warned userspace about that,
	 * we still try to keep things running by
	 * resetting the deadline and the budget of the
	 * entity.
	 */
	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
		printk_deferred_once("sched: DL replenish lagged too much\n");
		replenish_dl_new_period(dl_se, rq);
	}
847
848 if (dl_se->dl_yielded)
849 dl_se->dl_yielded = 0;
850 if (dl_se->dl_throttled)
851 dl_se->dl_throttled = 0;
852}
853
854/*
855 * Here we check if --at time t-- an entity (which is probably being
856 * [re]activated or, in general, enqueued) can use its remaining runtime
857 * and its current deadline _without_ exceeding the bandwidth it is
858 * assigned (function returns true if it can't). We are in fact applying
859 * one of the CBS rules: when a task wakes up, if the residual runtime
860 * over residual deadline fits within the allocated bandwidth, then we
861 * can keep the current (absolute) deadline and residual budget without
862 * disrupting the schedulability of the system. Otherwise, we should
863 * refill the runtime and set the deadline a period in the future,
864 * because keeping the current (absolute) deadline of the task would
865 * result in breaking guarantees promised to other tasks (refer to
866 * Documentation/scheduler/sched-deadline.rst for more information).
867 *
868 * This function returns true if:
869 *
870 * runtime / (deadline - t) > dl_runtime / dl_deadline ,
871 *
872 * IOW we can't recycle current parameters.
873 *
874 * Notice that the bandwidth check is done against the deadline. For
 * tasks with deadline equal to period this is the same as using
876 * dl_period instead of dl_deadline in the equation above.
877 */
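/*
 * Worked example with illustrative numbers: take dl_runtime = 10ms and
 * dl_deadline = 100ms, i.e. an allowed density of 0.1. A task waking up
 * with 6ms of runtime left and 30ms to its deadline has 6/30 = 0.2 > 0.1,
 * so this function returns true and a replenishment with a fresh deadline
 * is needed. With only 2ms left and the same 30ms to the deadline,
 * 2/30 < 0.1 and the current deadline and runtime can be kept.
 */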
878static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
879{
880 u64 left, right;
881
882 /*
883 * left and right are the two sides of the equation above,
884 * after a bit of shuffling to use multiplications instead
885 * of divisions.
886 *
887 * Note that none of the time values involved in the two
888 * multiplications are absolute: dl_deadline and dl_runtime
889 * are the relative deadline and the maximum runtime of each
890 * instance, runtime is the runtime left for the last instance
891 * and (deadline - t), since t is rq->clock, is the time left
892 * to the (absolute) deadline. Even if overflowing the u64 type
893 * is very unlikely to occur in both cases, here we scale down
894 * as we want to avoid that risk at all. Scaling down by 10
895 * means that we reduce granularity to 1us. We are fine with it,
896 * since this is only a true/false check and, anyway, thinking
897 * of anything below microseconds resolution is actually fiction
898 * (but still we want to give the user that illusion >;).
899 */
900 left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
901 right = ((dl_se->deadline - t) >> DL_SCALE) *
902 (pi_of(dl_se)->dl_runtime >> DL_SCALE);
903
	return dl_time_before(right, left);
905}
906
/*
 * Revised wakeup rule [1]: For self-suspending tasks, rather than
 * re-initializing the task's runtime and deadline, the revised wakeup
 * rule adjusts the task's runtime so that the task cannot overrun its
 * density.
 *
 * Reasoning: a task may overrun the density if:
 *	runtime / (deadline - t) > dl_runtime / dl_deadline
 *
 * Therefore, runtime can be adjusted to:
 *	runtime = (dl_runtime / dl_deadline) * (deadline - t)
 *
 * This way, runtime equals the maximum amount the task can use without
 * breaking the density rule.
 *
 * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant
 * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24.
 */
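/*
 * For example (illustrative numbers): with dl_runtime = 10ms and
 * dl_deadline = 100ms (density 0.1), a task waking up 30ms before its
 * current deadline gets its runtime clamped to 0.1 * 30ms = 3ms, so it
 * cannot exceed its allowed density before the deadline. In the code
 * below, dl_density holds dl_runtime / dl_deadline scaled by 2^BW_SHIFT,
 * hence the right shift.
 */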
925static void
926update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq)
927{
928 u64 laxity = dl_se->deadline - rq_clock(rq);
929
930 /*
931 * If the task has deadline < period, and the deadline is in the past,
932 * it should already be throttled before this check.
933 *
934 * See update_dl_entity() comments for further details.
935 */
936 WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq)));
937
938 dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
939}
940
941/*
942 * Regarding the deadline, a task with implicit deadline has a relative
943 * deadline == relative period. A task with constrained deadline has a
944 * relative deadline <= relative period.
945 *
946 * We support constrained deadline tasks. However, there are some restrictions
947 * applied only for tasks which do not have an implicit deadline. See
948 * update_dl_entity() to know more about such restrictions.
949 *
950 * The dl_is_implicit() returns true if the task has an implicit deadline.
951 */
952static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
953{
954 return dl_se->dl_deadline == dl_se->dl_period;
955}
956
957/*
958 * When a deadline entity is placed in the runqueue, its runtime and deadline
959 * might need to be updated. This is done by a CBS wake up rule. There are two
960 * different rules: 1) the original CBS; and 2) the Revisited CBS.
961 *
962 * When the task is starting a new period, the Original CBS is used. In this
963 * case, the runtime is replenished and a new absolute deadline is set.
964 *
 * When a task is queued before the beginning of the next period, using the
 * remaining runtime and deadline could make the entity overflow, see
 * dl_entity_overflow() to find more about runtime overflow. When such a case
 * is detected, the runtime and deadline need to be updated.
969 *
 * If the task has an implicit deadline, i.e., deadline == period, the Original
 * CBS is applied. The runtime is replenished and a new absolute deadline is
 * set, as in the previous cases.
973 *
974 * However, the Original CBS does not work properly for tasks with
975 * deadline < period, which are said to have a constrained deadline. By
976 * applying the Original CBS, a constrained deadline task would be able to run
977 * runtime/deadline in a period. With deadline < period, the task would
978 * overrun the runtime/period allowed bandwidth, breaking the admission test.
979 *
 * In order to prevent this misbehavior, the Revisited CBS is used for
 * constrained deadline tasks when a runtime overflow is detected. In the
 * Revisited CBS, rather than replenishing & setting a new absolute deadline,
 * the remaining runtime of the task is reduced to avoid runtime overflow.
 * Please refer to the comments in the update_dl_revised_wakeup() function to
 * find more about the Revised CBS rule.
986 */
987static void update_dl_entity(struct sched_dl_entity *dl_se)
988{
989 struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
990 struct rq *rq = rq_of_dl_rq(dl_rq);
991
	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
	    dl_entity_overflow(dl_se, rq_clock(rq))) {

		if (unlikely(!dl_is_implicit(dl_se) &&
			     !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
			     !is_dl_boosted(dl_se))) {
			update_dl_revised_wakeup(dl_se, rq);
			return;
		}

		replenish_dl_new_period(dl_se, rq);
	}
1004}
1005
1006static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
1007{
1008 return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
1009}
1010
1011/*
1012 * If the entity depleted all its runtime, and if we want it to sleep
1013 * while waiting for some new execution time to become available, we
1014 * set the bandwidth replenishment timer to the replenishment instant
1015 * and try to activate it.
1016 *
1017 * Notice that it is important for the caller to know if the timer
1018 * actually started or not (i.e., the replenishment instant is in
1019 * the future or in the past).
1020 */
1021static int start_dl_timer(struct task_struct *p)
1022{
1023 struct sched_dl_entity *dl_se = &p->dl;
1024 struct hrtimer *timer = &dl_se->dl_timer;
1025 struct rq *rq = task_rq(p);
1026 ktime_t now, act;
1027 s64 delta;
1028
1029 lockdep_assert_rq_held(rq);
1030
1031 /*
1032 * We want the timer to fire at the deadline, but considering
1033 * that it is actually coming from rq->clock and not from
1034 * hrtimer's time base reading.
1035 */
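	/*
	 * E.g. (illustrative numbers): if the next period starts at 1000ms
	 * on the rq_clock base, rq_clock currently reads 980ms and the
	 * hrtimer base reads 5000ms, then delta below is 4020ms and act
	 * becomes 5020ms: the timer fires 20ms from now on the hrtimer
	 * base, matching the 20ms left on the rq_clock base.
	 */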
	act = ns_to_ktime(dl_next_period(dl_se));
	now = hrtimer_cb_get_time(timer);
	delta = ktime_to_ns(now) - rq_clock(rq);
	act = ktime_add_ns(act, delta);

	/*
	 * If the expiry time already passed, e.g., because the value
	 * chosen as the deadline is too small, don't even try to
	 * start the timer in the past!
	 */
	if (ktime_us_delta(act, now) < 0)
		return 0;
1048
1049 /*
1050 * !enqueued will guarantee another callback; even if one is already in
1051 * progress. This ensures a balanced {get,put}_task_struct().
1052 *
1053 * The race against __run_timer() clearing the enqueued state is
1054 * harmless because we're holding task_rq()->lock, therefore the timer
1055 * expiring after we've done the check will wait on its task_rq_lock()
1056 * and observe our state.
1057 */
1058 if (!hrtimer_is_queued(timer)) {
1059 get_task_struct(t: p);
1060 hrtimer_start(timer, tim: act, mode: HRTIMER_MODE_ABS_HARD);
1061 }
1062
1063 return 1;
1064}
1065
1066/*
1067 * This is the bandwidth enforcement timer callback. If here, we know
1068 * a task is not on its dl_rq, since the fact that the timer was running
1069 * means the task is throttled and needs a runtime replenishment.
1070 *
1071 * However, what we actually do depends on the fact the task is active,
1072 * (it is on its rq) or has been removed from there by a call to
1073 * dequeue_task_dl(). In the former case we must issue the runtime
1074 * replenishment and add the task back to the dl_rq; in the latter, we just
1075 * do nothing but clearing dl_throttled, so that runtime and deadline
1076 * updating (and the queueing back to dl_rq) will be done by the
1077 * next call to enqueue_task_dl().
1078 */
1079static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
1080{
1081 struct sched_dl_entity *dl_se = container_of(timer,
1082 struct sched_dl_entity,
1083 dl_timer);
1084 struct task_struct *p = dl_task_of(dl_se);
1085 struct rq_flags rf;
1086 struct rq *rq;
1087
1088 rq = task_rq_lock(p, rf: &rf);
1089
1090 /*
1091 * The task might have changed its scheduling policy to something
1092 * different than SCHED_DEADLINE (through switched_from_dl()).
1093 */
1094 if (!dl_task(p))
1095 goto unlock;
1096
1097 /*
1098 * The task might have been boosted by someone else and might be in the
	 * boosting/deboosting path; in that case, it's not throttled.
1100 */
1101 if (is_dl_boosted(dl_se))
1102 goto unlock;
1103
1104 /*
1105 * Spurious timer due to start_dl_timer() race; or we already received
1106 * a replenishment from rt_mutex_setprio().
1107 */
1108 if (!dl_se->dl_throttled)
1109 goto unlock;
1110
1111 sched_clock_tick();
1112 update_rq_clock(rq);
1113
1114 /*
1115 * If the throttle happened during sched-out; like:
1116 *
1117 * schedule()
1118 * deactivate_task()
1119 * dequeue_task_dl()
1120 * update_curr_dl()
1121 * start_dl_timer()
1122 * __dequeue_task_dl()
1123 * prev->on_rq = 0;
1124 *
1125 * We can be both throttled and !queued. Replenish the counter
1126 * but do not enqueue -- wait for our wakeup to do that.
1127 */
1128 if (!task_on_rq_queued(p)) {
1129 replenish_dl_entity(dl_se);
1130 goto unlock;
1131 }
1132
1133#ifdef CONFIG_SMP
1134 if (unlikely(!rq->online)) {
1135 /*
1136 * If the runqueue is no longer available, migrate the
1137 * task elsewhere. This necessarily changes rq.
1138 */
1139 lockdep_unpin_lock(__rq_lockp(rq), rf.cookie);
1140 rq = dl_task_offline_migration(rq, p);
1141 rf.cookie = lockdep_pin_lock(__rq_lockp(rq));
1142 update_rq_clock(rq);
1143
1144 /*
1145 * Now that the task has been migrated to the new RQ and we
1146 * have that locked, proceed as normal and enqueue the task
1147 * there.
1148 */
1149 }
1150#endif
1151
1152 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
1153 if (dl_task(p: rq->curr))
1154 wakeup_preempt_dl(rq, p, flags: 0);
1155 else
1156 resched_curr(rq);
1157
1158#ifdef CONFIG_SMP
1159 /*
1160 * Queueing this task back might have overloaded rq, check if we need
1161 * to kick someone away.
1162 */
1163 if (has_pushable_dl_tasks(rq)) {
1164 /*
		 * Nothing relies on rq->lock after this, so it's safe to drop
1166 * rq->lock.
1167 */
1168 rq_unpin_lock(rq, rf: &rf);
1169 push_dl_task(rq);
1170 rq_repin_lock(rq, rf: &rf);
1171 }
1172#endif
1173
1174unlock:
1175 task_rq_unlock(rq, p, rf: &rf);
1176
1177 /*
1178 * This can free the task_struct, including this hrtimer, do not touch
1179 * anything related to that after this.
1180 */
1181 put_task_struct(t: p);
1182
1183 return HRTIMER_NORESTART;
1184}
1185
1186void init_dl_task_timer(struct sched_dl_entity *dl_se)
1187{
1188 struct hrtimer *timer = &dl_se->dl_timer;
1189
1190 hrtimer_init(timer, CLOCK_MONOTONIC, mode: HRTIMER_MODE_REL_HARD);
1191 timer->function = dl_task_timer;
1192}
1193
1194/*
1195 * During the activation, CBS checks if it can reuse the current task's
1196 * runtime and period. If the deadline of the task is in the past, CBS
1197 * cannot use the runtime, and so it replenishes the task. This rule
1198 * works fine for implicit deadline tasks (deadline == period), and the
1199 * CBS was designed for implicit deadline tasks. However, a task with
1200 * constrained deadline (deadline < period) might be awakened after the
1201 * deadline, but before the next period. In this case, replenishing the
1202 * task would allow it to run for runtime / deadline. As in this case
1203 * deadline < period, CBS enables a task to run for more than the
1204 * runtime / period. In a very loaded system, this can cause a domino
1205 * effect, making other tasks miss their deadlines.
1206 *
1207 * To avoid this problem, in the activation of a constrained deadline
1208 * task after the deadline but before the next period, throttle the
 * task and set the replenishment timer to the beginning of the next period,
1210 * unless it is boosted.
1211 */
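/*
 * Example with illustrative numbers: a task with dl_runtime = 5ms,
 * dl_deadline = 10ms and dl_period = 100ms is only entitled to 5% of the
 * CPU. If it wakes up past its deadline but before the next period and
 * were replenished immediately, it could consume up to runtime/deadline =
 * 50% of the CPU until things settle. The check below prevents that by
 * throttling the task until the next period begins.
 */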
1212static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
1213{
	struct task_struct *p = dl_task_of(dl_se);
	struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));

	if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
	    dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
		if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
			return;
		dl_se->dl_throttled = 1;
		if (dl_se->runtime > 0)
			dl_se->runtime = 0;
	}
1225}
1226
1227static
1228int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
1229{
1230 return (dl_se->runtime <= 0);
1231}
1232
1233/*
1234 * This function implements the GRUB accounting rule. According to the
1235 * GRUB reclaiming algorithm, the runtime is not decreased as "dq = -dt",
1236 * but as "dq = -(max{u, (Umax - Uinact - Uextra)} / Umax) dt",
1237 * where u is the utilization of the task, Umax is the maximum reclaimable
1238 * utilization, Uinact is the (per-runqueue) inactive utilization, computed
1239 * as the difference between the "total runqueue utilization" and the
1240 * "runqueue active utilization", and Uextra is the (per runqueue) extra
1241 * reclaimable utilization.
1242 * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations multiplied
1243 * by 2^BW_SHIFT, the result has to be shifted right by BW_SHIFT.
1244 * Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT, dl_bw
 * is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
1246 * Since delta is a 64 bit variable, to have an overflow its value should be
1247 * larger than 2^(64 - 20 - 8), which is more than 64 seconds. So, overflow is
1248 * not an issue here.
1249 */
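/*
 * Worked example with illustrative numbers: let Umax = 1 and consider a
 * task with utilization u = 0.25 running on a runqueue with Uinact = 0.5
 * and Uextra = 0. Then max{u, Umax - Uinact - Uextra} = max{0.25, 0.5} =
 * 0.5, so for every 10ms the task actually runs its runtime is decreased
 * by only 5ms: half of the bandwidth left unused by other tasks is
 * reclaimed. If Uinact + Uextra were 0.8 instead, the max would be the
 * task's own utilization and it would be charged 0.25 of the elapsed
 * time, i.e. never less than its own bandwidth.
 */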
1250static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
1251{
1252 u64 u_act;
1253 u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
1254
1255 /*
1256 * Instead of computing max{u, (u_max - u_inact - u_extra)}, we
1257 * compare u_inact + u_extra with u_max - u, because u_inact + u_extra
1258 * can be larger than u_max. So, u_max - u_inact - u_extra would be
1259 * negative leading to wrong results.
1260 */
1261 if (u_inact + rq->dl.extra_bw > rq->dl.max_bw - dl_se->dl_bw)
1262 u_act = dl_se->dl_bw;
1263 else
1264 u_act = rq->dl.max_bw - u_inact - rq->dl.extra_bw;
1265
1266 u_act = (u_act * rq->dl.bw_ratio) >> RATIO_SHIFT;
1267 return (delta * u_act) >> BW_SHIFT;
1268}
1269
1270/*
1271 * Update the current task's runtime statistics (provided it is still
1272 * a -deadline task and has not been removed from the dl_rq).
1273 */
1274static void update_curr_dl(struct rq *rq)
1275{
1276 struct task_struct *curr = rq->curr;
1277 struct sched_dl_entity *dl_se = &curr->dl;
1278 u64 delta_exec, scaled_delta_exec;
1279 int cpu = cpu_of(rq);
1280 u64 now;
1281
1282 if (!dl_task(p: curr) || !on_dl_rq(dl_se))
1283 return;
1284
1285 /*
1286 * Consumed budget is computed considering the time as
1287 * observed by schedulable tasks (excluding time spent
1288 * in hardirq context, etc.). Deadlines are instead
1289 * computed using hard walltime. This seems to be the more
1290 * natural solution, but the full ramifications of this
1291 * approach need further study.
1292 */
1293 now = rq_clock_task(rq);
1294 delta_exec = now - curr->se.exec_start;
1295 if (unlikely((s64)delta_exec <= 0)) {
1296 if (unlikely(dl_se->dl_yielded))
1297 goto throttle;
1298 return;
1299 }
1300
1301 schedstat_set(curr->stats.exec_max,
1302 max(curr->stats.exec_max, delta_exec));
1303
1304 trace_sched_stat_runtime(tsk: curr, runtime: delta_exec, vruntime: 0);
1305
1306 update_current_exec_runtime(curr, now, delta_exec);
1307
1308 if (dl_entity_is_special(dl_se))
1309 return;
1310
1311 /*
1312 * For tasks that participate in GRUB, we implement GRUB-PA: the
1313 * spare reclaimed bandwidth is used to clock down frequency.
1314 *
1315 * For the others, we still need to scale reservation parameters
1316 * according to current frequency and CPU maximum capacity.
1317 */
	if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
		scaled_delta_exec = grub_reclaim(delta_exec, rq, &curr->dl);
	} else {
		unsigned long scale_freq = arch_scale_freq_capacity(cpu);
		unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);

		scaled_delta_exec = cap_scale(delta_exec, scale_freq);
		scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
	}
1329
1330 dl_se->runtime -= scaled_delta_exec;
1331
1332throttle:
1333 if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
1334 dl_se->dl_throttled = 1;
1335
1336 /* If requested, inform the user about runtime overruns. */
1337 if (dl_runtime_exceeded(dl_se) &&
1338 (dl_se->flags & SCHED_FLAG_DL_OVERRUN))
1339 dl_se->dl_overrun = 1;
1340
1341 __dequeue_task_dl(rq, p: curr, flags: 0);
1342 if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
1343 enqueue_task_dl(rq, p: curr, ENQUEUE_REPLENISH);
1344
1345 if (!is_leftmost(p: curr, dl_rq: &rq->dl))
1346 resched_curr(rq);
1347 }
1348
1349 /*
1350 * Because -- for now -- we share the rt bandwidth, we need to
1351 * account our runtime there too, otherwise actual rt tasks
1352 * would be able to exceed the shared quota.
1353 *
1354 * Account to the root rt group for now.
1355 *
1356 * The solution we're working towards is having the RT groups scheduled
1357 * using deadline servers -- however there's a few nasties to figure
1358 * out before that can happen.
1359 */
1360 if (rt_bandwidth_enabled()) {
1361 struct rt_rq *rt_rq = &rq->rt;
1362
1363 raw_spin_lock(&rt_rq->rt_runtime_lock);
1364 /*
1365 * We'll let actual RT tasks worry about the overflow here, we
		 * have our own CBS to keep us in line; only account when RT
1367 * bandwidth is relevant.
1368 */
1369 if (sched_rt_bandwidth_account(rt_rq))
1370 rt_rq->rt_time += delta_exec;
1371 raw_spin_unlock(&rt_rq->rt_runtime_lock);
1372 }
1373}
1374
1375static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
1376{
1377 struct sched_dl_entity *dl_se = container_of(timer,
1378 struct sched_dl_entity,
1379 inactive_timer);
1380 struct task_struct *p = dl_task_of(dl_se);
1381 struct rq_flags rf;
1382 struct rq *rq;
1383
1384 rq = task_rq_lock(p, rf: &rf);
1385
1386 sched_clock_tick();
1387 update_rq_clock(rq);
1388
1389 if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
1390 struct dl_bw *dl_b = dl_bw_of(i: task_cpu(p));
1391
1392 if (READ_ONCE(p->__state) == TASK_DEAD && dl_se->dl_non_contending) {
1393 sub_running_bw(dl_se: &p->dl, dl_rq: dl_rq_of_se(dl_se: &p->dl));
1394 sub_rq_bw(dl_se: &p->dl, dl_rq: dl_rq_of_se(dl_se: &p->dl));
1395 dl_se->dl_non_contending = 0;
1396 }
1397
1398 raw_spin_lock(&dl_b->lock);
1399 __dl_sub(dl_b, tsk_bw: p->dl.dl_bw, cpus: dl_bw_cpus(i: task_cpu(p)));
1400 raw_spin_unlock(&dl_b->lock);
1401 __dl_clear_params(p);
1402
1403 goto unlock;
1404 }
1405 if (dl_se->dl_non_contending == 0)
1406 goto unlock;
1407
1408 sub_running_bw(dl_se, dl_rq: &rq->dl);
1409 dl_se->dl_non_contending = 0;
1410unlock:
1411 task_rq_unlock(rq, p, rf: &rf);
1412 put_task_struct(t: p);
1413
1414 return HRTIMER_NORESTART;
1415}
1416
1417void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
1418{
1419 struct hrtimer *timer = &dl_se->inactive_timer;
1420
1421 hrtimer_init(timer, CLOCK_MONOTONIC, mode: HRTIMER_MODE_REL_HARD);
1422 timer->function = inactive_task_timer;
1423}
1424
1425#define __node_2_dle(node) \
1426 rb_entry((node), struct sched_dl_entity, rb_node)
1427
1428#ifdef CONFIG_SMP
1429
1430static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
1431{
1432 struct rq *rq = rq_of_dl_rq(dl_rq);
1433
1434 if (dl_rq->earliest_dl.curr == 0 ||
1435 dl_time_before(a: deadline, b: dl_rq->earliest_dl.curr)) {
1436 if (dl_rq->earliest_dl.curr == 0)
1437 cpupri_set(cp: &rq->rd->cpupri, cpu: rq->cpu, CPUPRI_HIGHER);
1438 dl_rq->earliest_dl.curr = deadline;
1439 cpudl_set(cp: &rq->rd->cpudl, cpu: rq->cpu, dl: deadline);
1440 }
1441}
1442
1443static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
1444{
1445 struct rq *rq = rq_of_dl_rq(dl_rq);
1446
1447 /*
1448 * Since we may have removed our earliest (and/or next earliest)
1449 * task we must recompute them.
1450 */
1451 if (!dl_rq->dl_nr_running) {
1452 dl_rq->earliest_dl.curr = 0;
1453 dl_rq->earliest_dl.next = 0;
1454 cpudl_clear(cp: &rq->rd->cpudl, cpu: rq->cpu);
1455 cpupri_set(cp: &rq->rd->cpupri, cpu: rq->cpu, pri: rq->rt.highest_prio.curr);
1456 } else {
1457 struct rb_node *leftmost = rb_first_cached(&dl_rq->root);
1458 struct sched_dl_entity *entry = __node_2_dle(leftmost);
1459
1460 dl_rq->earliest_dl.curr = entry->deadline;
1461 cpudl_set(cp: &rq->rd->cpudl, cpu: rq->cpu, dl: entry->deadline);
1462 }
1463}
1464
1465#else
1466
1467static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
1468static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
1469
1470#endif /* CONFIG_SMP */
1471
1472static inline
1473void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
1474{
1475 int prio = dl_task_of(dl_se)->prio;
1476 u64 deadline = dl_se->deadline;
1477
1478 WARN_ON(!dl_prio(prio));
1479 dl_rq->dl_nr_running++;
1480 add_nr_running(rq: rq_of_dl_rq(dl_rq), count: 1);
1481
1482 inc_dl_deadline(dl_rq, deadline);
1483}
1484
1485static inline
1486void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
1487{
1488 int prio = dl_task_of(dl_se)->prio;
1489
1490 WARN_ON(!dl_prio(prio));
1491 WARN_ON(!dl_rq->dl_nr_running);
1492 dl_rq->dl_nr_running--;
1493 sub_nr_running(rq: rq_of_dl_rq(dl_rq), count: 1);
1494
1495 dec_dl_deadline(dl_rq, deadline: dl_se->deadline);
1496}
1497
1498static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
1499{
1500 return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
1501}
1502
1503static inline struct sched_statistics *
1504__schedstats_from_dl_se(struct sched_dl_entity *dl_se)
1505{
1506 return &dl_task_of(dl_se)->stats;
1507}
1508
1509static inline void
1510update_stats_wait_start_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
1511{
1512 struct sched_statistics *stats;
1513
1514 if (!schedstat_enabled())
1515 return;
1516
1517 stats = __schedstats_from_dl_se(dl_se);
1518 __update_stats_wait_start(rq: rq_of_dl_rq(dl_rq), p: dl_task_of(dl_se), stats);
1519}
1520
1521static inline void
1522update_stats_wait_end_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
1523{
1524 struct sched_statistics *stats;
1525
1526 if (!schedstat_enabled())
1527 return;
1528
1529 stats = __schedstats_from_dl_se(dl_se);
1530 __update_stats_wait_end(rq: rq_of_dl_rq(dl_rq), p: dl_task_of(dl_se), stats);
1531}
1532
1533static inline void
1534update_stats_enqueue_sleeper_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
1535{
1536 struct sched_statistics *stats;
1537
1538 if (!schedstat_enabled())
1539 return;
1540
1541 stats = __schedstats_from_dl_se(dl_se);
1542 __update_stats_enqueue_sleeper(rq: rq_of_dl_rq(dl_rq), p: dl_task_of(dl_se), stats);
1543}
1544
1545static inline void
1546update_stats_enqueue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
1547 int flags)
1548{
1549 if (!schedstat_enabled())
1550 return;
1551
1552 if (flags & ENQUEUE_WAKEUP)
1553 update_stats_enqueue_sleeper_dl(dl_rq, dl_se);
1554}
1555
1556static inline void
1557update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
1558 int flags)
1559{
1560 struct task_struct *p = dl_task_of(dl_se);
1561
1562 if (!schedstat_enabled())
1563 return;
1564
1565 if ((flags & DEQUEUE_SLEEP)) {
1566 unsigned int state;
1567
1568 state = READ_ONCE(p->__state);
1569 if (state & TASK_INTERRUPTIBLE)
1570 __schedstat_set(p->stats.sleep_start,
1571 rq_clock(rq_of_dl_rq(dl_rq)));
1572
1573 if (state & TASK_UNINTERRUPTIBLE)
1574 __schedstat_set(p->stats.block_start,
1575 rq_clock(rq_of_dl_rq(dl_rq)));
1576 }
1577}
1578
1579static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
1580{
1581 struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
1582
1583 WARN_ON_ONCE(!RB_EMPTY_NODE(&dl_se->rb_node));
1584
1585 rb_add_cached(node: &dl_se->rb_node, tree: &dl_rq->root, less: __dl_less);
1586
1587 inc_dl_tasks(dl_se, dl_rq);
1588}
1589
1590static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
1591{
1592 struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
1593
1594 if (RB_EMPTY_NODE(&dl_se->rb_node))
1595 return;
1596
1597 rb_erase_cached(node: &dl_se->rb_node, root: &dl_rq->root);
1598
1599 RB_CLEAR_NODE(&dl_se->rb_node);
1600
1601 dec_dl_tasks(dl_se, dl_rq);
1602}
1603
1604static void
1605enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
1606{
1607 WARN_ON_ONCE(on_dl_rq(dl_se));
1608
1609 update_stats_enqueue_dl(dl_rq: dl_rq_of_se(dl_se), dl_se, flags);
1610
1611 /*
1612 * If this is a wakeup or a new instance, the scheduling
1613 * parameters of the task might need updating. Otherwise,
1614 * we want a replenishment of its runtime.
1615 */
1616 if (flags & ENQUEUE_WAKEUP) {
1617 task_contending(dl_se, flags);
1618 update_dl_entity(dl_se);
1619 } else if (flags & ENQUEUE_REPLENISH) {
1620 replenish_dl_entity(dl_se);
1621 } else if ((flags & ENQUEUE_RESTORE) &&
1622 dl_time_before(a: dl_se->deadline,
1623 b: rq_clock(rq: rq_of_dl_rq(dl_rq: dl_rq_of_se(dl_se))))) {
1624 setup_new_dl_entity(dl_se);
1625 }
1626
1627 __enqueue_dl_entity(dl_se);
1628}
1629
1630static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
1631{
1632 __dequeue_dl_entity(dl_se);
1633}
1634
1635static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
1636{
1637 if (is_dl_boosted(dl_se: &p->dl)) {
1638 /*
1639 * Because of delays in the detection of the overrun of a
1640 * thread's runtime, it might be the case that a thread
1641 * goes to sleep in a rt mutex with negative runtime. As
1642 * a consequence, the thread will be throttled.
1643 *
1644 * While waiting for the mutex, this thread can also be
1645 * boosted via PI, resulting in a thread that is throttled
1646 * and boosted at the same time.
1647 *
1648 * In this case, the boost overrides the throttle.
1649 */
1650 if (p->dl.dl_throttled) {
1651 /*
1652 * The replenish timer needs to be canceled. No
1653 * problem if it fires concurrently: boosted threads
1654 * are ignored in dl_task_timer().
1655 */
1656 hrtimer_try_to_cancel(timer: &p->dl.dl_timer);
1657 p->dl.dl_throttled = 0;
1658 }
1659 } else if (!dl_prio(prio: p->normal_prio)) {
1660 /*
1661 * Special case in which we have a !SCHED_DEADLINE task that is going
1662 * to be deboosted, but exceeds its runtime while doing so. No point in
1663 * replenishing it, as it's going to return back to its original
1664 * scheduling class after this. If it has been throttled, we need to
1665 * clear the flag, otherwise the task may wake up as throttled after
1666 * being boosted again with no means to replenish the runtime and clear
1667 * the throttle.
1668 */
1669 p->dl.dl_throttled = 0;
1670 if (!(flags & ENQUEUE_REPLENISH))
1671 printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
1672 task_pid_nr(p));
1673
1674 return;
1675 }
1676
1677 /*
1678 * Check if a constrained deadline task was activated
1679 * after the deadline but before the next period.
1680 * If that is the case, the task will be throttled and
1681 * the replenishment timer will be set to the next period.
1682 */
1683 if (!p->dl.dl_throttled && !dl_is_implicit(dl_se: &p->dl))
1684 dl_check_constrained_dl(dl_se: &p->dl);
1685
1686 if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
1687 add_rq_bw(dl_se: &p->dl, dl_rq: &rq->dl);
1688 add_running_bw(dl_se: &p->dl, dl_rq: &rq->dl);
1689 }
1690
1691 /*
1692 * If p is throttled, we do not enqueue it. In fact, if it exhausted
1693 * its budget it needs a replenishment and, since it now is on
1694 * its rq, the bandwidth timer callback (which clearly has not
1695 * run yet) will take care of this.
1696 * However, the active utilization does not depend on the fact
1697 * that the task is on the runqueue or not (but depends on the
1698 * task's state - in GRUB parlance, "inactive" vs "active contending").
1699 * In other words, even if a task is throttled its utilization must
1700 * be counted in the active utilization; hence, we need to call
1701 * add_running_bw().
1702 */
1703 if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
1704 if (flags & ENQUEUE_WAKEUP)
1705 task_contending(dl_se: &p->dl, flags);
1706
1707 return;
1708 }
1709
1710 check_schedstat_required();
1711 update_stats_wait_start_dl(dl_rq: dl_rq_of_se(dl_se: &p->dl), dl_se: &p->dl);
1712
1713 enqueue_dl_entity(dl_se: &p->dl, flags);
1714
1715 if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
1716 enqueue_pushable_dl_task(rq, p);
1717}
1718
1719static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
1720{
1721 update_stats_dequeue_dl(dl_rq: &rq->dl, dl_se: &p->dl, flags);
1722 dequeue_dl_entity(dl_se: &p->dl);
1723 dequeue_pushable_dl_task(rq, p);
1724}
1725
1726static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
1727{
1728 update_curr_dl(rq);
1729 __dequeue_task_dl(rq, p, flags);
1730
1731 if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
1732 sub_running_bw(dl_se: &p->dl, dl_rq: &rq->dl);
1733 sub_rq_bw(dl_se: &p->dl, dl_rq: &rq->dl);
1734 }
1735
1736 /*
1737 * This check allows to start the inactive timer (or to immediately
1738 * decrease the active utilization, if needed) in two cases:
1739 * when the task blocks and when it is terminating
1740 * (p->state == TASK_DEAD). We can handle the two cases in the same
1741 * way, because from GRUB's point of view the same thing is happening
1742 * (the task moves from "active contending" to "active non contending"
1743 * or "inactive")
1744 */
1745 if (flags & DEQUEUE_SLEEP)
1746 task_non_contending(p);
1747}

/*
 * Yield task semantic for -deadline tasks is:
 *
 *   get off from the CPU until our next instance, with
 *   a new runtime. This is of little use now, since we
 *   don't have a bandwidth reclaiming mechanism. Anyway,
 *   bandwidth reclaiming is planned for the future, and
 *   yield_task_dl will indicate that some spare budget
 *   is available for other task instances to use.
 */
static void yield_task_dl(struct rq *rq)
{
	/*
	 * We make the task go to sleep until its current deadline by
	 * forcing its runtime to zero. This way, update_curr_dl() stops
	 * it and the bandwidth timer will wake it up and will give it
	 * new scheduling parameters (thanks to dl_yielded=1).
	 */
	rq->curr->dl.dl_yielded = 1;

	update_rq_clock(rq);
	update_curr_dl(rq);
	/*
	 * Tell update_rq_clock() that we've just updated,
	 * so we don't do microscopic update in schedule()
	 * and double the fastpath cost.
	 */
	rq_clock_skip_update(rq);
}
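
/*
 * Sketch of how a periodic userspace task would use the above
 * (illustrative only, not kernel code; assumes a sched_setattr()
 * syscall wrapper as in Documentation/scheduler/sched-deadline.rst):
 *
 *	struct sched_attr attr = {
 *		.size		= sizeof(attr),
 *		.sched_policy	= SCHED_DEADLINE,
 *		.sched_runtime	=  10 * 1000 * 1000,	// 10ms
 *		.sched_deadline	=  30 * 1000 * 1000,	// 30ms
 *		.sched_period	= 100 * 1000 * 1000,	// 100ms
 *	};
 *
 *	sched_setattr(0, &attr, 0);
 *	for (;;) {
 *		do_work();
 *		sched_yield();	// done early: give up the residual
 *				// runtime until the next period
 *	}
 */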

#ifdef CONFIG_SMP

static inline bool dl_task_is_earliest_deadline(struct task_struct *p,
						struct rq *rq)
{
	return (!rq->dl.dl_nr_running ||
		dl_time_before(p->dl.deadline,
			       rq->dl.earliest_dl.curr));
}

static int find_later_rq(struct task_struct *task);

static int
select_task_rq_dl(struct task_struct *p, int cpu, int flags)
{
	struct task_struct *curr;
	bool select_rq;
	struct rq *rq;

	if (!(flags & WF_TTWU))
		goto out;

	rq = cpu_rq(cpu);

	rcu_read_lock();
	curr = READ_ONCE(rq->curr); /* unlocked access */

	/*
	 * If we are dealing with a -deadline task, we must
	 * decide where to wake it up.
	 * If it has a later deadline and the current task
	 * on this rq can't move (provided the waking task
	 * can!) we prefer to send it somewhere else. On the
	 * other hand, if it has a shorter deadline, we
	 * try to make it stay here, it might be important.
	 */
	select_rq = unlikely(dl_task(curr)) &&
		    (curr->nr_cpus_allowed < 2 ||
		     !dl_entity_preempt(&p->dl, &curr->dl)) &&
		    p->nr_cpus_allowed > 1;

	/*
	 * Take the capacity of the CPU into account to
	 * ensure it fits the requirement of the task.
	 */
	if (sched_asym_cpucap_active())
		select_rq |= !dl_task_fits_capacity(p, cpu);

	if (select_rq) {
		int target = find_later_rq(p);

		if (target != -1 &&
		    dl_task_is_earliest_deadline(p, cpu_rq(target)))
			cpu = target;
	}
	rcu_read_unlock();

out:
	return cpu;
}

static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
{
	struct rq_flags rf;
	struct rq *rq;

	if (READ_ONCE(p->__state) != TASK_WAKING)
		return;

	rq = task_rq(p);
	/*
	 * Since p->state == TASK_WAKING, set_task_cpu() has been called
	 * from try_to_wake_up(). Hence, p->pi_lock is locked, but
	 * rq->lock is not... So, lock it.
	 */
	rq_lock(rq, &rf);
	if (p->dl.dl_non_contending) {
		update_rq_clock(rq);
		sub_running_bw(&p->dl, &rq->dl);
		p->dl.dl_non_contending = 0;
		/*
		 * If the timer handler is currently running and the
		 * timer cannot be canceled, inactive_task_timer()
		 * will see that dl_non_contending is not set, and
		 * will not touch the rq's active utilization,
		 * so we are still safe.
		 */
		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
			put_task_struct(p);
	}
	sub_rq_bw(&p->dl, &rq->dl);
	rq_unlock(rq, &rf);
}

static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
{
	/*
	 * Current can't be migrated, useless to reschedule,
	 * let's hope p can move out.
	 */
	if (rq->curr->nr_cpus_allowed == 1 ||
	    !cpudl_find(&rq->rd->cpudl, rq->curr, NULL))
		return;

	/*
	 * p is migratable, so let's not schedule it and
	 * see if it is pushed or pulled somewhere else.
	 */
	if (p->nr_cpus_allowed != 1 &&
	    cpudl_find(&rq->rd->cpudl, p, NULL))
		return;

	resched_curr(rq);
}

static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
{
	if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
		/*
		 * This is OK, because current is on_cpu, which avoids it being
		 * picked for load-balance and preemption/IRQs are still
		 * disabled avoiding further scheduler activity on it and we've
		 * not yet started the picking loop.
		 */
		rq_unpin_lock(rq, rf);
		pull_dl_task(rq);
		rq_repin_lock(rq, rf);
	}

	return sched_stop_runnable(rq) || sched_dl_runnable(rq);
}
#endif /* CONFIG_SMP */

/*
 * Only called when both the current and waking task are -deadline
 * tasks.
 */
static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p,
			      int flags)
{
	if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
		resched_curr(rq);
		return;
	}

#ifdef CONFIG_SMP
	/*
	 * In the unlikely case current and p have the same deadline
	 * let us try to decide what's the best thing to do...
	 */
	if ((p->dl.deadline == rq->curr->dl.deadline) &&
	    !test_tsk_need_resched(rq->curr))
		check_preempt_equal_dl(rq, p);
#endif /* CONFIG_SMP */
}

#ifdef CONFIG_SCHED_HRTICK
static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
{
	hrtick_start(rq, p->dl.runtime);
}
#else /* !CONFIG_SCHED_HRTICK */
static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
{
}
#endif

static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
{
	struct sched_dl_entity *dl_se = &p->dl;
	struct dl_rq *dl_rq = &rq->dl;

	p->se.exec_start = rq_clock_task(rq);
	if (on_dl_rq(&p->dl))
		update_stats_wait_end_dl(dl_rq, dl_se);

	/* You can't push away the running task */
	dequeue_pushable_dl_task(rq, p);

	if (!first)
		return;

	if (hrtick_enabled_dl(rq))
		start_hrtick_dl(rq, p);

	if (rq->curr->sched_class != &dl_sched_class)
		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);

	deadline_queue_push_tasks(rq);
}

static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
{
	struct rb_node *left = rb_first_cached(&dl_rq->root);

	if (!left)
		return NULL;

	return __node_2_dle(left);
}

static struct task_struct *pick_task_dl(struct rq *rq)
{
	struct sched_dl_entity *dl_se;
	struct dl_rq *dl_rq = &rq->dl;
	struct task_struct *p;

	if (!sched_dl_runnable(rq))
		return NULL;

	dl_se = pick_next_dl_entity(dl_rq);
	WARN_ON_ONCE(!dl_se);
	p = dl_task_of(dl_se);

	return p;
}

static struct task_struct *pick_next_task_dl(struct rq *rq)
{
	struct task_struct *p;

	p = pick_task_dl(rq);
	if (p)
		set_next_task_dl(rq, p, true);

	return p;
}

static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
{
	struct sched_dl_entity *dl_se = &p->dl;
	struct dl_rq *dl_rq = &rq->dl;

	if (on_dl_rq(&p->dl))
		update_stats_wait_start_dl(dl_rq, dl_se);

	update_curr_dl(rq);

	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
	if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
		enqueue_pushable_dl_task(rq, p);
}

/*
 * scheduler tick hitting a task of our scheduling class.
 *
 * NOTE: This function can be called remotely by the tick offload that
 * goes along full dynticks. Therefore no local assumption can be made
 * and everything must be accessed through the @rq and @curr passed in
 * parameters.
 */
static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
{
	update_curr_dl(rq);

	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
	/*
	 * Even when we have runtime, update_curr_dl() might have resulted in us
	 * not being the leftmost task anymore. In that case NEED_RESCHED will
	 * be set and schedule() will start a new hrtick for the next task.
	 */
	if (hrtick_enabled_dl(rq) && queued && p->dl.runtime > 0 &&
	    is_leftmost(p, &rq->dl))
		start_hrtick_dl(rq, p);
}

static void task_fork_dl(struct task_struct *p)
{
	/*
	 * SCHED_DEADLINE tasks cannot fork and this is achieved through
	 * sched_fork().
	 */
}

#ifdef CONFIG_SMP

/* Only try algorithms three times */
#define DL_MAX_TRIES 3

static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
{
	if (!task_on_cpu(rq, p) &&
	    cpumask_test_cpu(cpu, &p->cpus_mask))
		return 1;
	return 0;
}

/*
 * Return the earliest pushable rq's task, which is suitable to be executed
 * on the CPU, NULL otherwise:
 */
static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
{
	struct task_struct *p = NULL;
	struct rb_node *next_node;

	if (!has_pushable_dl_tasks(rq))
		return NULL;

	next_node = rb_first_cached(&rq->dl.pushable_dl_tasks_root);

next_node:
	if (next_node) {
		p = __node_2_pdl(next_node);

		if (pick_dl_task(rq, p, cpu))
			return p;

		next_node = rb_next(next_node);
		goto next_node;
	}

	return NULL;
}

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);

static int find_later_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
	int this_cpu = smp_processor_id();
	int cpu = task_cpu(task);

	/* Make sure the mask is initialized first */
	if (unlikely(!later_mask))
		return -1;

	if (task->nr_cpus_allowed == 1)
		return -1;

	/*
	 * We have to consider system topology and task affinity
	 * first, then we can look for a suitable CPU.
	 */
	if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
		return -1;

	/*
	 * If we are here, some targets have been found, including
	 * the most suitable one, which is, among the runqueues whose
	 * current tasks have a later deadline than our task's, the
	 * rq with the latest possible one.
	 *
	 * Now we check how well this matches with task's
	 * affinity and system topology.
	 *
	 * The last CPU where the task ran is our first
	 * guess, since it is most likely cache-hot there.
	 */
	if (cpumask_test_cpu(cpu, later_mask))
		return cpu;
	/*
	 * Check if this_cpu is to be skipped (i.e., it is
	 * not in the mask) or not.
	 */
	if (!cpumask_test_cpu(this_cpu, later_mask))
		this_cpu = -1;

	rcu_read_lock();
	for_each_domain(cpu, sd) {
		if (sd->flags & SD_WAKE_AFFINE) {
			int best_cpu;

			/*
			 * If possible, preempting this_cpu is
			 * cheaper than migrating.
			 */
			if (this_cpu != -1 &&
			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				rcu_read_unlock();
				return this_cpu;
			}

			best_cpu = cpumask_any_and_distribute(later_mask,
							      sched_domain_span(sd));
			/*
			 * Last chance: if a CPU being in both later_mask
			 * and current sd span is valid, that becomes our
			 * choice. Of course, the latest possible CPU is
			 * already under consideration through later_mask.
			 */
			if (best_cpu < nr_cpu_ids) {
				rcu_read_unlock();
				return best_cpu;
			}
		}
	}
	rcu_read_unlock();

	/*
	 * At this point, all our guesses failed, we just return
	 * 'something', and let the caller sort the things out.
	 */
	if (this_cpu != -1)
		return this_cpu;

	cpu = cpumask_any_distribute(later_mask);
	if (cpu < nr_cpu_ids)
		return cpu;

	return -1;
}

/* Locks the rq it finds */
static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
{
	struct rq *later_rq = NULL;
	int tries;
	int cpu;

	for (tries = 0; tries < DL_MAX_TRIES; tries++) {
		cpu = find_later_rq(task);

		if ((cpu == -1) || (cpu == rq->cpu))
			break;

		later_rq = cpu_rq(cpu);

		if (!dl_task_is_earliest_deadline(task, later_rq)) {
			/*
			 * Target rq has tasks of equal or earlier deadline,
			 * retrying does not release any lock and is unlikely
			 * to yield a different result.
			 */
			later_rq = NULL;
			break;
		}

		/* Retry if something changed. */
		if (double_lock_balance(rq, later_rq)) {
			if (unlikely(task_rq(task) != rq ||
				     !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
				     task_on_cpu(rq, task) ||
				     !dl_task(task) ||
				     is_migration_disabled(task) ||
				     !task_on_rq_queued(task))) {
				double_unlock_balance(rq, later_rq);
				later_rq = NULL;
				break;
			}
		}

		/*
		 * If the rq we found has no -deadline task, or
		 * its earliest one has a later deadline than our
		 * task, the rq is a good one.
		 */
		if (dl_task_is_earliest_deadline(task, later_rq))
			break;

		/* Otherwise we try again. */
		double_unlock_balance(rq, later_rq);
		later_rq = NULL;
	}

	return later_rq;
}

static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
{
	struct task_struct *p;

	if (!has_pushable_dl_tasks(rq))
		return NULL;

	p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));

	WARN_ON_ONCE(rq->cpu != task_cpu(p));
	WARN_ON_ONCE(task_current(rq, p));
	WARN_ON_ONCE(p->nr_cpus_allowed <= 1);

	WARN_ON_ONCE(!task_on_rq_queued(p));
	WARN_ON_ONCE(!dl_task(p));

	return p;
}

/*
 * See if the non running -deadline tasks on this rq
 * can be sent to some other CPU where they can preempt
 * and start executing.
 */
static int push_dl_task(struct rq *rq)
{
	struct task_struct *next_task;
	struct rq *later_rq;
	int ret = 0;

	next_task = pick_next_pushable_dl_task(rq);
	if (!next_task)
		return 0;

retry:
	/*
	 * If next_task preempts rq->curr, and rq->curr
	 * can move away, it makes sense to just reschedule
	 * without going further in pushing next_task.
	 */
	if (dl_task(rq->curr) &&
	    dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
	    rq->curr->nr_cpus_allowed > 1) {
		resched_curr(rq);
		return 0;
	}

	if (is_migration_disabled(next_task))
		return 0;

	if (WARN_ON(next_task == rq->curr))
		return 0;

	/* We might release rq lock */
	get_task_struct(next_task);

	/* Will lock the rq it'll find */
	later_rq = find_lock_later_rq(next_task, rq);
	if (!later_rq) {
		struct task_struct *task;

		/*
		 * We must check all this again, since
		 * find_lock_later_rq releases rq->lock and it is
		 * then possible that next_task has migrated.
		 */
		task = pick_next_pushable_dl_task(rq);
		if (task == next_task) {
			/*
			 * The task is still there. We don't try
			 * again, some other CPU will pull it when ready.
			 */
			goto out;
		}

		if (!task)
			/* No more tasks */
			goto out;

		put_task_struct(next_task);
		next_task = task;
		goto retry;
	}

	deactivate_task(rq, next_task, 0);
	set_task_cpu(next_task, later_rq->cpu);
	activate_task(later_rq, next_task, 0);
	ret = 1;

	resched_curr(later_rq);

	double_unlock_balance(rq, later_rq);

out:
	put_task_struct(next_task);

	return ret;
}

static void push_dl_tasks(struct rq *rq)
{
	/* push_dl_task() will return true if it moved a -deadline task */
	while (push_dl_task(rq))
		;
}

static void pull_dl_task(struct rq *this_rq)
{
	int this_cpu = this_rq->cpu, cpu;
	struct task_struct *p, *push_task;
	bool resched = false;
	struct rq *src_rq;
	u64 dmin = LONG_MAX;

	if (likely(!dl_overloaded(this_rq)))
		return;

	/*
	 * Match the barrier from dl_set_overloaded; this guarantees that if we
	 * see overloaded we must also see the dlo_mask bit.
	 */
	smp_rmb();

	for_each_cpu(cpu, this_rq->rd->dlo_mask) {
		if (this_cpu == cpu)
			continue;

		src_rq = cpu_rq(cpu);

		/*
		 * It looks racy, and it is! However, as in sched_rt.c,
		 * we are fine with this.
		 */
		if (this_rq->dl.dl_nr_running &&
		    dl_time_before(this_rq->dl.earliest_dl.curr,
				   src_rq->dl.earliest_dl.next))
			continue;

		/* Might drop this_rq->lock */
		push_task = NULL;
		double_lock_balance(this_rq, src_rq);

		/*
		 * If there are no more pullable tasks on the
		 * rq, we're done with it.
		 */
		if (src_rq->dl.dl_nr_running <= 1)
			goto skip;

		p = pick_earliest_pushable_dl_task(src_rq, this_cpu);

		/*
		 * We found a task to be pulled if:
		 * - it preempts our current (if there's one),
		 * - it will preempt the last one we pulled (if any).
		 */
		if (p && dl_time_before(p->dl.deadline, dmin) &&
		    dl_task_is_earliest_deadline(p, this_rq)) {
			WARN_ON(p == src_rq->curr);
			WARN_ON(!task_on_rq_queued(p));

			/*
			 * Then we pull iff p has actually an earlier
			 * deadline than the current task of its runqueue.
			 */
			if (dl_time_before(p->dl.deadline,
					   src_rq->curr->dl.deadline))
				goto skip;

			if (is_migration_disabled(p)) {
				push_task = get_push_task(src_rq);
			} else {
				deactivate_task(src_rq, p, 0);
				set_task_cpu(p, this_cpu);
				activate_task(this_rq, p, 0);
				dmin = p->dl.deadline;
				resched = true;
			}

			/* Is there any other task even earlier? */
		}
skip:
		double_unlock_balance(this_rq, src_rq);

		if (push_task) {
			preempt_disable();
			raw_spin_rq_unlock(this_rq);
			stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
					    push_task, &src_rq->push_work);
			preempt_enable();
			raw_spin_rq_lock(this_rq);
		}
	}

	if (resched)
		resched_curr(this_rq);
}

/*
 * Since the task is not running and a reschedule is not going to happen
 * anytime soon on its runqueue, we try pushing it away now.
 */
static void task_woken_dl(struct rq *rq, struct task_struct *p)
{
	if (!task_on_cpu(rq, p) &&
	    !test_tsk_need_resched(rq->curr) &&
	    p->nr_cpus_allowed > 1 &&
	    dl_task(rq->curr) &&
	    (rq->curr->nr_cpus_allowed < 2 ||
	     !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
		push_dl_tasks(rq);
	}
}

static void set_cpus_allowed_dl(struct task_struct *p,
				struct affinity_context *ctx)
{
	struct root_domain *src_rd;
	struct rq *rq;

	WARN_ON_ONCE(!dl_task(p));

	rq = task_rq(p);
	src_rd = rq->rd;
	/*
	 * Migrating a SCHED_DEADLINE task between exclusive
	 * cpusets (different root_domains) entails a bandwidth
	 * update. We already made space for us in the destination
	 * domain (see cpuset_can_attach()).
	 */
	if (!cpumask_intersects(src_rd->span, ctx->new_mask)) {
		struct dl_bw *src_dl_b;

		src_dl_b = dl_bw_of(cpu_of(rq));
		/*
		 * We now free resources of the root_domain we are migrating
		 * off. In the worst case, sched_setattr() may temporarily
		 * fail until we complete the update.
		 */
		raw_spin_lock(&src_dl_b->lock);
		__dl_sub(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
		raw_spin_unlock(&src_dl_b->lock);
	}

	set_cpus_allowed_common(p, ctx);
}

/* Assumes rq->lock is held */
static void rq_online_dl(struct rq *rq)
{
	if (rq->dl.overloaded)
		dl_set_overload(rq);

	cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
	if (rq->dl.dl_nr_running > 0)
		cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
}

/* Assumes rq->lock is held */
static void rq_offline_dl(struct rq *rq)
{
	if (rq->dl.overloaded)
		dl_clear_overload(rq);

	cpudl_clear(&rq->rd->cpudl, rq->cpu);
	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
}

void __init init_sched_dl_class(void)
{
	unsigned int i;

	for_each_possible_cpu(i)
		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
					GFP_KERNEL, cpu_to_node(i));
}

void dl_add_task_root_domain(struct task_struct *p)
{
	struct rq_flags rf;
	struct rq *rq;
	struct dl_bw *dl_b;

	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
	if (!dl_task(p)) {
		raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
		return;
	}

	rq = __task_rq_lock(p, &rf);

	dl_b = &rq->rd->dl_bw;
	raw_spin_lock(&dl_b->lock);

	__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));

	raw_spin_unlock(&dl_b->lock);

	task_rq_unlock(rq, p, &rf);
}

void dl_clear_root_domain(struct root_domain *rd)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&rd->dl_bw.lock, flags);
	rd->dl_bw.total_bw = 0;
	raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags);
}

#endif /* CONFIG_SMP */

static void switched_from_dl(struct rq *rq, struct task_struct *p)
{
	/*
	 * task_non_contending() can start the "inactive timer" (if the 0-lag
	 * time is in the future). If the task switches back to dl before
	 * the "inactive timer" fires, it can continue to consume its current
	 * runtime using its current deadline. If it stays outside of
	 * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
	 * will reset the task parameters.
	 */
	if (task_on_rq_queued(p) && p->dl.dl_runtime)
		task_non_contending(p);

	/*
	 * In case a task is setscheduled out from SCHED_DEADLINE we need to
	 * keep track of that on its cpuset (for correct bandwidth tracking).
	 */
	dec_dl_tasks_cs(p);

	if (!task_on_rq_queued(p)) {
		/*
		 * Inactive timer is armed. However, p is leaving DEADLINE and
		 * might migrate away from this rq while continuing to run on
		 * some other class. We need to remove its contribution from
		 * this rq running_bw now, or sub_rq_bw (below) will complain.
		 */
		if (p->dl.dl_non_contending)
			sub_running_bw(&p->dl, &rq->dl);
		sub_rq_bw(&p->dl, &rq->dl);
	}

	/*
	 * We cannot use inactive_task_timer() to invoke sub_running_bw()
	 * at the 0-lag time, because the task could have been migrated
	 * while it was SCHED_OTHER in the meanwhile.
	 */
	if (p->dl.dl_non_contending)
		p->dl.dl_non_contending = 0;

	/*
	 * Since this might be the only -deadline task on the rq,
	 * this is the right place to try to pull some other one
	 * from an overloaded CPU, if any.
	 */
	if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
		return;

	deadline_queue_pull_task(rq);
}

/*
 * When switching to -deadline, we may overload the rq, then
 * we try to push someone off, if possible.
 */
static void switched_to_dl(struct rq *rq, struct task_struct *p)
{
	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
		put_task_struct(p);

	/*
	 * In case a task is setscheduled to SCHED_DEADLINE we need to keep
	 * track of that on its cpuset (for correct bandwidth tracking).
	 */
	inc_dl_tasks_cs(p);

	/* If p is not queued we will update its parameters at next wakeup. */
	if (!task_on_rq_queued(p)) {
		add_rq_bw(&p->dl, &rq->dl);

		return;
	}

	if (rq->curr != p) {
#ifdef CONFIG_SMP
		if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
			deadline_queue_push_tasks(rq);
#endif
		if (dl_task(rq->curr))
			wakeup_preempt_dl(rq, p, 0);
		else
			resched_curr(rq);
	} else {
		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
	}
}

/*
 * If the scheduling parameters of a -deadline task changed,
 * a push or pull operation might be needed.
 */
static void prio_changed_dl(struct rq *rq, struct task_struct *p,
			    int oldprio)
{
	if (!task_on_rq_queued(p))
		return;

#ifdef CONFIG_SMP
	/*
	 * This might be too much, but unfortunately
	 * we don't have the old deadline value, and
	 * we can't argue if the task is increasing
	 * or lowering its prio, so...
	 */
	if (!rq->dl.overloaded)
		deadline_queue_pull_task(rq);

	if (task_current(rq, p)) {
		/*
		 * If we now have an earlier deadline task than p,
		 * then reschedule, provided p is still on this
		 * runqueue.
		 */
		if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
			resched_curr(rq);
	} else {
		/*
		 * Current may not be deadline in case p was throttled but we
		 * have just replenished it (e.g. rt_mutex_setprio()).
		 *
		 * Otherwise, if p was given an earlier deadline, reschedule.
		 */
		if (!dl_task(rq->curr) ||
		    dl_time_before(p->dl.deadline, rq->curr->dl.deadline))
			resched_curr(rq);
	}
#else
	/*
	 * We don't know if p has an earlier or later deadline, so let's
	 * blindly set a (maybe not needed) rescheduling point.
	 */
	resched_curr(rq);
#endif
}

#ifdef CONFIG_SCHED_CORE
static int task_is_throttled_dl(struct task_struct *p, int cpu)
{
	return p->dl.dl_throttled;
}
#endif

DEFINE_SCHED_CLASS(dl) = {

	.enqueue_task		= enqueue_task_dl,
	.dequeue_task		= dequeue_task_dl,
	.yield_task		= yield_task_dl,

	.wakeup_preempt		= wakeup_preempt_dl,

	.pick_next_task		= pick_next_task_dl,
	.put_prev_task		= put_prev_task_dl,
	.set_next_task		= set_next_task_dl,

#ifdef CONFIG_SMP
	.balance		= balance_dl,
	.pick_task		= pick_task_dl,
	.select_task_rq		= select_task_rq_dl,
	.migrate_task_rq	= migrate_task_rq_dl,
	.set_cpus_allowed	= set_cpus_allowed_dl,
	.rq_online		= rq_online_dl,
	.rq_offline		= rq_offline_dl,
	.task_woken		= task_woken_dl,
	.find_lock_rq		= find_lock_later_rq,
#endif

	.task_tick		= task_tick_dl,
	.task_fork		= task_fork_dl,

	.prio_changed		= prio_changed_dl,
	.switched_from		= switched_from_dl,
	.switched_to		= switched_to_dl,

	.update_curr		= update_curr_dl,
#ifdef CONFIG_SCHED_CORE
	.task_is_throttled	= task_is_throttled_dl,
#endif
};
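
/*
 * The deadline class sits above the rt and fair classes and below the
 * stop class in the scheduling-class hierarchy, so a runnable -deadline
 * task preempts any SCHED_FIFO/SCHED_RR or SCHED_NORMAL task on its CPU.
 */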

/* Used for dl_bw check and update, used under sched_rt_handler()::mutex */
static u64 dl_generation;

int sched_dl_global_validate(void)
{
	u64 runtime = global_rt_runtime();
	u64 period = global_rt_period();
	u64 new_bw = to_ratio(period, runtime);
	u64 gen = ++dl_generation;
	struct dl_bw *dl_b;
	int cpu, cpus, ret = 0;
	unsigned long flags;

	/*
	 * Here we want to check that the bandwidth is not being set to a
	 * value smaller than the currently allocated bandwidth in any of
	 * the root_domains.
	 */
	for_each_possible_cpu(cpu) {
		rcu_read_lock_sched();

		if (dl_bw_visited(cpu, gen))
			goto next;

		dl_b = dl_bw_of(cpu);
		cpus = dl_bw_cpus(cpu);

		raw_spin_lock_irqsave(&dl_b->lock, flags);
		if (new_bw * cpus < dl_b->total_bw)
			ret = -EBUSY;
		raw_spin_unlock_irqrestore(&dl_b->lock, flags);

next:
		rcu_read_unlock_sched();

		if (ret)
			break;
	}

	return ret;
}
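
/*
 * Worked example (illustrative, assuming the default sysctl values
 * sched_rt_runtime_us = 950000, sched_rt_period_us = 1000000 and
 * BW_SHIFT = 20):
 *
 *	new_bw = to_ratio(1000000, 950000)
 *	       = (950000 << 20) / 1000000 ~= 0.95 * 2^20 = 996147
 *
 * A root_domain spanning 4 CPUs is then rejected above as soon as its
 * total_bw exceeds 4 * 996147, i.e. as soon as more than 380% worth of
 * CPU bandwidth has been reserved by -deadline tasks.
 */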

static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
{
	if (global_rt_runtime() == RUNTIME_INF) {
		dl_rq->bw_ratio = 1 << RATIO_SHIFT;
		dl_rq->max_bw = dl_rq->extra_bw = 1 << BW_SHIFT;
	} else {
		dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
					   global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
		dl_rq->max_bw = dl_rq->extra_bw =
			to_ratio(global_rt_period(), global_rt_runtime());
	}
}
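
/*
 * With the same defaults (illustrative sketch, assuming BW_SHIFT = 20
 * and RATIO_SHIFT = 8): bw_ratio encodes the inverse of the global
 * bandwidth with RATIO_SHIFT fractional bits,
 *
 *	to_ratio(950000, 1000000) >> (20 - 8) ~= (1 / 0.95) * 2^8 = 269
 *
 * while max_bw = extra_bw ~= 0.95 * 2^20 = 996147. GRUB's reclaiming
 * uses these values to scale the runtime consumed by -deadline tasks.
 */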

void sched_dl_do_global(void)
{
	u64 new_bw = -1;
	u64 gen = ++dl_generation;
	struct dl_bw *dl_b;
	int cpu;
	unsigned long flags;

	if (global_rt_runtime() != RUNTIME_INF)
		new_bw = to_ratio(global_rt_period(), global_rt_runtime());

	for_each_possible_cpu(cpu) {
		rcu_read_lock_sched();

		if (dl_bw_visited(cpu, gen)) {
			rcu_read_unlock_sched();
			continue;
		}

		dl_b = dl_bw_of(cpu);

		raw_spin_lock_irqsave(&dl_b->lock, flags);
		dl_b->bw = new_bw;
		raw_spin_unlock_irqrestore(&dl_b->lock, flags);

		rcu_read_unlock_sched();
		init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
	}
}

/*
 * We must be sure that accepting a new task (or allowing changing the
 * parameters of an existing one) is consistent with the bandwidth
 * constraints. If yes, this function also accordingly updates the currently
 * allocated bandwidth to reflect the new situation.
 *
 * This function is called while holding p's rq->lock.
 */
int sched_dl_overflow(struct task_struct *p, int policy,
		      const struct sched_attr *attr)
{
	u64 period = attr->sched_period ?: attr->sched_deadline;
	u64 runtime = attr->sched_runtime;
	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
	int cpus, err = -1, cpu = task_cpu(p);
	struct dl_bw *dl_b = dl_bw_of(cpu);
	unsigned long cap;

	if (attr->sched_flags & SCHED_FLAG_SUGOV)
		return 0;

	/* !deadline task may carry old deadline bandwidth */
	if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
		return 0;

	/*
	 * Whether a task enters, leaves, or stays -deadline but changes
	 * its parameters, we may need to update accordingly the total
	 * allocated bandwidth of the container.
	 */
	raw_spin_lock(&dl_b->lock);
	cpus = dl_bw_cpus(cpu);
	cap = dl_bw_capacity(cpu);

	if (dl_policy(policy) && !task_has_dl_policy(p) &&
	    !__dl_overflow(dl_b, cap, 0, new_bw)) {
		if (hrtimer_active(&p->dl.inactive_timer))
			__dl_sub(dl_b, p->dl.dl_bw, cpus);
		__dl_add(dl_b, new_bw, cpus);
		err = 0;
	} else if (dl_policy(policy) && task_has_dl_policy(p) &&
		   !__dl_overflow(dl_b, cap, p->dl.dl_bw, new_bw)) {
		/*
		 * XXX this is slightly incorrect: when the task
		 * utilization decreases, we should delay the total
		 * utilization change until the task's 0-lag point.
		 * But this would require to set the task's "inactive
		 * timer" when the task is not inactive.
		 */
		__dl_sub(dl_b, p->dl.dl_bw, cpus);
		__dl_add(dl_b, new_bw, cpus);
		dl_change_utilization(p, new_bw);
		err = 0;
	} else if (!dl_policy(policy) && task_has_dl_policy(p)) {
		/*
		 * Do not decrease the total deadline utilization here,
		 * switched_from_dl() will take care to do it at the correct
		 * (0-lag) time.
		 */
		err = 0;
	}
	raw_spin_unlock(&dl_b->lock);

	return err;
}
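
/*
 * Numeric sketch (illustrative): a task with sched_runtime = 10ms and
 * sched_period = 100ms carries new_bw = to_ratio(100ms, 10ms)
 * ~= 0.1 * 2^20 = 104857. The __dl_overflow() checks above admit it as
 * long as the root_domain's total_bw plus this new_bw stays within the
 * (capacity-scaled) global limit derived from the
 * sched_rt_{runtime,period}_us sysctls.
 */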

/*
 * This function initializes the sched_dl_entity of a newly becoming
 * SCHED_DEADLINE task.
 *
 * Only the static values are considered here, the actual runtime and the
 * absolute deadline will be properly calculated when the task is enqueued
 * for the first time with its new policy.
 */
void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
{
	struct sched_dl_entity *dl_se = &p->dl;

	dl_se->dl_runtime = attr->sched_runtime;
	dl_se->dl_deadline = attr->sched_deadline;
	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
	dl_se->flags = attr->sched_flags & SCHED_DL_FLAGS;
	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
}
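
/*
 * For example (illustrative, assuming BW_SHIFT = 20): sched_runtime =
 * 10ms, sched_deadline = 30ms, sched_period = 100ms yields
 *
 *	dl_bw	   = to_ratio(100ms, 10ms) ~= 0.100 * 2^20 = 104857
 *	dl_density = to_ratio(30ms, 10ms)  ~= 0.333 * 2^20 = 349525
 *
 * dl_bw drives admission control, while dl_density feeds the CBS
 * revised wakeup rule for constrained (deadline < period) tasks.
 */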

void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
{
	struct sched_dl_entity *dl_se = &p->dl;

	attr->sched_priority = p->rt_priority;
	attr->sched_runtime = dl_se->dl_runtime;
	attr->sched_deadline = dl_se->dl_deadline;
	attr->sched_period = dl_se->dl_period;
	attr->sched_flags &= ~SCHED_DL_FLAGS;
	attr->sched_flags |= dl_se->flags;
}

/*
 * This function validates the new parameters of a -deadline task.
 * We ask for the deadline not being zero, and greater than or equal to
 * the runtime, as well as the period being either zero or greater than
 * or equal to the deadline. Furthermore, we have to be sure that
 * user parameters are above the internal resolution of 1us (we
 * check sched_runtime only since it is always the smaller one) and
 * below 2^63 ns (we have to check both sched_deadline and
 * sched_period, as the latter can be zero).
 */
bool __checkparam_dl(const struct sched_attr *attr)
{
	u64 period, max, min;

	/* special dl tasks don't actually use any parameter */
	if (attr->sched_flags & SCHED_FLAG_SUGOV)
		return true;

	/* deadline != 0 */
	if (attr->sched_deadline == 0)
		return false;

	/*
	 * Since we truncate DL_SCALE bits, make sure we're at least
	 * that big.
	 */
	if (attr->sched_runtime < (1ULL << DL_SCALE))
		return false;

	/*
	 * Since we use the MSB for wrap-around and sign issues, make
	 * sure it's not set (mind that period can be equal to zero).
	 */
	if (attr->sched_deadline & (1ULL << 63) ||
	    attr->sched_period & (1ULL << 63))
		return false;

	period = attr->sched_period;
	if (!period)
		period = attr->sched_deadline;

	/* runtime <= deadline <= period (if period != 0) */
	if (period < attr->sched_deadline ||
	    attr->sched_deadline < attr->sched_runtime)
		return false;

	max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
	min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;

	if (period < min || period > max)
		return false;

	return true;
}
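
/*
 * A few illustrative outcomes of the checks above (assuming
 * DL_SCALE = 10, i.e. a minimum runtime of 1024ns, and the default
 * period sysctls):
 *
 *	runtime = 2ms, deadline = 5ms, period = 10ms	-> accepted
 *	runtime = 500ns (< 1 << DL_SCALE)		-> rejected
 *	runtime = 6ms, deadline = 5ms			-> rejected
 *	period = 50us (< sched_deadline_period_min_us)	-> rejected
 */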

/*
 * This function clears the sched_dl_entity static params.
 */
void __dl_clear_params(struct task_struct *p)
{
	struct sched_dl_entity *dl_se = &p->dl;

	dl_se->dl_runtime = 0;
	dl_se->dl_deadline = 0;
	dl_se->dl_period = 0;
	dl_se->flags = 0;
	dl_se->dl_bw = 0;
	dl_se->dl_density = 0;

	dl_se->dl_throttled = 0;
	dl_se->dl_yielded = 0;
	dl_se->dl_non_contending = 0;
	dl_se->dl_overrun = 0;

#ifdef CONFIG_RT_MUTEXES
	dl_se->pi_se = dl_se;
#endif
}

bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
{
	struct sched_dl_entity *dl_se = &p->dl;

	if (dl_se->dl_runtime != attr->sched_runtime ||
	    dl_se->dl_deadline != attr->sched_deadline ||
	    dl_se->dl_period != attr->sched_period ||
	    dl_se->flags != (attr->sched_flags & SCHED_DL_FLAGS))
		return true;

	return false;
}

#ifdef CONFIG_SMP
int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
				 const struct cpumask *trial)
{
	unsigned long flags, cap;
	struct dl_bw *cur_dl_b;
	int ret = 1;

	rcu_read_lock_sched();
	cur_dl_b = dl_bw_of(cpumask_any(cur));
	cap = __dl_bw_capacity(trial);
	raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
	if (__dl_overflow(cur_dl_b, cap, 0, 0))
		ret = 0;
	raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
	rcu_read_unlock_sched();

	return ret;
}

enum dl_bw_request {
	dl_bw_req_check_overflow = 0,
	dl_bw_req_alloc,
	dl_bw_req_free
};

static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
{
	unsigned long flags;
	struct dl_bw *dl_b;
	bool overflow = 0;

	rcu_read_lock_sched();
	dl_b = dl_bw_of(cpu);
	raw_spin_lock_irqsave(&dl_b->lock, flags);

	if (req == dl_bw_req_free) {
		__dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu));
	} else {
		unsigned long cap = dl_bw_capacity(cpu);

		overflow = __dl_overflow(dl_b, cap, 0, dl_bw);

		if (req == dl_bw_req_alloc && !overflow) {
			/*
			 * We reserve space in the destination
			 * root_domain, as we can't fail after this point.
			 * We will free resources in the source root_domain
			 * later on (see set_cpus_allowed_dl()).
			 */
			__dl_add(dl_b, dl_bw, dl_bw_cpus(cpu));
		}
	}

	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
	rcu_read_unlock_sched();

	return overflow ? -EBUSY : 0;
}

int dl_bw_check_overflow(int cpu)
{
	return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0);
}

int dl_bw_alloc(int cpu, u64 dl_bw)
{
	return dl_bw_manage(dl_bw_req_alloc, cpu, dl_bw);
}

void dl_bw_free(int cpu, u64 dl_bw)
{
	dl_bw_manage(dl_bw_req_free, cpu, dl_bw);
}
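
/*
 * dl_bw_alloc()/dl_bw_free() are used by the cpuset code when moving
 * -deadline tasks between cpusets: space is reserved in the destination
 * root_domain before the move and released from the source afterwards,
 * mirroring the set_cpus_allowed_dl() path above.
 */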
#endif

#ifdef CONFIG_SCHED_DEBUG
void print_dl_stats(struct seq_file *m, int cpu)
{
	print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
}
#endif /* CONFIG_SCHED_DEBUG */