/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _KERNEL_STATS_H
#define _KERNEL_STATS_H

#ifdef CONFIG_SCHEDSTATS

extern struct static_key_false sched_schedstats;

/*
 * Expects runqueue lock to be held for atomicity of update
 */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
{
	if (rq) {
		rq->rq_sched_info.run_delay += delta;
		rq->rq_sched_info.pcount++;
	}
}

/*
 * Expects runqueue lock to be held for atomicity of update
 */
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
{
	if (rq)
		rq->rq_cpu_time += delta;
}

static inline void
rq_sched_info_dequeue(struct rq *rq, unsigned long long delta)
{
	if (rq)
		rq->rq_sched_info.run_delay += delta;
}
#define schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
#define __schedstat_inc(var)		do { var++; } while (0)
#define schedstat_inc(var)		do { if (schedstat_enabled()) { var++; } } while (0)
#define __schedstat_add(var, amt)	do { var += (amt); } while (0)
#define schedstat_add(var, amt)		do { if (schedstat_enabled()) { var += (amt); } } while (0)
#define __schedstat_set(var, val)	do { var = (val); } while (0)
#define schedstat_set(var, val)		do { if (schedstat_enabled()) { var = (val); } } while (0)
#define schedstat_val(var)		(var)
#define schedstat_val_or_zero(var)	((schedstat_enabled()) ? (var) : 0)
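
/*
 * Usage sketch (hypothetical call sites, assuming schedstats fields
 * such as rq->yld_count and stats->wait_sum as in mainline): the plain
 * schedstat_*() helpers embed the static-branch check themselves, while
 * the __schedstat_*() variants assume the caller already tested
 * schedstat_enabled() once and hoisted it out of the hot path:
 *
 *	schedstat_inc(rq->yld_count);
 *
 *	if (schedstat_enabled()) {
 *		__schedstat_set(stats->wait_start, rq_clock(rq));
 *		__schedstat_add(stats->wait_sum, delta);
 *	}
 */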

void __update_stats_wait_start(struct rq *rq, struct task_struct *p,
			       struct sched_statistics *stats);

void __update_stats_wait_end(struct rq *rq, struct task_struct *p,
			     struct sched_statistics *stats);
void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p,
				    struct sched_statistics *stats);

static inline void
check_schedstat_required(void)
{
	if (schedstat_enabled())
		return;

	/* Warn once if a dependent tracepoint is active while schedstats is disabled */
	if (trace_sched_stat_wait_enabled()    ||
	    trace_sched_stat_sleep_enabled()   ||
	    trace_sched_stat_iowait_enabled()  ||
	    trace_sched_stat_blocked_enabled() ||
	    trace_sched_stat_runtime_enabled())
		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and stat_runtime require the kernel parameter schedstats=enable or kernel.sched_schedstats=1\n");
}
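
/*
 * Call-site note (a sketch, not guaranteed by this header): this check
 * is intended to run on the enqueue path (e.g. from enqueue_entity()),
 * so the hint is printed as soon as one of the above tracepoints is
 * enabled while schedstats is off.
 */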

#else /* !CONFIG_SCHEDSTATS: */

static inline void rq_sched_info_arrive (struct rq *rq, unsigned long long delta) { }
static inline void rq_sched_info_dequeue(struct rq *rq, unsigned long long delta) { }
static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delta) { }
# define schedstat_enabled()		0
# define __schedstat_inc(var)		do { } while (0)
# define schedstat_inc(var)		do { } while (0)
# define __schedstat_add(var, amt)	do { } while (0)
# define schedstat_add(var, amt)	do { } while (0)
# define __schedstat_set(var, val)	do { } while (0)
# define schedstat_set(var, val)	do { } while (0)
# define schedstat_val(var)		0
# define schedstat_val_or_zero(var)	0

# define __update_stats_wait_start(rq, p, stats)	do { } while (0)
# define __update_stats_wait_end(rq, p, stats)		do { } while (0)
# define __update_stats_enqueue_sleeper(rq, p, stats)	do { } while (0)
# define check_schedstat_required()			do { } while (0)

#endif /* CONFIG_SCHEDSTATS */

#ifdef CONFIG_FAIR_GROUP_SCHED
struct sched_entity_stats {
	struct sched_entity	se;
	struct sched_statistics	stats;
} __no_randomize_layout;
#endif

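/*
 * For a task, the statistics live in task_struct::stats; a group
 * scheduling entity has no containing task, so under
 * CONFIG_FAIR_GROUP_SCHED it is allocated together with its statistics
 * in sched_entity_stats above, and container_of() recovers them from
 * the embedded sched_entity.
 */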
static inline struct sched_statistics *
__schedstats_from_se(struct sched_entity *se)
{
#ifdef CONFIG_FAIR_GROUP_SCHED
	if (!entity_is_task(se))
		return &container_of(se, struct sched_entity_stats, se)->stats;
#endif
	return &task_of(se)->stats;
}

#ifdef CONFIG_PSI
void psi_task_change(struct task_struct *task, int clear, int set);
void psi_task_switch(struct task_struct *prev, struct task_struct *next,
		     bool sleep);
void psi_account_irqtime(struct task_struct *task, u32 delta);

/*
 * PSI tracks state that persists across sleeps, such as iowaits and
 * memory stalls. As a result, it has to distinguish between sleeps,
 * where a task's runnable state changes, and requeues, where a task
 * and its state are being moved between CPUs and runqueues.
 */
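/*
 * For example, following the flag logic in psi_enqueue() below: a task
 * waking from iowait (wakeup == true) clears TSK_IOWAIT and sets
 * TSK_RUNNING, while a task in memstall that is merely requeued on
 * migration (wakeup == false) sets TSK_RUNNING, TSK_MEMSTALL and
 * TSK_MEMSTALL_RUNNING without touching TSK_IOWAIT.
 */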
static inline void psi_enqueue(struct task_struct *p, bool wakeup)
{
	int clear = 0, set = TSK_RUNNING;

	if (static_branch_likely(&psi_disabled))
		return;

	if (p->in_memstall)
		set |= TSK_MEMSTALL_RUNNING;

	if (!wakeup) {
		if (p->in_memstall)
			set |= TSK_MEMSTALL;
	} else {
		if (p->in_iowait)
			clear |= TSK_IOWAIT;
	}

	psi_task_change(p, clear, set);
}

static inline void psi_dequeue(struct task_struct *p, bool sleep)
{
	if (static_branch_likely(&psi_disabled))
		return;

	/*
	 * A voluntary sleep is a dequeue followed by a task switch. To
	 * avoid walking all ancestors twice, psi_task_switch() handles
	 * TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU.
	 * Do nothing here.
	 */
	if (sleep)
		return;

	psi_task_change(p, p->psi_flags, 0);
}

static inline void psi_ttwu_dequeue(struct task_struct *p)
{
	if (static_branch_likely(&psi_disabled))
		return;
	/*
	 * Is the task being migrated during a wakeup? Make sure to
	 * deregister its sleep-persistent psi states from the old
	 * queue, and let psi_enqueue() know it has to requeue.
	 */
	if (unlikely(p->psi_flags)) {
		struct rq_flags rf;
		struct rq *rq;

		rq = __task_rq_lock(p, &rf);
		psi_task_change(p, p->psi_flags, 0);
		__task_rq_unlock(rq, &rf);
	}
}

static inline void psi_sched_switch(struct task_struct *prev,
				    struct task_struct *next,
				    bool sleep)
{
	if (static_branch_likely(&psi_disabled))
		return;

	psi_task_switch(prev, next, sleep);
}

#else /* CONFIG_PSI */
static inline void psi_enqueue(struct task_struct *p, bool wakeup) {}
static inline void psi_dequeue(struct task_struct *p, bool sleep) {}
static inline void psi_ttwu_dequeue(struct task_struct *p) {}
static inline void psi_sched_switch(struct task_struct *prev,
				    struct task_struct *next,
				    bool sleep) {}
static inline void psi_account_irqtime(struct task_struct *task, u32 delta) {}
#endif /* CONFIG_PSI */

#ifdef CONFIG_SCHED_INFO
/*
 * We are interested in knowing how long it was from the *first* time a
 * task was queued to the time that it finally hit a CPU. We call this
 * routine from dequeue_task() to account for possible rq->clock skew
 * across CPUs; the delta taken on each CPU would annul the skew.
 */
static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
{
	unsigned long long delta = 0;

	if (!t->sched_info.last_queued)
		return;

	delta = rq_clock(rq) - t->sched_info.last_queued;
	t->sched_info.last_queued = 0;
	t->sched_info.run_delay += delta;

	rq_sched_info_dequeue(rq, delta);
}

/*
 * Called when a task finally hits the CPU. We can now calculate how
 * long it was waiting to run. We also note when it began so that we
 * can keep stats on how long its timeslice is.
 */
static void sched_info_arrive(struct rq *rq, struct task_struct *t)
{
	unsigned long long now, delta = 0;

	if (!t->sched_info.last_queued)
		return;

	now = rq_clock(rq);
	delta = now - t->sched_info.last_queued;
	t->sched_info.last_queued = 0;
	t->sched_info.run_delay += delta;
	t->sched_info.last_arrival = now;
	t->sched_info.pcount++;

	rq_sched_info_arrive(rq, delta);
}

/*
 * This function is only called from enqueue_task(), and it only updates
 * the timestamp if it is not already set. It's assumed that
 * sched_info_dequeue() will clear that stamp when appropriate.
 */
static inline void sched_info_enqueue(struct rq *rq, struct task_struct *t)
{
	if (!t->sched_info.last_queued)
		t->sched_info.last_queued = rq_clock(rq);
}

/*
 * Called when a process ceases being the active-running process
 * involuntarily due, typically, to expiring its time slice (this may
 * also be called when switching to the idle task). Now we can calculate
 * how long we ran. Also, if the process is still in the TASK_RUNNING
 * state, call sched_info_enqueue() to mark that it has now again
 * started waiting on the runqueue.
 */
static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
{
	unsigned long long delta = rq_clock(rq) - t->sched_info.last_arrival;

	rq_sched_info_depart(rq, delta);

	if (task_is_running(t))
		sched_info_enqueue(rq, t);
}

/*
 * Called when tasks are switched involuntarily due, typically, to expiring
 * their time slice. (This may also be called when switching to or from
 * the idle task.) We are only called when prev != next.
 */
static inline void
sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
{
	/*
	 * prev now departs the CPU. It's not interesting to record
	 * stats about how efficient we were at scheduling the idle
	 * process, however.
	 */
	if (prev != rq->idle)
		sched_info_depart(rq, prev);

	if (next != rq->idle)
		sched_info_arrive(rq, next);
}
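
/*
 * Call-site note (a reader aid; the exact caller depends on the tree):
 * sched_info_switch() is expected to run on the context-switch path,
 * e.g. from prepare_task_switch(), with the runqueue lock held, which
 * satisfies the "expects runqueue lock to be held" requirement of the
 * rq_sched_info_*() helpers above.
 */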

#else /* !CONFIG_SCHED_INFO: */
# define sched_info_enqueue(rq, t)	do { } while (0)
# define sched_info_dequeue(rq, t)	do { } while (0)
# define sched_info_switch(rq, t, next)	do { } while (0)
#endif /* CONFIG_SCHED_INFO */

#endif /* _KERNEL_STATS_H */