1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * kernel/sched/debug.c |
4 | * |
5 | * Print the CFS rbtree and other debugging details |
6 | * |
7 | * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar |
8 | */ |
9 | |
/*
 * Formatted output helper used throughout this file: when @m is NULL the
 * text goes to the console through pr_cont(), otherwise it is written to
 * the seq_file @m (e.g. /sys/kernel/debug/sched/debug).
 */
#define SEQ_printf(m, x...)			\
 do {						\
	if (!(m))				\
		pr_cont(x);			\
	else					\
		seq_printf(m, x);		\
 } while (0)
21 | |
/*
 * Helpers that make nanosecond fields easier to print.
 */

/*
 * Return @nsec divided by 1000000, keeping the sign when @nsec holds a
 * negative value in two's-complement form.
 */
static long long nsec_high(unsigned long long nsec)
{
	int negative = (long long)nsec < 0;

	/* Work on the magnitude; do_div() stores the quotient back in nsec. */
	if (negative)
		nsec = -nsec;
	do_div(nsec, 1000000);

	return negative ? -nsec : nsec;
}
36 | |
/*
 * Return the remainder of |@nsec| / 1000000, i.e. the part that
 * nsec_high() drops.
 */
static unsigned long nsec_low(unsigned long long nsec)
{
	unsigned long long magnitude = nsec;

	if ((long long)magnitude < 0)
		magnitude = -magnitude;

	/* do_div() evaluates to the remainder of the division. */
	return do_div(magnitude, 1000000);
}

/* Expands to the two printf arguments matching a "%lld.%06ld" format pair. */
#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
46 | |
/*
 * Name table for the scheduler features: features.h is included with
 * SCHED_FEAT() temporarily defined to emit each feature name as a string.
 */
#define SCHED_FEAT(name, enabled) #name,

static const char * const sched_feat_names[] = {
#include "features.h"
};

#undef SCHED_FEAT
55 | |
56 | static int sched_feat_show(struct seq_file *m, void *v) |
57 | { |
58 | int i; |
59 | |
60 | for (i = 0; i < __SCHED_FEAT_NR; i++) { |
61 | if (!(sysctl_sched_features & (1UL << i))) |
62 | seq_puts(m, s: "NO_" ); |
63 | seq_printf(m, fmt: "%s " , sched_feat_names[i]); |
64 | } |
65 | seq_puts(m, s: "\n" ); |
66 | |
67 | return 0; |
68 | } |
69 | |
#ifdef CONFIG_JUMP_LABEL

/* Map the "true"/"false" token used in features.h to a static_key initializer. */
#define jump_label_key__true  STATIC_KEY_INIT_TRUE
#define jump_label_key__false STATIC_KEY_INIT_FALSE

#define SCHED_FEAT(name, enabled)	\
	jump_label_key__##enabled ,

/* One static key per scheduler feature, in features.h order. */
struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
#include "features.h"
};

#undef SCHED_FEAT

/* Disable the static key of feature @i; uses the _cpuslocked variant. */
static void sched_feat_disable(int i)
{
	static_key_disable_cpuslocked(&sched_feat_keys[i]);
}

/* Enable the static key of feature @i; uses the _cpuslocked variant. */
static void sched_feat_enable(int i)
{
	static_key_enable_cpuslocked(&sched_feat_keys[i]);
}
#else
/* Without jump labels there is no static key to flip. */
static void sched_feat_disable(int i) { }
static void sched_feat_enable(int i) { }
#endif /* CONFIG_JUMP_LABEL */
97 | |
98 | static int sched_feat_set(char *cmp) |
99 | { |
100 | int i; |
101 | int neg = 0; |
102 | |
103 | if (strncmp(cmp, "NO_" , 3) == 0) { |
104 | neg = 1; |
105 | cmp += 3; |
106 | } |
107 | |
108 | i = match_string(array: sched_feat_names, n: __SCHED_FEAT_NR, string: cmp); |
109 | if (i < 0) |
110 | return i; |
111 | |
112 | if (neg) { |
113 | sysctl_sched_features &= ~(1UL << i); |
114 | sched_feat_disable(i); |
115 | } else { |
116 | sysctl_sched_features |= (1UL << i); |
117 | sched_feat_enable(i); |
118 | } |
119 | |
120 | return 0; |
121 | } |
122 | |
123 | static ssize_t |
124 | sched_feat_write(struct file *filp, const char __user *ubuf, |
125 | size_t cnt, loff_t *ppos) |
126 | { |
127 | char buf[64]; |
128 | char *cmp; |
129 | int ret; |
130 | struct inode *inode; |
131 | |
132 | if (cnt > 63) |
133 | cnt = 63; |
134 | |
135 | if (copy_from_user(to: &buf, from: ubuf, n: cnt)) |
136 | return -EFAULT; |
137 | |
138 | buf[cnt] = 0; |
139 | cmp = strstrip(str: buf); |
140 | |
141 | /* Ensure the static_key remains in a consistent state */ |
142 | inode = file_inode(f: filp); |
143 | cpus_read_lock(); |
144 | inode_lock(inode); |
145 | ret = sched_feat_set(cmp); |
146 | inode_unlock(inode); |
147 | cpus_read_unlock(); |
148 | if (ret < 0) |
149 | return ret; |
150 | |
151 | *ppos += cnt; |
152 | |
153 | return cnt; |
154 | } |
155 | |
156 | static int sched_feat_open(struct inode *inode, struct file *filp) |
157 | { |
158 | return single_open(filp, sched_feat_show, NULL); |
159 | } |
160 | |
161 | static const struct file_operations sched_feat_fops = { |
162 | .open = sched_feat_open, |
163 | .write = sched_feat_write, |
164 | .read = seq_read, |
165 | .llseek = seq_lseek, |
166 | .release = single_release, |
167 | }; |
168 | |
169 | #ifdef CONFIG_SMP |
170 | |
171 | static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf, |
172 | size_t cnt, loff_t *ppos) |
173 | { |
174 | char buf[16]; |
175 | unsigned int scaling; |
176 | |
177 | if (cnt > 15) |
178 | cnt = 15; |
179 | |
180 | if (copy_from_user(to: &buf, from: ubuf, n: cnt)) |
181 | return -EFAULT; |
182 | buf[cnt] = '\0'; |
183 | |
184 | if (kstrtouint(s: buf, base: 10, res: &scaling)) |
185 | return -EINVAL; |
186 | |
187 | if (scaling >= SCHED_TUNABLESCALING_END) |
188 | return -EINVAL; |
189 | |
190 | sysctl_sched_tunable_scaling = scaling; |
191 | if (sched_update_scaling()) |
192 | return -EINVAL; |
193 | |
194 | *ppos += cnt; |
195 | return cnt; |
196 | } |
197 | |
198 | static int sched_scaling_show(struct seq_file *m, void *v) |
199 | { |
200 | seq_printf(m, fmt: "%d\n" , sysctl_sched_tunable_scaling); |
201 | return 0; |
202 | } |
203 | |
204 | static int sched_scaling_open(struct inode *inode, struct file *filp) |
205 | { |
206 | return single_open(filp, sched_scaling_show, NULL); |
207 | } |
208 | |
209 | static const struct file_operations sched_scaling_fops = { |
210 | .open = sched_scaling_open, |
211 | .write = sched_scaling_write, |
212 | .read = seq_read, |
213 | .llseek = seq_lseek, |
214 | .release = single_release, |
215 | }; |
216 | |
217 | #endif /* SMP */ |
218 | |
219 | #ifdef CONFIG_PREEMPT_DYNAMIC |
220 | |
221 | static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf, |
222 | size_t cnt, loff_t *ppos) |
223 | { |
224 | char buf[16]; |
225 | int mode; |
226 | |
227 | if (cnt > 15) |
228 | cnt = 15; |
229 | |
230 | if (copy_from_user(to: &buf, from: ubuf, n: cnt)) |
231 | return -EFAULT; |
232 | |
233 | buf[cnt] = 0; |
234 | mode = sched_dynamic_mode(str: strstrip(str: buf)); |
235 | if (mode < 0) |
236 | return mode; |
237 | |
238 | sched_dynamic_update(mode); |
239 | |
240 | *ppos += cnt; |
241 | |
242 | return cnt; |
243 | } |
244 | |
245 | static int sched_dynamic_show(struct seq_file *m, void *v) |
246 | { |
247 | static const char * preempt_modes[] = { |
248 | "none" , "voluntary" , "full" |
249 | }; |
250 | int i; |
251 | |
252 | for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) { |
253 | if (preempt_dynamic_mode == i) |
254 | seq_puts(m, s: "(" ); |
255 | seq_puts(m, s: preempt_modes[i]); |
256 | if (preempt_dynamic_mode == i) |
257 | seq_puts(m, s: ")" ); |
258 | |
259 | seq_puts(m, s: " " ); |
260 | } |
261 | |
262 | seq_puts(m, s: "\n" ); |
263 | return 0; |
264 | } |
265 | |
266 | static int sched_dynamic_open(struct inode *inode, struct file *filp) |
267 | { |
268 | return single_open(filp, sched_dynamic_show, NULL); |
269 | } |
270 | |
271 | static const struct file_operations sched_dynamic_fops = { |
272 | .open = sched_dynamic_open, |
273 | .write = sched_dynamic_write, |
274 | .read = seq_read, |
275 | .llseek = seq_lseek, |
276 | .release = single_release, |
277 | }; |
278 | |
279 | #endif /* CONFIG_PREEMPT_DYNAMIC */ |
280 | |
281 | __read_mostly bool sched_debug_verbose; |
282 | |
283 | #ifdef CONFIG_SMP |
284 | static struct dentry *sd_dentry; |
285 | |
286 | |
287 | static ssize_t sched_verbose_write(struct file *filp, const char __user *ubuf, |
288 | size_t cnt, loff_t *ppos) |
289 | { |
290 | ssize_t result; |
291 | bool orig; |
292 | |
293 | cpus_read_lock(); |
294 | mutex_lock(&sched_domains_mutex); |
295 | |
296 | orig = sched_debug_verbose; |
297 | result = debugfs_write_file_bool(file: filp, user_buf: ubuf, count: cnt, ppos); |
298 | |
299 | if (sched_debug_verbose && !orig) |
300 | update_sched_domain_debugfs(); |
301 | else if (!sched_debug_verbose && orig) { |
302 | debugfs_remove(dentry: sd_dentry); |
303 | sd_dentry = NULL; |
304 | } |
305 | |
306 | mutex_unlock(lock: &sched_domains_mutex); |
307 | cpus_read_unlock(); |
308 | |
309 | return result; |
310 | } |
311 | #else |
312 | #define sched_verbose_write debugfs_write_file_bool |
313 | #endif |
314 | |
315 | static const struct file_operations sched_verbose_fops = { |
316 | .read = debugfs_read_file_bool, |
317 | .write = sched_verbose_write, |
318 | .open = simple_open, |
319 | .llseek = default_llseek, |
320 | }; |
321 | |
322 | static const struct seq_operations sched_debug_sops; |
323 | |
324 | static int sched_debug_open(struct inode *inode, struct file *filp) |
325 | { |
326 | return seq_open(filp, &sched_debug_sops); |
327 | } |
328 | |
329 | static const struct file_operations sched_debug_fops = { |
330 | .open = sched_debug_open, |
331 | .read = seq_read, |
332 | .llseek = seq_lseek, |
333 | .release = seq_release, |
334 | }; |
335 | |
336 | static struct dentry *debugfs_sched; |
337 | |
338 | static __init int sched_init_debug(void) |
339 | { |
340 | struct dentry __maybe_unused *numa; |
341 | |
342 | debugfs_sched = debugfs_create_dir(name: "sched" , NULL); |
343 | |
344 | debugfs_create_file(name: "features" , mode: 0644, parent: debugfs_sched, NULL, fops: &sched_feat_fops); |
345 | debugfs_create_file_unsafe(name: "verbose" , mode: 0644, parent: debugfs_sched, data: &sched_debug_verbose, fops: &sched_verbose_fops); |
346 | #ifdef CONFIG_PREEMPT_DYNAMIC |
347 | debugfs_create_file(name: "preempt" , mode: 0644, parent: debugfs_sched, NULL, fops: &sched_dynamic_fops); |
348 | #endif |
349 | |
350 | debugfs_create_u32(name: "base_slice_ns" , mode: 0644, parent: debugfs_sched, value: &sysctl_sched_base_slice); |
351 | |
352 | debugfs_create_u32(name: "latency_warn_ms" , mode: 0644, parent: debugfs_sched, value: &sysctl_resched_latency_warn_ms); |
353 | debugfs_create_u32(name: "latency_warn_once" , mode: 0644, parent: debugfs_sched, value: &sysctl_resched_latency_warn_once); |
354 | |
355 | #ifdef CONFIG_SMP |
356 | debugfs_create_file(name: "tunable_scaling" , mode: 0644, parent: debugfs_sched, NULL, fops: &sched_scaling_fops); |
357 | debugfs_create_u32(name: "migration_cost_ns" , mode: 0644, parent: debugfs_sched, value: &sysctl_sched_migration_cost); |
358 | debugfs_create_u32(name: "nr_migrate" , mode: 0644, parent: debugfs_sched, value: &sysctl_sched_nr_migrate); |
359 | |
360 | mutex_lock(&sched_domains_mutex); |
361 | update_sched_domain_debugfs(); |
362 | mutex_unlock(lock: &sched_domains_mutex); |
363 | #endif |
364 | |
365 | #ifdef CONFIG_NUMA_BALANCING |
366 | numa = debugfs_create_dir(name: "numa_balancing" , parent: debugfs_sched); |
367 | |
368 | debugfs_create_u32(name: "scan_delay_ms" , mode: 0644, parent: numa, value: &sysctl_numa_balancing_scan_delay); |
369 | debugfs_create_u32(name: "scan_period_min_ms" , mode: 0644, parent: numa, value: &sysctl_numa_balancing_scan_period_min); |
370 | debugfs_create_u32(name: "scan_period_max_ms" , mode: 0644, parent: numa, value: &sysctl_numa_balancing_scan_period_max); |
371 | debugfs_create_u32(name: "scan_size_mb" , mode: 0644, parent: numa, value: &sysctl_numa_balancing_scan_size); |
372 | debugfs_create_u32(name: "hot_threshold_ms" , mode: 0644, parent: numa, value: &sysctl_numa_balancing_hot_threshold); |
373 | #endif |
374 | |
375 | debugfs_create_file(name: "debug" , mode: 0444, parent: debugfs_sched, NULL, fops: &sched_debug_fops); |
376 | |
377 | return 0; |
378 | } |
379 | late_initcall(sched_init_debug); |
380 | |
381 | #ifdef CONFIG_SMP |
382 | |
383 | static cpumask_var_t sd_sysctl_cpus; |
384 | |
385 | static int sd_flags_show(struct seq_file *m, void *v) |
386 | { |
387 | unsigned long flags = *(unsigned int *)m->private; |
388 | int idx; |
389 | |
390 | for_each_set_bit(idx, &flags, __SD_FLAG_CNT) { |
391 | seq_puts(m, s: sd_flag_debug[idx].name); |
392 | seq_puts(m, s: " " ); |
393 | } |
394 | seq_puts(m, s: "\n" ); |
395 | |
396 | return 0; |
397 | } |
398 | |
399 | static int sd_flags_open(struct inode *inode, struct file *file) |
400 | { |
401 | return single_open(file, sd_flags_show, inode->i_private); |
402 | } |
403 | |
404 | static const struct file_operations sd_flags_fops = { |
405 | .open = sd_flags_open, |
406 | .read = seq_read, |
407 | .llseek = seq_lseek, |
408 | .release = single_release, |
409 | }; |
410 | |
411 | static void register_sd(struct sched_domain *sd, struct dentry *parent) |
412 | { |
413 | #define SDM(type, mode, member) \ |
414 | debugfs_create_##type(#member, mode, parent, &sd->member) |
415 | |
416 | SDM(ulong, 0644, min_interval); |
417 | SDM(ulong, 0644, max_interval); |
418 | SDM(u64, 0644, max_newidle_lb_cost); |
419 | SDM(u32, 0644, busy_factor); |
420 | SDM(u32, 0644, imbalance_pct); |
421 | SDM(u32, 0644, cache_nice_tries); |
422 | SDM(str, 0444, name); |
423 | |
424 | #undef SDM |
425 | |
426 | debugfs_create_file(name: "flags" , mode: 0444, parent, data: &sd->flags, fops: &sd_flags_fops); |
427 | debugfs_create_file(name: "groups_flags" , mode: 0444, parent, data: &sd->groups->flags, fops: &sd_flags_fops); |
428 | } |
429 | |
430 | void update_sched_domain_debugfs(void) |
431 | { |
432 | int cpu, i; |
433 | |
434 | /* |
435 | * This can unfortunately be invoked before sched_debug_init() creates |
436 | * the debug directory. Don't touch sd_sysctl_cpus until then. |
437 | */ |
438 | if (!debugfs_sched) |
439 | return; |
440 | |
441 | if (!sched_debug_verbose) |
442 | return; |
443 | |
444 | if (!cpumask_available(mask: sd_sysctl_cpus)) { |
445 | if (!alloc_cpumask_var(mask: &sd_sysctl_cpus, GFP_KERNEL)) |
446 | return; |
447 | cpumask_copy(dstp: sd_sysctl_cpus, cpu_possible_mask); |
448 | } |
449 | |
450 | if (!sd_dentry) { |
451 | sd_dentry = debugfs_create_dir(name: "domains" , parent: debugfs_sched); |
452 | |
453 | /* rebuild sd_sysctl_cpus if empty since it gets cleared below */ |
454 | if (cpumask_empty(srcp: sd_sysctl_cpus)) |
455 | cpumask_copy(dstp: sd_sysctl_cpus, cpu_online_mask); |
456 | } |
457 | |
458 | for_each_cpu(cpu, sd_sysctl_cpus) { |
459 | struct sched_domain *sd; |
460 | struct dentry *d_cpu; |
461 | char buf[32]; |
462 | |
463 | snprintf(buf, size: sizeof(buf), fmt: "cpu%d" , cpu); |
464 | debugfs_lookup_and_remove(name: buf, parent: sd_dentry); |
465 | d_cpu = debugfs_create_dir(name: buf, parent: sd_dentry); |
466 | |
467 | i = 0; |
468 | for_each_domain(cpu, sd) { |
469 | struct dentry *d_sd; |
470 | |
471 | snprintf(buf, size: sizeof(buf), fmt: "domain%d" , i); |
472 | d_sd = debugfs_create_dir(name: buf, parent: d_cpu); |
473 | |
474 | register_sd(sd, parent: d_sd); |
475 | i++; |
476 | } |
477 | |
478 | __cpumask_clear_cpu(cpu, dstp: sd_sysctl_cpus); |
479 | } |
480 | } |
481 | |
482 | void dirty_sched_domain_sysctl(int cpu) |
483 | { |
484 | if (cpumask_available(mask: sd_sysctl_cpus)) |
485 | __cpumask_set_cpu(cpu, dstp: sd_sysctl_cpus); |
486 | } |
487 | |
488 | #endif /* CONFIG_SMP */ |
489 | |
490 | #ifdef CONFIG_FAIR_GROUP_SCHED |
491 | static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg) |
492 | { |
493 | struct sched_entity *se = tg->se[cpu]; |
494 | |
495 | #define P(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F) |
496 | #define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", \ |
497 | #F, (long long)schedstat_val(stats->F)) |
498 | #define PN(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F)) |
499 | #define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", \ |
500 | #F, SPLIT_NS((long long)schedstat_val(stats->F))) |
501 | |
502 | if (!se) |
503 | return; |
504 | |
505 | PN(se->exec_start); |
506 | PN(se->vruntime); |
507 | PN(se->sum_exec_runtime); |
508 | |
509 | if (schedstat_enabled()) { |
510 | struct sched_statistics *stats; |
511 | stats = __schedstats_from_se(se); |
512 | |
513 | PN_SCHEDSTAT(wait_start); |
514 | PN_SCHEDSTAT(sleep_start); |
515 | PN_SCHEDSTAT(block_start); |
516 | PN_SCHEDSTAT(sleep_max); |
517 | PN_SCHEDSTAT(block_max); |
518 | PN_SCHEDSTAT(exec_max); |
519 | PN_SCHEDSTAT(slice_max); |
520 | PN_SCHEDSTAT(wait_max); |
521 | PN_SCHEDSTAT(wait_sum); |
522 | P_SCHEDSTAT(wait_count); |
523 | } |
524 | |
525 | P(se->load.weight); |
526 | #ifdef CONFIG_SMP |
527 | P(se->avg.load_avg); |
528 | P(se->avg.util_avg); |
529 | P(se->avg.runnable_avg); |
530 | #endif |
531 | |
532 | #undef PN_SCHEDSTAT |
533 | #undef PN |
534 | #undef P_SCHEDSTAT |
535 | #undef P |
536 | } |
537 | #endif |
538 | |
539 | #ifdef CONFIG_CGROUP_SCHED |
540 | static DEFINE_SPINLOCK(sched_debug_lock); |
541 | static char group_path[PATH_MAX]; |
542 | |
543 | static void task_group_path(struct task_group *tg, char *path, int plen) |
544 | { |
545 | if (autogroup_path(tg, buf: path, buflen: plen)) |
546 | return; |
547 | |
548 | cgroup_path(cgrp: tg->css.cgroup, buf: path, buflen: plen); |
549 | } |
550 | |
/*
 * Print a task group's path using @fmt, whose last conversion consumes the
 * path string.
 *
 * Only one caller at a time can use the full-length group_path[] buffer,
 * which is claimed with spin_trylock().  A caller that loses the race
 * falls back to a short on-stack buffer.  "..." is stored at the tail of
 * that buffer, so it becomes visible only when the path fills the space
 * handed to task_group_path(), hinting at possible truncation.
 *
 * The expansion is a bare brace block, not do { } while (0), so a call
 * site may omit the trailing semicolon.
 */
#define SEQ_printf_task_group_path(m, tg, fmt...)			\
{									\
	if (!spin_trylock(&sched_debug_lock)) {				\
		char short_path[128];					\
		char *tail = short_path + sizeof(short_path) - 3;	\
									\
		task_group_path(tg, short_path, tail - short_path);	\
		strcpy(tail - 1, "...");				\
		SEQ_printf(m, fmt, short_path);				\
	} else {							\
		task_group_path(tg, group_path, sizeof(group_path));	\
		SEQ_printf(m, fmt, group_path);				\
		spin_unlock(&sched_debug_lock);				\
	}								\
}
572 | #endif |
573 | |
574 | static void |
575 | print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) |
576 | { |
577 | if (task_current(rq, p)) |
578 | SEQ_printf(m, ">R" ); |
579 | else |
580 | SEQ_printf(m, " %c" , task_state_to_char(p)); |
581 | |
582 | SEQ_printf(m, "%15s %5d %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld.%06ld %9Ld %5d " , |
583 | p->comm, task_pid_nr(p), |
584 | SPLIT_NS(p->se.vruntime), |
585 | entity_eligible(cfs_rq_of(&p->se), &p->se) ? 'E' : 'N', |
586 | SPLIT_NS(p->se.deadline), |
587 | SPLIT_NS(p->se.slice), |
588 | SPLIT_NS(p->se.sum_exec_runtime), |
589 | (long long)(p->nvcsw + p->nivcsw), |
590 | p->prio); |
591 | |
592 | SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld %9lld.%06ld" , |
593 | SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)), |
594 | SPLIT_NS(p->se.sum_exec_runtime), |
595 | SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), |
596 | SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); |
597 | |
598 | #ifdef CONFIG_NUMA_BALANCING |
599 | SEQ_printf(m, " %d %d" , task_node(p), task_numa_group_id(p)); |
600 | #endif |
601 | #ifdef CONFIG_CGROUP_SCHED |
602 | SEQ_printf_task_group_path(m, task_group(p), " %s" ) |
603 | #endif |
604 | |
605 | SEQ_printf(m, "\n" ); |
606 | } |
607 | |
608 | static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) |
609 | { |
610 | struct task_struct *g, *p; |
611 | |
612 | SEQ_printf(m, "\n" ); |
613 | SEQ_printf(m, "runnable tasks:\n" ); |
614 | SEQ_printf(m, " S task PID tree-key switches prio" |
615 | " wait-time sum-exec sum-sleep\n" ); |
616 | SEQ_printf(m, "-------------------------------------------------------" |
617 | "------------------------------------------------------\n" ); |
618 | |
619 | rcu_read_lock(); |
620 | for_each_process_thread(g, p) { |
621 | if (task_cpu(p) != rq_cpu) |
622 | continue; |
623 | |
624 | print_task(m, rq, p); |
625 | } |
626 | rcu_read_unlock(); |
627 | } |
628 | |
629 | void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) |
630 | { |
631 | s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, spread; |
632 | struct sched_entity *last, *first; |
633 | struct rq *rq = cpu_rq(cpu); |
634 | unsigned long flags; |
635 | |
636 | #ifdef CONFIG_FAIR_GROUP_SCHED |
637 | SEQ_printf(m, "\n" ); |
638 | SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n" , cpu); |
639 | #else |
640 | SEQ_printf(m, "\n" ); |
641 | SEQ_printf(m, "cfs_rq[%d]:\n" , cpu); |
642 | #endif |
643 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n" , "exec_clock" , |
644 | SPLIT_NS(cfs_rq->exec_clock)); |
645 | |
646 | raw_spin_rq_lock_irqsave(rq, flags); |
647 | first = __pick_first_entity(cfs_rq); |
648 | if (first) |
649 | left_vruntime = first->vruntime; |
650 | last = __pick_last_entity(cfs_rq); |
651 | if (last) |
652 | right_vruntime = last->vruntime; |
653 | min_vruntime = cfs_rq->min_vruntime; |
654 | raw_spin_rq_unlock_irqrestore(rq, flags); |
655 | |
656 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n" , "left_vruntime" , |
657 | SPLIT_NS(left_vruntime)); |
658 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n" , "min_vruntime" , |
659 | SPLIT_NS(min_vruntime)); |
660 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n" , "avg_vruntime" , |
661 | SPLIT_NS(avg_vruntime(cfs_rq))); |
662 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n" , "right_vruntime" , |
663 | SPLIT_NS(right_vruntime)); |
664 | spread = right_vruntime - left_vruntime; |
665 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n" , "spread" , SPLIT_NS(spread)); |
666 | SEQ_printf(m, " .%-30s: %d\n" , "nr_spread_over" , |
667 | cfs_rq->nr_spread_over); |
668 | SEQ_printf(m, " .%-30s: %d\n" , "nr_running" , cfs_rq->nr_running); |
669 | SEQ_printf(m, " .%-30s: %d\n" , "h_nr_running" , cfs_rq->h_nr_running); |
670 | SEQ_printf(m, " .%-30s: %d\n" , "idle_nr_running" , |
671 | cfs_rq->idle_nr_running); |
672 | SEQ_printf(m, " .%-30s: %d\n" , "idle_h_nr_running" , |
673 | cfs_rq->idle_h_nr_running); |
674 | SEQ_printf(m, " .%-30s: %ld\n" , "load" , cfs_rq->load.weight); |
675 | #ifdef CONFIG_SMP |
676 | SEQ_printf(m, " .%-30s: %lu\n" , "load_avg" , |
677 | cfs_rq->avg.load_avg); |
678 | SEQ_printf(m, " .%-30s: %lu\n" , "runnable_avg" , |
679 | cfs_rq->avg.runnable_avg); |
680 | SEQ_printf(m, " .%-30s: %lu\n" , "util_avg" , |
681 | cfs_rq->avg.util_avg); |
682 | SEQ_printf(m, " .%-30s: %u\n" , "util_est_enqueued" , |
683 | cfs_rq->avg.util_est.enqueued); |
684 | SEQ_printf(m, " .%-30s: %ld\n" , "removed.load_avg" , |
685 | cfs_rq->removed.load_avg); |
686 | SEQ_printf(m, " .%-30s: %ld\n" , "removed.util_avg" , |
687 | cfs_rq->removed.util_avg); |
688 | SEQ_printf(m, " .%-30s: %ld\n" , "removed.runnable_avg" , |
689 | cfs_rq->removed.runnable_avg); |
690 | #ifdef CONFIG_FAIR_GROUP_SCHED |
691 | SEQ_printf(m, " .%-30s: %lu\n" , "tg_load_avg_contrib" , |
692 | cfs_rq->tg_load_avg_contrib); |
693 | SEQ_printf(m, " .%-30s: %ld\n" , "tg_load_avg" , |
694 | atomic_long_read(&cfs_rq->tg->load_avg)); |
695 | #endif |
696 | #endif |
697 | #ifdef CONFIG_CFS_BANDWIDTH |
698 | SEQ_printf(m, " .%-30s: %d\n" , "throttled" , |
699 | cfs_rq->throttled); |
700 | SEQ_printf(m, " .%-30s: %d\n" , "throttle_count" , |
701 | cfs_rq->throttle_count); |
702 | #endif |
703 | |
704 | #ifdef CONFIG_FAIR_GROUP_SCHED |
705 | print_cfs_group_stats(m, cpu, tg: cfs_rq->tg); |
706 | #endif |
707 | } |
708 | |
709 | void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) |
710 | { |
711 | #ifdef CONFIG_RT_GROUP_SCHED |
712 | SEQ_printf(m, "\n" ); |
713 | SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n" , cpu); |
714 | #else |
715 | SEQ_printf(m, "\n" ); |
716 | SEQ_printf(m, "rt_rq[%d]:\n" , cpu); |
717 | #endif |
718 | |
719 | #define P(x) \ |
720 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) |
721 | #define PU(x) \ |
722 | SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x)) |
723 | #define PN(x) \ |
724 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x)) |
725 | |
726 | PU(rt_nr_running); |
727 | P(rt_throttled); |
728 | PN(rt_time); |
729 | PN(rt_runtime); |
730 | |
731 | #undef PN |
732 | #undef PU |
733 | #undef P |
734 | } |
735 | |
736 | void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq) |
737 | { |
738 | struct dl_bw *dl_bw; |
739 | |
740 | SEQ_printf(m, "\n" ); |
741 | SEQ_printf(m, "dl_rq[%d]:\n" , cpu); |
742 | |
743 | #define PU(x) \ |
744 | SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x)) |
745 | |
746 | PU(dl_nr_running); |
747 | #ifdef CONFIG_SMP |
748 | dl_bw = &cpu_rq(cpu)->rd->dl_bw; |
749 | #else |
750 | dl_bw = &dl_rq->dl_bw; |
751 | #endif |
752 | SEQ_printf(m, " .%-30s: %lld\n" , "dl_bw->bw" , dl_bw->bw); |
753 | SEQ_printf(m, " .%-30s: %lld\n" , "dl_bw->total_bw" , dl_bw->total_bw); |
754 | |
755 | #undef PU |
756 | } |
757 | |
758 | static void print_cpu(struct seq_file *m, int cpu) |
759 | { |
760 | struct rq *rq = cpu_rq(cpu); |
761 | |
762 | #ifdef CONFIG_X86 |
763 | { |
764 | unsigned int freq = cpu_khz ? : 1; |
765 | |
766 | SEQ_printf(m, "cpu#%d, %u.%03u MHz\n" , |
767 | cpu, freq / 1000, (freq % 1000)); |
768 | } |
769 | #else |
770 | SEQ_printf(m, "cpu#%d\n" , cpu); |
771 | #endif |
772 | |
773 | #define P(x) \ |
774 | do { \ |
775 | if (sizeof(rq->x) == 4) \ |
776 | SEQ_printf(m, " .%-30s: %d\n", #x, (int)(rq->x)); \ |
777 | else \ |
778 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rq->x));\ |
779 | } while (0) |
780 | |
781 | #define PN(x) \ |
782 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x)) |
783 | |
784 | P(nr_running); |
785 | P(nr_switches); |
786 | P(nr_uninterruptible); |
787 | PN(next_balance); |
788 | SEQ_printf(m, " .%-30s: %ld\n" , "curr->pid" , (long)(task_pid_nr(rq->curr))); |
789 | PN(clock); |
790 | PN(clock_task); |
791 | #undef P |
792 | #undef PN |
793 | |
794 | #ifdef CONFIG_SMP |
795 | #define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n); |
796 | P64(avg_idle); |
797 | P64(max_idle_balance_cost); |
798 | #undef P64 |
799 | #endif |
800 | |
801 | #define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, schedstat_val(rq->n)); |
802 | if (schedstat_enabled()) { |
803 | P(yld_count); |
804 | P(sched_count); |
805 | P(sched_goidle); |
806 | P(ttwu_count); |
807 | P(ttwu_local); |
808 | } |
809 | #undef P |
810 | |
811 | print_cfs_stats(m, cpu); |
812 | print_rt_stats(m, cpu); |
813 | print_dl_stats(m, cpu); |
814 | |
815 | print_rq(m, rq, rq_cpu: cpu); |
816 | SEQ_printf(m, "\n" ); |
817 | } |
818 | |
/*
 * Human-readable names for the tunable scaling modes; indexed by
 * sysctl_sched_tunable_scaling when the debug header is printed.  The
 * table is only ever read, so the pointers are const as well.
 */
static const char * const sched_tunable_scaling_names[] = {
	"none",
	"logarithmic",
	"linear"
};
824 | |
825 | static void (struct seq_file *m) |
826 | { |
827 | u64 ktime, sched_clk, cpu_clk; |
828 | unsigned long flags; |
829 | |
830 | local_irq_save(flags); |
831 | ktime = ktime_to_ns(kt: ktime_get()); |
832 | sched_clk = sched_clock(); |
833 | cpu_clk = local_clock(); |
834 | local_irq_restore(flags); |
835 | |
836 | SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n" , |
837 | init_utsname()->release, |
838 | (int)strcspn(init_utsname()->version, " " ), |
839 | init_utsname()->version); |
840 | |
841 | #define P(x) \ |
842 | SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x)) |
843 | #define PN(x) \ |
844 | SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) |
845 | PN(ktime); |
846 | PN(sched_clk); |
847 | PN(cpu_clk); |
848 | P(jiffies); |
849 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK |
850 | P(sched_clock_stable()); |
851 | #endif |
852 | #undef PN |
853 | #undef P |
854 | |
855 | SEQ_printf(m, "\n" ); |
856 | SEQ_printf(m, "sysctl_sched\n" ); |
857 | |
858 | #define P(x) \ |
859 | SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x)) |
860 | #define PN(x) \ |
861 | SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) |
862 | PN(sysctl_sched_base_slice); |
863 | P(sysctl_sched_features); |
864 | #undef PN |
865 | #undef P |
866 | |
867 | SEQ_printf(m, " .%-40s: %d (%s)\n" , |
868 | "sysctl_sched_tunable_scaling" , |
869 | sysctl_sched_tunable_scaling, |
870 | sched_tunable_scaling_names[sysctl_sched_tunable_scaling]); |
871 | SEQ_printf(m, "\n" ); |
872 | } |
873 | |
/*
 * seq_file show callback for the "debug" file.
 *
 * @v is the iterator cookie produced by sched_debug_start(): 1 selects the
 * header and N + 2 selects CPU N.  Always returns 0.
 */
static int sched_debug_show(struct seq_file *m, void *v)
{
	int cpu = (unsigned long)v - 2;

	if (cpu == -1)
		sched_debug_header(m);
	else
		print_cpu(m, cpu);

	return 0;
}
885 | |
886 | void sysrq_sched_debug_show(void) |
887 | { |
888 | int cpu; |
889 | |
890 | sched_debug_header(NULL); |
891 | for_each_online_cpu(cpu) { |
892 | /* |
893 | * Need to reset softlockup watchdogs on all CPUs, because |
894 | * another CPU might be blocked waiting for us to process |
895 | * an IPI or stop_machine. |
896 | */ |
897 | touch_nmi_watchdog(); |
898 | touch_all_softlockup_watchdogs(); |
899 | print_cpu(NULL, cpu); |
900 | } |
901 | } |
902 | |
903 | /* |
904 | * This iterator needs some explanation. |
905 | * It returns 1 for the header position. |
906 | * This means 2 is CPU 0. |
907 | * In a hotplugged system some CPUs, including CPU 0, may be missing so we have |
908 | * to use cpumask_* to iterate over the CPUs. |
909 | */ |
910 | static void *sched_debug_start(struct seq_file *file, loff_t *offset) |
911 | { |
912 | unsigned long n = *offset; |
913 | |
914 | if (n == 0) |
915 | return (void *) 1; |
916 | |
917 | n--; |
918 | |
919 | if (n > 0) |
920 | n = cpumask_next(n: n - 1, cpu_online_mask); |
921 | else |
922 | n = cpumask_first(cpu_online_mask); |
923 | |
924 | *offset = n + 1; |
925 | |
926 | if (n < nr_cpu_ids) |
927 | return (void *)(unsigned long)(n + 2); |
928 | |
929 | return NULL; |
930 | } |
931 | |
932 | static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset) |
933 | { |
934 | (*offset)++; |
935 | return sched_debug_start(file, offset); |
936 | } |
937 | |
938 | static void sched_debug_stop(struct seq_file *file, void *data) |
939 | { |
940 | } |
941 | |
942 | static const struct seq_operations sched_debug_sops = { |
943 | .start = sched_debug_start, |
944 | .next = sched_debug_next, |
945 | .stop = sched_debug_stop, |
946 | .show = sched_debug_show, |
947 | }; |
948 | |
/*
 * Field printers for the per-task output below.  They expect a seq_file
 * named "m" in scope; the variants without a leading "__" also expect a
 * task pointer named "p".
 *
 *   __PS(S, F)  - label S, value F
 *   __P(F)      - expression F, labelled with its own text
 *   P(F)        - p->F, labelled "F"
 *   PM(F, M)    - p->F masked with M, labelled "F"
 *   __PSN/__PN/PN - the same, printed as nanoseconds through SPLIT_NS()
 */
#define __PS(S, F)							\
	SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
#define __P(F)		__PS(#F, F)
#define P(F)		__PS(#F, p->F)
#define PM(F, M)	__PS(#F, p->F & (M))
#define __PSN(S, F)							\
	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
#define __PN(F)		__PSN(#F, F)
#define PN(F)		__PSN(#F, p->F)
956 | |
957 | |
958 | #ifdef CONFIG_NUMA_BALANCING |
/*
 * Print one "numa_faults" line for @node.
 *
 * @tsf/@tpf: task shared / task private values
 * @gsf/@gpf: group shared / group private values
 *
 * Note that the private value is printed before the shared one.
 */
void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
		unsigned long tpf, unsigned long gsf, unsigned long gpf)
{
	SEQ_printf(m, "numa_faults node=%d ", node);
	SEQ_printf(m, "task_private=%lu task_shared=%lu ",
		   tpf, tsf);
	SEQ_printf(m, "group_private=%lu group_shared=%lu\n",
		   gpf, gsf);
}
966 | #endif |
967 | |
968 | |
/*
 * Print the NUMA balancing fields of task @p.  The body is empty unless
 * CONFIG_NUMA_BALANCING is set.
 */
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
{
#ifdef CONFIG_NUMA_BALANCING
	/* The scan sequence is only printed when p->mm is non-NULL. */
	if (p->mm) {
		P(mm->numa_scan_seq);
	}

	P(numa_pages_migrated);
	P(numa_preferred_nid);
	P(total_numa_faults);
	SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
		   task_node(p), task_numa_group_id(p));
	show_numa_stats(p, m);
#endif
}
983 | |
984 | void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, |
985 | struct seq_file *m) |
986 | { |
987 | unsigned long nr_switches; |
988 | |
989 | SEQ_printf(m, "%s (%d, #threads: %d)\n" , p->comm, task_pid_nr_ns(p, ns), |
990 | get_nr_threads(p)); |
991 | SEQ_printf(m, |
992 | "---------------------------------------------------------" |
993 | "----------\n" ); |
994 | |
995 | #define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->stats.F)) |
996 | #define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->stats.F)) |
997 | |
998 | PN(se.exec_start); |
999 | PN(se.vruntime); |
1000 | PN(se.sum_exec_runtime); |
1001 | |
1002 | nr_switches = p->nvcsw + p->nivcsw; |
1003 | |
1004 | P(se.nr_migrations); |
1005 | |
1006 | if (schedstat_enabled()) { |
1007 | u64 avg_atom, avg_per_cpu; |
1008 | |
1009 | PN_SCHEDSTAT(sum_sleep_runtime); |
1010 | PN_SCHEDSTAT(sum_block_runtime); |
1011 | PN_SCHEDSTAT(wait_start); |
1012 | PN_SCHEDSTAT(sleep_start); |
1013 | PN_SCHEDSTAT(block_start); |
1014 | PN_SCHEDSTAT(sleep_max); |
1015 | PN_SCHEDSTAT(block_max); |
1016 | PN_SCHEDSTAT(exec_max); |
1017 | PN_SCHEDSTAT(slice_max); |
1018 | PN_SCHEDSTAT(wait_max); |
1019 | PN_SCHEDSTAT(wait_sum); |
1020 | P_SCHEDSTAT(wait_count); |
1021 | PN_SCHEDSTAT(iowait_sum); |
1022 | P_SCHEDSTAT(iowait_count); |
1023 | P_SCHEDSTAT(nr_migrations_cold); |
1024 | P_SCHEDSTAT(nr_failed_migrations_affine); |
1025 | P_SCHEDSTAT(nr_failed_migrations_running); |
1026 | P_SCHEDSTAT(nr_failed_migrations_hot); |
1027 | P_SCHEDSTAT(nr_forced_migrations); |
1028 | P_SCHEDSTAT(nr_wakeups); |
1029 | P_SCHEDSTAT(nr_wakeups_sync); |
1030 | P_SCHEDSTAT(nr_wakeups_migrate); |
1031 | P_SCHEDSTAT(nr_wakeups_local); |
1032 | P_SCHEDSTAT(nr_wakeups_remote); |
1033 | P_SCHEDSTAT(nr_wakeups_affine); |
1034 | P_SCHEDSTAT(nr_wakeups_affine_attempts); |
1035 | P_SCHEDSTAT(nr_wakeups_passive); |
1036 | P_SCHEDSTAT(nr_wakeups_idle); |
1037 | |
1038 | avg_atom = p->se.sum_exec_runtime; |
1039 | if (nr_switches) |
1040 | avg_atom = div64_ul(avg_atom, nr_switches); |
1041 | else |
1042 | avg_atom = -1LL; |
1043 | |
1044 | avg_per_cpu = p->se.sum_exec_runtime; |
1045 | if (p->se.nr_migrations) { |
1046 | avg_per_cpu = div64_u64(dividend: avg_per_cpu, |
1047 | divisor: p->se.nr_migrations); |
1048 | } else { |
1049 | avg_per_cpu = -1LL; |
1050 | } |
1051 | |
1052 | __PN(avg_atom); |
1053 | __PN(avg_per_cpu); |
1054 | |
1055 | #ifdef CONFIG_SCHED_CORE |
1056 | PN_SCHEDSTAT(core_forceidle_sum); |
1057 | #endif |
1058 | } |
1059 | |
1060 | __P(nr_switches); |
1061 | __PS("nr_voluntary_switches" , p->nvcsw); |
1062 | __PS("nr_involuntary_switches" , p->nivcsw); |
1063 | |
1064 | P(se.load.weight); |
1065 | #ifdef CONFIG_SMP |
1066 | P(se.avg.load_sum); |
1067 | P(se.avg.runnable_sum); |
1068 | P(se.avg.util_sum); |
1069 | P(se.avg.load_avg); |
1070 | P(se.avg.runnable_avg); |
1071 | P(se.avg.util_avg); |
1072 | P(se.avg.last_update_time); |
1073 | P(se.avg.util_est.ewma); |
1074 | PM(se.avg.util_est.enqueued, ~UTIL_AVG_UNCHANGED); |
1075 | #endif |
1076 | #ifdef CONFIG_UCLAMP_TASK |
1077 | __PS("uclamp.min" , p->uclamp_req[UCLAMP_MIN].value); |
1078 | __PS("uclamp.max" , p->uclamp_req[UCLAMP_MAX].value); |
1079 | __PS("effective uclamp.min" , uclamp_eff_value(p, UCLAMP_MIN)); |
1080 | __PS("effective uclamp.max" , uclamp_eff_value(p, UCLAMP_MAX)); |
1081 | #endif |
1082 | P(policy); |
1083 | P(prio); |
1084 | if (task_has_dl_policy(p)) { |
1085 | P(dl.runtime); |
1086 | P(dl.deadline); |
1087 | } |
1088 | #undef PN_SCHEDSTAT |
1089 | #undef P_SCHEDSTAT |
1090 | |
1091 | { |
1092 | unsigned int this_cpu = raw_smp_processor_id(); |
1093 | u64 t0, t1; |
1094 | |
1095 | t0 = cpu_clock(cpu: this_cpu); |
1096 | t1 = cpu_clock(cpu: this_cpu); |
1097 | __PS("clock-delta" , t1-t0); |
1098 | } |
1099 | |
1100 | sched_show_numa(p, m); |
1101 | } |
1102 | |
/*
 * Reset the scheduler statistics of task @p by zeroing p->stats.  The
 * body is empty unless CONFIG_SCHEDSTATS is set.
 */
void proc_sched_set_task(struct task_struct *p)
{
#ifdef CONFIG_SCHEDSTATS
	memset(&p->stats, 0, sizeof p->stats);
#endif
}
1109 | |
1110 | void resched_latency_warn(int cpu, u64 latency) |
1111 | { |
1112 | static DEFINE_RATELIMIT_STATE(latency_check_ratelimit, 60 * 60 * HZ, 1); |
1113 | |
1114 | WARN(__ratelimit(&latency_check_ratelimit), |
1115 | "sched: CPU %d need_resched set for > %llu ns (%d ticks) " |
1116 | "without schedule\n" , |
1117 | cpu, latency, cpu_rq(cpu)->ticks_without_resched); |
1118 | } |
1119 | |