1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* |
3 | * RCU CPU stall warnings for normal RCU grace periods |
4 | * |
5 | * Copyright IBM Corporation, 2019 |
6 | * |
7 | * Author: Paul E. McKenney <paulmck@linux.ibm.com> |
8 | */ |
9 | |
10 | #include <linux/kvm_para.h> |
11 | #include <linux/rcu_notifier.h> |
12 | |
13 | ////////////////////////////////////////////////////////////////////////////// |
14 | // |
15 | // Controlling CPU stall warnings, including delay calculation. |
16 | |
17 | /* panic() on RCU Stall sysctl. */ |
18 | int sysctl_panic_on_rcu_stall __read_mostly; |
19 | int sysctl_max_rcu_stall_to_panic __read_mostly; |
20 | |
21 | #ifdef CONFIG_PROVE_RCU |
22 | #define RCU_STALL_DELAY_DELTA (5 * HZ) |
23 | #else |
24 | #define RCU_STALL_DELAY_DELTA 0 |
25 | #endif |
26 | #define RCU_STALL_MIGHT_DIV 8 |
27 | #define RCU_STALL_MIGHT_MIN (2 * HZ) |
28 | |
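/*
 * Compute the expedited-grace-period stall-warning timeout in jiffies.
 * A rcu_exp_cpu_stall_timeout value of zero means "fall back to the
 * normal timeout from rcu_jiffies_till_stall_check()", converted to
 * milliseconds.  The result is clamped to the Kconfig-permitted range
 * and, under CONFIG_PROVE_RCU, padded by roughly 25%.
 */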
29 | int rcu_exp_jiffies_till_stall_check(void) |
30 | { |
31 | int cpu_stall_timeout = READ_ONCE(rcu_exp_cpu_stall_timeout); |
32 | int exp_stall_delay_delta = 0; |
33 | int till_stall_check; |
34 | |
35 | // Zero says to use rcu_cpu_stall_timeout, but in milliseconds. |
36 | if (!cpu_stall_timeout) |
		cpu_stall_timeout = jiffies_to_msecs(rcu_jiffies_till_stall_check());
38 | |
39 | // Limit check must be consistent with the Kconfig limits for |
40 | // CONFIG_RCU_EXP_CPU_STALL_TIMEOUT, so check the allowed range. |
41 | // The minimum clamped value is "2UL", because at least one full |
42 | // tick has to be guaranteed. |
43 | till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 300UL * HZ); |
44 | |
	if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout)
46 | WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check)); |
47 | |
48 | #ifdef CONFIG_PROVE_RCU |
	/* Add an extra ~25% of till_stall_check. */
50 | exp_stall_delay_delta = ((till_stall_check * 25) / 100) + 1; |
51 | #endif |
52 | |
53 | return till_stall_check + exp_stall_delay_delta; |
54 | } |
55 | EXPORT_SYMBOL_GPL(rcu_exp_jiffies_till_stall_check); |
56 | |
57 | /* Limit-check stall timeouts specified at boottime and runtime. */ |
58 | int rcu_jiffies_till_stall_check(void) |
59 | { |
60 | int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout); |
61 | |
62 | /* |
63 | * Limit check must be consistent with the Kconfig limits |
64 | * for CONFIG_RCU_CPU_STALL_TIMEOUT. |
65 | */ |
66 | if (till_stall_check < 3) { |
67 | WRITE_ONCE(rcu_cpu_stall_timeout, 3); |
68 | till_stall_check = 3; |
69 | } else if (till_stall_check > 300) { |
70 | WRITE_ONCE(rcu_cpu_stall_timeout, 300); |
71 | till_stall_check = 300; |
72 | } |
73 | return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; |
74 | } |
75 | EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check); |
76 | |
77 | /** |
78 | * rcu_gp_might_be_stalled - Is it likely that the grace period is stalled? |
79 | * |
80 | * Returns @true if the current grace period is sufficiently old that |
81 | * it is reasonable to assume that it might be stalled. This can be |
82 | * useful when deciding whether to allocate memory to enable RCU-mediated |
83 | * freeing on the one hand or just invoking synchronize_rcu() on the other. |
84 | * The latter is preferable when the grace period is stalled. |
85 | * |
86 | * Note that sampling of the .gp_start and .gp_seq fields must be done |
87 | * carefully to avoid false positives at the beginnings and ends of |
88 | * grace periods. |
89 | */ |
90 | bool rcu_gp_might_be_stalled(void) |
91 | { |
92 | unsigned long d = rcu_jiffies_till_stall_check() / RCU_STALL_MIGHT_DIV; |
93 | unsigned long j = jiffies; |
94 | |
95 | if (d < RCU_STALL_MIGHT_MIN) |
96 | d = RCU_STALL_MIGHT_MIN; |
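	// For example, assuming the common CONFIG_RCU_CPU_STALL_TIMEOUT=21
	// default, d works out to about 21*HZ/8, or roughly 2.6 seconds, so
	// the two-second floor above matters only for shorter timeouts.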
97 | smp_mb(); // jiffies before .gp_seq to avoid false positives. |
98 | if (!rcu_gp_in_progress()) |
99 | return false; |
	// Long delays at this point avoid false positives, but a delay
	// of ULONG_MAX/4 jiffies voids your no-false-positive warranty.
102 | smp_mb(); // .gp_seq before second .gp_start |
103 | // And ditto here. |
104 | return !time_before(j, READ_ONCE(rcu_state.gp_start) + d); |
105 | } |
106 | |
107 | /* Don't do RCU CPU stall warnings during long sysrq printouts. */ |
108 | void rcu_sysrq_start(void) |
109 | { |
110 | if (!rcu_cpu_stall_suppress) |
111 | rcu_cpu_stall_suppress = 2; |
112 | } |
113 | |
114 | void rcu_sysrq_end(void) |
115 | { |
116 | if (rcu_cpu_stall_suppress == 2) |
117 | rcu_cpu_stall_suppress = 0; |
118 | } |
119 | |
120 | /* Don't print RCU CPU stall warnings during a kernel panic. */ |
121 | static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) |
122 | { |
123 | rcu_cpu_stall_suppress = 1; |
124 | return NOTIFY_DONE; |
125 | } |
126 | |
127 | static struct notifier_block rcu_panic_block = { |
128 | .notifier_call = rcu_panic, |
129 | }; |
130 | |
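/* Register rcu_panic() on the panic notifier chain at early boot. */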
131 | static int __init check_cpu_stall_init(void) |
132 | { |
	atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
134 | return 0; |
135 | } |
136 | early_initcall(check_cpu_stall_init); |
137 | |
138 | /* If so specified via sysctl, panic, yielding cleaner stall-warning output. */ |
139 | static void panic_on_rcu_stall(void) |
140 | { |
141 | static int cpu_stall; |
142 | |
143 | if (++cpu_stall < sysctl_max_rcu_stall_to_panic) |
144 | return; |
145 | |
146 | if (sysctl_panic_on_rcu_stall) |
		panic("RCU Stall\n");
148 | } |
149 | |
150 | /** |
151 | * rcu_cpu_stall_reset - restart stall-warning timeout for current grace period |
152 | * |
153 | * To perform the reset request from the caller, disable stall detection until |
154 | * 3 fqs loops have passed. This is required to ensure a fresh jiffies is |
155 | * loaded. It should be safe to do from the fqs loop as enough timer |
156 | * interrupts and context switches should have passed. |
157 | * |
158 | * The caller must disable hard irqs. |
159 | */ |
160 | void rcu_cpu_stall_reset(void) |
161 | { |
162 | WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 3); |
163 | WRITE_ONCE(rcu_state.jiffies_stall, ULONG_MAX); |
164 | } |
165 | |
166 | ////////////////////////////////////////////////////////////////////////////// |
167 | // |
168 | // Interaction with RCU grace periods |
169 | |
170 | /* Start of new grace period, so record stall time (and forcing times). */ |
171 | static void record_gp_stall_check_time(void) |
172 | { |
173 | unsigned long j = jiffies; |
174 | unsigned long j1; |
175 | |
176 | WRITE_ONCE(rcu_state.gp_start, j); |
177 | j1 = rcu_jiffies_till_stall_check(); |
178 | smp_mb(); // ->gp_start before ->jiffies_stall and caller's ->gp_seq. |
179 | WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 0); |
180 | WRITE_ONCE(rcu_state.jiffies_stall, j + j1); |
181 | rcu_state.jiffies_resched = j + j1 / 2; |
182 | rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs); |
183 | } |
184 | |
185 | /* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */ |
186 | static void zero_cpu_stall_ticks(struct rcu_data *rdp) |
187 | { |
188 | rdp->ticks_this_gp = 0; |
	rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
190 | WRITE_ONCE(rdp->last_fqs_resched, jiffies); |
191 | } |
192 | |
193 | /* |
194 | * If too much time has passed in the current grace period, and if |
195 | * so configured, go kick the relevant kthreads. |
196 | */ |
197 | static void rcu_stall_kick_kthreads(void) |
198 | { |
199 | unsigned long j; |
200 | |
201 | if (!READ_ONCE(rcu_kick_kthreads)) |
202 | return; |
203 | j = READ_ONCE(rcu_state.jiffies_kick_kthreads); |
204 | if (time_after(jiffies, j) && rcu_state.gp_kthread && |
205 | (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) { |
		WARN_ONCE(1, "Kicking %s grace-period kthread\n",
			  rcu_state.name);
		rcu_ftrace_dump(DUMP_ALL);
		wake_up_process(rcu_state.gp_kthread);
210 | WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ); |
211 | } |
212 | } |
213 | |
214 | /* |
215 | * Handler for the irq_work request posted about halfway into the RCU CPU |
216 | * stall timeout, and used to detect excessive irq disabling. Set state |
217 | * appropriately, but just complain if there is unexpected state on entry. |
218 | */ |
219 | static void rcu_iw_handler(struct irq_work *iwp) |
220 | { |
221 | struct rcu_data *rdp; |
222 | struct rcu_node *rnp; |
223 | |
224 | rdp = container_of(iwp, struct rcu_data, rcu_iw); |
225 | rnp = rdp->mynode; |
226 | raw_spin_lock_rcu_node(rnp); |
227 | if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) { |
228 | rdp->rcu_iw_gp_seq = rnp->gp_seq; |
229 | rdp->rcu_iw_pending = false; |
230 | } |
231 | raw_spin_unlock_rcu_node(rnp); |
232 | } |
233 | |
234 | ////////////////////////////////////////////////////////////////////////////// |
235 | // |
236 | // Printing RCU CPU stall warnings |
237 | |
238 | #ifdef CONFIG_PREEMPT_RCU |
239 | |
240 | /* |
241 | * Dump detailed information for all tasks blocking the current RCU |
242 | * grace period on the specified rcu_node structure. |
243 | */ |
244 | static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) |
245 | { |
246 | unsigned long flags; |
247 | struct task_struct *t; |
248 | |
249 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
250 | if (!rcu_preempt_blocked_readers_cgp(rnp)) { |
251 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
252 | return; |
253 | } |
254 | t = list_entry(rnp->gp_tasks->prev, |
255 | struct task_struct, rcu_node_entry); |
256 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { |
257 | /* |
258 | * We could be printing a lot while holding a spinlock. |
259 | * Avoid triggering hard lockup. |
260 | */ |
261 | touch_nmi_watchdog(); |
		sched_show_task(t);
263 | } |
264 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
265 | } |
266 | |
267 | // Communicate task state back to the RCU CPU stall warning request. |
268 | struct rcu_stall_chk_rdr { |
269 | int nesting; |
270 | union rcu_special rs; |
271 | bool on_blkd_list; |
272 | }; |
273 | |
274 | /* |
275 | * Report out the state of a not-running task that is stalling the |
276 | * current RCU grace period. |
277 | */ |
278 | static int check_slow_task(struct task_struct *t, void *arg) |
279 | { |
280 | struct rcu_stall_chk_rdr *rscrp = arg; |
281 | |
	if (task_curr(t))
283 | return -EBUSY; // It is running, so decline to inspect it. |
284 | rscrp->nesting = t->rcu_read_lock_nesting; |
285 | rscrp->rs = t->rcu_read_unlock_special; |
	rscrp->on_blkd_list = !list_empty(&t->rcu_node_entry);
287 | return 0; |
288 | } |
289 | |
290 | /* |
291 | * Scan the current list of tasks blocked within RCU read-side critical |
292 | * sections, printing out the tid of each of the first few of them. |
293 | */ |
294 | static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) |
295 | __releases(rnp->lock) |
296 | { |
297 | int i = 0; |
298 | int ndetected = 0; |
299 | struct rcu_stall_chk_rdr rscr; |
300 | struct task_struct *t; |
301 | struct task_struct *ts[8]; |
302 | |
303 | lockdep_assert_irqs_disabled(); |
304 | if (!rcu_preempt_blocked_readers_cgp(rnp)) { |
305 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
306 | return 0; |
307 | } |
	pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
309 | rnp->level, rnp->grplo, rnp->grphi); |
310 | t = list_entry(rnp->gp_tasks->prev, |
311 | struct task_struct, rcu_node_entry); |
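	/*
	 * Snapshot up to ARRAY_SIZE(ts) blocked tasks, holding a reference
	 * on each so that their state can be queried after the rcu_node
	 * structure's ->lock is dropped.
	 */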
312 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { |
313 | get_task_struct(t); |
314 | ts[i++] = t; |
315 | if (i >= ARRAY_SIZE(ts)) |
316 | break; |
317 | } |
318 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
319 | while (i) { |
320 | t = ts[--i]; |
		if (task_call_func(t, check_slow_task, &rscr))
			pr_cont(" P%d", t->pid);
		else
			pr_cont(" P%d/%d:%c%c%c%c",
				t->pid, rscr.nesting,
				".b"[rscr.rs.b.blocked],
				".q"[rscr.rs.b.need_qs],
				".e"[rscr.rs.b.exp_hint],
				".l"[rscr.on_blkd_list]);
330 | lockdep_assert_irqs_disabled(); |
331 | put_task_struct(t); |
332 | ndetected++; |
333 | } |
	pr_cont("\n");
335 | return ndetected; |
336 | } |
337 | |
338 | #else /* #ifdef CONFIG_PREEMPT_RCU */ |
339 | |
340 | /* |
341 | * Because preemptible RCU does not exist, we never have to check for |
342 | * tasks blocked within RCU read-side critical sections. |
343 | */ |
344 | static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) |
345 | { |
346 | } |
347 | |
348 | /* |
349 | * Because preemptible RCU does not exist, we never have to check for |
350 | * tasks blocked within RCU read-side critical sections. |
351 | */ |
352 | static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) |
353 | __releases(rnp->lock) |
354 | { |
355 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
356 | return 0; |
357 | } |
358 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ |
359 | |
360 | /* |
361 | * Dump stacks of all tasks running on stalled CPUs. First try using |
362 | * NMIs, but fall back to manual remote stack tracing on architectures |
363 | * that don't support NMI-based stack dumps. The NMI-triggered stack |
364 | * traces are more accurate because they are printed by the target CPU. |
365 | */ |
366 | static void rcu_dump_cpu_stacks(void) |
367 | { |
368 | int cpu; |
369 | unsigned long flags; |
370 | struct rcu_node *rnp; |
371 | |
372 | rcu_for_each_leaf_node(rnp) { |
373 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
374 | for_each_leaf_node_possible_cpu(rnp, cpu) |
375 | if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { |
376 | if (cpu_is_offline(cpu)) |
					pr_err("Offline CPU %d blocking current GP.\n", cpu);
378 | else |
379 | dump_cpu_task(cpu); |
380 | } |
381 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
382 | } |
383 | } |
384 | |
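/* Map the grace-period kthread's ->gp_state values to names for diagnostics. */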
385 | static const char * const gp_state_names[] = { |
	[RCU_GP_IDLE] = "RCU_GP_IDLE",
	[RCU_GP_WAIT_GPS] = "RCU_GP_WAIT_GPS",
	[RCU_GP_DONE_GPS] = "RCU_GP_DONE_GPS",
	[RCU_GP_ONOFF] = "RCU_GP_ONOFF",
	[RCU_GP_INIT] = "RCU_GP_INIT",
	[RCU_GP_WAIT_FQS] = "RCU_GP_WAIT_FQS",
	[RCU_GP_DOING_FQS] = "RCU_GP_DOING_FQS",
	[RCU_GP_CLEANUP] = "RCU_GP_CLEANUP",
	[RCU_GP_CLEANED] = "RCU_GP_CLEANED",
395 | }; |
396 | |
397 | /* |
398 | * Convert a ->gp_state value to a character string. |
399 | */ |
400 | static const char *gp_state_getname(short gs) |
401 | { |
402 | if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names)) |
		return "???";
404 | return gp_state_names[gs]; |
405 | } |
406 | |
407 | /* Is the RCU grace-period kthread being starved of CPU time? */ |
408 | static bool rcu_is_gp_kthread_starving(unsigned long *jp) |
409 | { |
410 | unsigned long j = jiffies - READ_ONCE(rcu_state.gp_activity); |
411 | |
412 | if (jp) |
413 | *jp = j; |
414 | return j > 2 * HZ; |
415 | } |
416 | |
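/* Is the rcuc kthread for the specified CPU being starved of CPU time? */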
417 | static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp) |
418 | { |
419 | int cpu; |
420 | struct task_struct *rcuc; |
421 | unsigned long j; |
422 | |
423 | rcuc = rdp->rcu_cpu_kthread_task; |
424 | if (!rcuc) |
425 | return false; |
426 | |
	cpu = task_cpu(rcuc);
428 | if (cpu_is_offline(cpu) || idle_cpu(cpu)) |
429 | return false; |
430 | |
431 | j = jiffies - READ_ONCE(rdp->rcuc_activity); |
432 | |
433 | if (jp) |
434 | *jp = j; |
435 | return j > 2 * HZ; |
436 | } |
437 | |
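/*
 * If rcu_cpu_stall_cputime is set and the snapshot corresponds to the
 * current grace period, print the number of hardirqs, softirqs, and
 * context switches, along with the CPU time consumed, since the start
 * of that grace period.
 */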
438 | static void print_cpu_stat_info(int cpu) |
439 | { |
440 | struct rcu_snap_record rsr, *rsrp; |
441 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
442 | struct kernel_cpustat *kcsp = &kcpustat_cpu(cpu); |
443 | |
444 | if (!rcu_cpu_stall_cputime) |
445 | return; |
446 | |
447 | rsrp = &rdp->snap_record; |
448 | if (rsrp->gp_seq != rdp->gp_seq) |
449 | return; |
450 | |
	rsr.cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu);
	rsr.cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu);
	rsr.cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);

	pr_err("\t hardirqs softirqs csw/system\n");
	pr_err("\t number: %8ld %10d %12lld\n",
457 | kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs, |
458 | kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs, |
459 | nr_context_switches_cpu(cpu) - rsrp->nr_csw); |
	pr_err("\tcputime: %8lld %10lld %12lld ==> %d(ms)\n",
461 | div_u64(rsr.cputime_irq - rsrp->cputime_irq, NSEC_PER_MSEC), |
462 | div_u64(rsr.cputime_softirq - rsrp->cputime_softirq, NSEC_PER_MSEC), |
463 | div_u64(rsr.cputime_system - rsrp->cputime_system, NSEC_PER_MSEC), |
464 | jiffies_to_msecs(jiffies - rsrp->jiffies)); |
465 | } |
466 | |
467 | /* |
468 | * Print out diagnostic information for the specified stalled CPU. |
469 | * |
470 | * If the specified CPU is aware of the current RCU grace period, then |
471 | * print the number of scheduling clock interrupts the CPU has taken |
472 | * during the time that it has been aware. Otherwise, print the number |
473 | * of RCU grace periods that this CPU is ignorant of, for example, "1" |
474 | * if the CPU was aware of the previous grace period. |
475 | * |
476 | * Also print out idle info. |
477 | */ |
478 | static void print_cpu_stall_info(int cpu) |
479 | { |
480 | unsigned long delta; |
481 | bool falsepositive; |
482 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
483 | char *ticks_title; |
484 | unsigned long ticks_value; |
485 | bool rcuc_starved; |
486 | unsigned long j; |
487 | char buf[32]; |
488 | |
489 | /* |
490 | * We could be printing a lot while holding a spinlock. Avoid |
491 | * triggering hard lockup. |
492 | */ |
493 | touch_nmi_watchdog(); |
494 | |
	ticks_value = rcu_seq_ctr(rcu_state.gp_seq - rdp->gp_seq);
	if (ticks_value) {
		ticks_title = "GPs behind";
	} else {
		ticks_title = "ticks this GP";
		ticks_value = rdp->ticks_this_gp;
	}
	delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
	falsepositive = rcu_is_gp_kthread_starving(NULL) &&
			rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu));
	rcuc_starved = rcu_is_rcuc_kthread_starving(rdp, &j);
	if (rcuc_starved)
		sprintf(buf, " rcuc=%ld jiffies(starved)", j);
	pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%04x/%ld/%#lx softirq=%u/%u fqs=%ld%s%s\n",
	       cpu,
	       "O."[!!cpu_online(cpu)],
	       "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
	       "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)],
	       !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' :
			rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' :
			"!."[!delta],
	       ticks_value, ticks_title,
	       rcu_dynticks_snap(cpu) & 0xffff,
	       ct_dynticks_nesting_cpu(cpu), ct_dynticks_nmi_nesting_cpu(cpu),
	       rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
	       data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
	       rcuc_starved ? buf : "",
	       falsepositive ? " (false positive?)" : "");
523 | |
524 | print_cpu_stat_info(cpu); |
525 | } |
526 | |
527 | /* Complain about starvation of grace-period kthread. */ |
528 | static void rcu_check_gp_kthread_starvation(void) |
529 | { |
530 | int cpu; |
531 | struct task_struct *gpk = rcu_state.gp_kthread; |
532 | unsigned long j; |
533 | |
	if (rcu_is_gp_kthread_starving(&j)) {
		cpu = gpk ? task_cpu(gpk) : -1;
		pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x ->cpu=%d\n",
537 | rcu_state.name, j, |
538 | (long)rcu_seq_current(&rcu_state.gp_seq), |
539 | data_race(READ_ONCE(rcu_state.gp_flags)), |
540 | gp_state_getname(rcu_state.gp_state), |
541 | data_race(READ_ONCE(rcu_state.gp_state)), |
542 | gpk ? data_race(READ_ONCE(gpk->__state)) : ~0, cpu); |
543 | if (gpk) { |
544 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
545 | |
			pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name);
			pr_err("RCU grace-period kthread stack dump:\n");
			sched_show_task(gpk);
			if (cpu_is_offline(cpu)) {
				pr_err("RCU GP kthread last ran on offline CPU %d.\n", cpu);
			} else if (!(data_race(READ_ONCE(rdp->mynode->qsmask)) & rdp->grpmask)) {
				pr_err("Stack dump where RCU GP kthread last ran:\n");
				dump_cpu_task(cpu);
			}
			wake_up_process(gpk);
556 | } |
557 | } |
558 | } |
559 | |
560 | /* Complain about missing wakeups from expired fqs wait timer */ |
561 | static void rcu_check_gp_kthread_expired_fqs_timer(void) |
562 | { |
563 | struct task_struct *gpk = rcu_state.gp_kthread; |
564 | short gp_state; |
565 | unsigned long jiffies_fqs; |
566 | int cpu; |
567 | |
568 | /* |
569 | * Order reads of .gp_state and .jiffies_force_qs. |
570 | * Matching smp_wmb() is present in rcu_gp_fqs_loop(). |
571 | */ |
572 | gp_state = smp_load_acquire(&rcu_state.gp_state); |
573 | jiffies_fqs = READ_ONCE(rcu_state.jiffies_force_qs); |
574 | |
575 | if (gp_state == RCU_GP_WAIT_FQS && |
576 | time_after(jiffies, jiffies_fqs + RCU_STALL_MIGHT_MIN) && |
577 | gpk && !READ_ONCE(gpk->on_rq)) { |
		cpu = task_cpu(gpk);
		pr_err("%s kthread timer wakeup didn't happen for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x\n",
		       rcu_state.name, (jiffies - jiffies_fqs),
		       (long)rcu_seq_current(&rcu_state.gp_seq),
		       data_race(rcu_state.gp_flags),
		       gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS,
		       data_race(READ_ONCE(gpk->__state)));
		pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n",
586 | cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu)); |
587 | } |
588 | } |
589 | |
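/*
 * Print an RCU CPU stall warning on behalf of CPUs and/or tasks other
 * than the current CPU that are blocking the current grace period.
 */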
590 | static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) |
591 | { |
592 | int cpu; |
593 | unsigned long flags; |
594 | unsigned long gpa; |
595 | unsigned long j; |
596 | int ndetected = 0; |
597 | struct rcu_node *rnp; |
598 | long totqlen = 0; |
599 | |
600 | lockdep_assert_irqs_disabled(); |
601 | |
602 | /* Kick and suppress, if so configured. */ |
603 | rcu_stall_kick_kthreads(); |
604 | if (rcu_stall_is_suppressed()) |
605 | return; |
606 | |
607 | /* |
608 | * OK, time to rat on our buddy... |
609 | * See Documentation/RCU/stallwarn.rst for info on how to debug |
610 | * RCU CPU stall warnings. |
611 | */ |
	trace_rcu_stall_warning(rcu_state.name, TPS("StallDetected"));
	pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name);
614 | rcu_for_each_leaf_node(rnp) { |
615 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
616 | if (rnp->qsmask != 0) { |
617 | for_each_leaf_node_possible_cpu(rnp, cpu) |
618 | if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { |
619 | print_cpu_stall_info(cpu); |
620 | ndetected++; |
621 | } |
622 | } |
623 | ndetected += rcu_print_task_stall(rnp, flags); // Releases rnp->lock. |
624 | lockdep_assert_irqs_disabled(); |
625 | } |
626 | |
627 | for_each_possible_cpu(cpu) |
628 | totqlen += rcu_get_n_cbs_cpu(cpu); |
	pr_err("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu ncpus=%d)\n",
630 | smp_processor_id(), (long)(jiffies - gps), |
631 | (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus); |
632 | if (ndetected) { |
633 | rcu_dump_cpu_stacks(); |
634 | |
635 | /* Complain about tasks blocking the grace period. */ |
636 | rcu_for_each_leaf_node(rnp) |
637 | rcu_print_detail_task_stall_rnp(rnp); |
638 | } else { |
		if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) {
			pr_err("INFO: Stall ended before state dump start\n");
		} else {
			j = jiffies;
			gpa = data_race(READ_ONCE(rcu_state.gp_activity));
			pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
645 | rcu_state.name, j - gpa, j, gpa, |
646 | data_race(READ_ONCE(jiffies_till_next_fqs)), |
647 | data_race(READ_ONCE(rcu_get_root()->qsmask))); |
648 | } |
649 | } |
650 | /* Rewrite if needed in case of slow consoles. */ |
651 | if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall))) |
652 | WRITE_ONCE(rcu_state.jiffies_stall, |
653 | jiffies + 3 * rcu_jiffies_till_stall_check() + 3); |
654 | |
655 | rcu_check_gp_kthread_expired_fqs_timer(); |
656 | rcu_check_gp_kthread_starvation(); |
657 | |
658 | panic_on_rcu_stall(); |
659 | |
660 | rcu_force_quiescent_state(); /* Kick them all. */ |
661 | } |
662 | |
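/* Print an RCU CPU stall warning for a stall detected on the current CPU. */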
663 | static void print_cpu_stall(unsigned long gps) |
664 | { |
665 | int cpu; |
666 | unsigned long flags; |
667 | struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
668 | struct rcu_node *rnp = rcu_get_root(); |
669 | long totqlen = 0; |
670 | |
671 | lockdep_assert_irqs_disabled(); |
672 | |
673 | /* Kick and suppress, if so configured. */ |
674 | rcu_stall_kick_kthreads(); |
675 | if (rcu_stall_is_suppressed()) |
676 | return; |
677 | |
678 | /* |
679 | * OK, time to rat on ourselves... |
680 | * See Documentation/RCU/stallwarn.rst for info on how to debug |
681 | * RCU CPU stall warnings. |
682 | */ |
	trace_rcu_stall_warning(rcu_state.name, TPS("SelfDetected"));
	pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name);
685 | raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); |
686 | print_cpu_stall_info(smp_processor_id()); |
687 | raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); |
688 | for_each_possible_cpu(cpu) |
689 | totqlen += rcu_get_n_cbs_cpu(cpu); |
	pr_err("\t(t=%lu jiffies g=%ld q=%lu ncpus=%d)\n",
691 | jiffies - gps, |
692 | (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus); |
693 | |
694 | rcu_check_gp_kthread_expired_fqs_timer(); |
695 | rcu_check_gp_kthread_starvation(); |
696 | |
697 | rcu_dump_cpu_stacks(); |
698 | |
699 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
700 | /* Rewrite if needed in case of slow consoles. */ |
701 | if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall))) |
702 | WRITE_ONCE(rcu_state.jiffies_stall, |
703 | jiffies + 3 * rcu_jiffies_till_stall_check() + 3); |
704 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
705 | |
706 | panic_on_rcu_stall(); |
707 | |
708 | /* |
709 | * Attempt to revive the RCU machinery by forcing a context switch. |
710 | * |
711 | * A context switch would normally allow the RCU state machine to make |
712 | * progress and it could be we're stuck in kernel space without context |
713 | * switches for an entirely unreasonable amount of time. |
714 | */ |
715 | set_tsk_need_resched(current); |
716 | set_preempt_need_resched(); |
717 | } |
718 | |
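/*
 * Check for a stall in the current grace period and, if one has extended
 * past the timeout, print the appropriate warning.  Presumably invoked
 * periodically on each CPU, for example via rcu_pending() from the
 * scheduling-clock interrupt path.
 */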
719 | static void check_cpu_stall(struct rcu_data *rdp) |
720 | { |
721 | bool self_detected; |
722 | unsigned long gs1; |
723 | unsigned long gs2; |
724 | unsigned long gps; |
725 | unsigned long j; |
726 | unsigned long jn; |
727 | unsigned long js; |
728 | struct rcu_node *rnp; |
729 | |
730 | lockdep_assert_irqs_disabled(); |
731 | if ((rcu_stall_is_suppressed() && !READ_ONCE(rcu_kick_kthreads)) || |
732 | !rcu_gp_in_progress()) |
733 | return; |
734 | rcu_stall_kick_kthreads(); |
735 | |
736 | /* |
737 | * Check if it was requested (via rcu_cpu_stall_reset()) that the FQS |
738 | * loop has to set jiffies to ensure a non-stale jiffies value. This |
739 | * is required to have good jiffies value after coming out of long |
740 | * breaks of jiffies updates. Not doing so can cause false positives. |
741 | */ |
742 | if (READ_ONCE(rcu_state.nr_fqs_jiffies_stall) > 0) |
743 | return; |
744 | |
745 | j = jiffies; |
746 | |
747 | /* |
748 | * Lots of memory barriers to reject false positives. |
749 | * |
750 | * The idea is to pick up rcu_state.gp_seq, then |
751 | * rcu_state.jiffies_stall, then rcu_state.gp_start, and finally |
752 | * another copy of rcu_state.gp_seq. These values are updated in |
753 | * the opposite order with memory barriers (or equivalent) during |
754 | * grace-period initialization and cleanup. Now, a false positive |
	 * can occur if we get a new value of rcu_state.gp_start and an old
756 | * value of rcu_state.jiffies_stall. But given the memory barriers, |
757 | * the only way that this can happen is if one grace period ends |
758 | * and another starts between these two fetches. This is detected |
759 | * by comparing the second fetch of rcu_state.gp_seq with the |
760 | * previous fetch from rcu_state.gp_seq. |
761 | * |
762 | * Given this check, comparisons of jiffies, rcu_state.jiffies_stall, |
763 | * and rcu_state.gp_start suffice to forestall false positives. |
764 | */ |
765 | gs1 = READ_ONCE(rcu_state.gp_seq); |
766 | smp_rmb(); /* Pick up ->gp_seq first... */ |
767 | js = READ_ONCE(rcu_state.jiffies_stall); |
768 | smp_rmb(); /* ...then ->jiffies_stall before the rest... */ |
769 | gps = READ_ONCE(rcu_state.gp_start); |
770 | smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */ |
771 | gs2 = READ_ONCE(rcu_state.gp_seq); |
772 | if (gs1 != gs2 || |
773 | ULONG_CMP_LT(j, js) || |
774 | ULONG_CMP_GE(gps, js)) |
775 | return; /* No stall or GP completed since entering function. */ |
776 | rnp = rdp->mynode; |
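	/*
	 * Push ->jiffies_stall far into the future so that, of the CPUs
	 * racing to report this stall, only the one winning the cmpxchg()
	 * below does the reporting; the winner rearms the normal timeout
	 * once the report is complete.
	 */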
777 | jn = jiffies + ULONG_MAX / 2; |
778 | self_detected = READ_ONCE(rnp->qsmask) & rdp->grpmask; |
779 | if (rcu_gp_in_progress() && |
780 | (self_detected || ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) && |
781 | cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { |
782 | /* |
783 | * If a virtual machine is stopped by the host it can look to |
784 | * the watchdog like an RCU stall. Check to see if the host |
785 | * stopped the vm. |
786 | */ |
787 | if (kvm_check_and_clear_guest_paused()) |
788 | return; |
789 | |
		rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps);
791 | if (self_detected) { |
792 | /* We haven't checked in, so go dump stack. */ |
793 | print_cpu_stall(gps); |
794 | } else { |
795 | /* They had a few time units to dump stack, so complain. */ |
			print_other_cpu_stall(gs2, gps);
797 | } |
798 | |
799 | if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) |
800 | rcu_ftrace_dump(DUMP_ALL); |
801 | |
802 | if (READ_ONCE(rcu_state.jiffies_stall) == jn) { |
803 | jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; |
804 | WRITE_ONCE(rcu_state.jiffies_stall, jn); |
805 | } |
806 | } |
807 | } |
808 | |
809 | ////////////////////////////////////////////////////////////////////////////// |
810 | // |
811 | // RCU forward-progress mechanisms, including for callback invocation. |
812 | |
813 | |
814 | /* |
815 | * Check to see if a failure to end RCU priority inversion was due to |
816 | * a CPU not passing through a quiescent state. When this happens, there |
817 | * is nothing that RCU priority boosting can do to help, so we shouldn't |
818 | * count this as an RCU priority boosting failure. A return of true says |
819 | * RCU priority boosting is to blame, and false says otherwise. If false |
820 | * is returned, the first of the CPUs to blame is stored through cpup. |
821 | * If there was no CPU blocking the current grace period, but also nothing |
 * in need of being boosted, *cpup is set to -1. This can happen in case
 * of vCPU preemption while the last CPU is reporting its quiescent state,
824 | * for example. |
825 | * |
826 | * If cpup is NULL, then a lockless quick check is carried out, suitable |
827 | * for high-rate usage. On the other hand, if cpup is non-NULL, each |
828 | * rcu_node structure's ->lock is acquired, ruling out high-rate usage. |
829 | */ |
830 | bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) |
831 | { |
832 | bool atb = false; |
833 | int cpu; |
834 | unsigned long flags; |
835 | struct rcu_node *rnp; |
836 | |
837 | rcu_for_each_leaf_node(rnp) { |
838 | if (!cpup) { |
839 | if (data_race(READ_ONCE(rnp->qsmask))) { |
840 | return false; |
841 | } else { |
842 | if (READ_ONCE(rnp->gp_tasks)) |
843 | atb = true; |
844 | continue; |
845 | } |
846 | } |
847 | *cpup = -1; |
848 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
849 | if (rnp->gp_tasks) |
850 | atb = true; |
851 | if (!rnp->qsmask) { |
852 | // No CPUs without quiescent states for this rnp. |
853 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
854 | continue; |
855 | } |
856 | // Find the first holdout CPU. |
857 | for_each_leaf_node_possible_cpu(rnp, cpu) { |
858 | if (rnp->qsmask & (1UL << (cpu - rnp->grplo))) { |
859 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
860 | *cpup = cpu; |
861 | return false; |
862 | } |
863 | } |
864 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
865 | } |
866 | // Can't blame CPUs, so must blame RCU priority boosting. |
867 | return atb; |
868 | } |
869 | EXPORT_SYMBOL_GPL(rcu_check_boost_fail); |
870 | |
871 | /* |
872 | * Show the state of the grace-period kthreads. |
873 | */ |
874 | void show_rcu_gp_kthreads(void) |
875 | { |
876 | unsigned long cbs = 0; |
877 | int cpu; |
878 | unsigned long j; |
879 | unsigned long ja; |
880 | unsigned long jr; |
881 | unsigned long js; |
882 | unsigned long jw; |
883 | struct rcu_data *rdp; |
884 | struct rcu_node *rnp; |
885 | struct task_struct *t = READ_ONCE(rcu_state.gp_kthread); |
886 | |
887 | j = jiffies; |
888 | ja = j - data_race(READ_ONCE(rcu_state.gp_activity)); |
889 | jr = j - data_race(READ_ONCE(rcu_state.gp_req_activity)); |
890 | js = j - data_race(READ_ONCE(rcu_state.gp_start)); |
891 | jw = j - data_race(READ_ONCE(rcu_state.gp_wake_time)); |
	pr_info("%s: wait state: %s(%d) ->state: %#x ->rt_priority %u delta ->gp_start %lu ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_max %lu ->gp_flags %#x\n",
893 | rcu_state.name, gp_state_getname(rcu_state.gp_state), |
894 | data_race(READ_ONCE(rcu_state.gp_state)), |
895 | t ? data_race(READ_ONCE(t->__state)) : 0x1ffff, t ? t->rt_priority : 0xffU, |
896 | js, ja, jr, jw, (long)data_race(READ_ONCE(rcu_state.gp_wake_seq)), |
897 | (long)data_race(READ_ONCE(rcu_state.gp_seq)), |
898 | (long)data_race(READ_ONCE(rcu_get_root()->gp_seq_needed)), |
899 | data_race(READ_ONCE(rcu_state.gp_max)), |
900 | data_race(READ_ONCE(rcu_state.gp_flags))); |
901 | rcu_for_each_node_breadth_first(rnp) { |
902 | if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), READ_ONCE(rnp->gp_seq_needed)) && |
903 | !data_race(READ_ONCE(rnp->qsmask)) && !data_race(READ_ONCE(rnp->boost_tasks)) && |
904 | !data_race(READ_ONCE(rnp->exp_tasks)) && !data_race(READ_ONCE(rnp->gp_tasks))) |
905 | continue; |
		pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld ->qsmask %#lx %c%c%c%c ->n_boosts %ld\n",
			rnp->grplo, rnp->grphi,
			(long)data_race(READ_ONCE(rnp->gp_seq)),
			(long)data_race(READ_ONCE(rnp->gp_seq_needed)),
			data_race(READ_ONCE(rnp->qsmask)),
			".b"[!!data_race(READ_ONCE(rnp->boost_kthread_task))],
			".B"[!!data_race(READ_ONCE(rnp->boost_tasks))],
			".E"[!!data_race(READ_ONCE(rnp->exp_tasks))],
			".G"[!!data_race(READ_ONCE(rnp->gp_tasks))],
915 | data_race(READ_ONCE(rnp->n_boosts))); |
916 | if (!rcu_is_leaf_node(rnp)) |
917 | continue; |
918 | for_each_leaf_node_possible_cpu(rnp, cpu) { |
919 | rdp = per_cpu_ptr(&rcu_data, cpu); |
920 | if (READ_ONCE(rdp->gpwrap) || |
921 | ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), |
922 | READ_ONCE(rdp->gp_seq_needed))) |
923 | continue; |
			pr_info("\tcpu %d ->gp_seq_needed %ld\n",
925 | cpu, (long)data_race(READ_ONCE(rdp->gp_seq_needed))); |
926 | } |
927 | } |
928 | for_each_possible_cpu(cpu) { |
929 | rdp = per_cpu_ptr(&rcu_data, cpu); |
930 | cbs += data_race(READ_ONCE(rdp->n_cbs_invoked)); |
		if (rcu_segcblist_is_offloaded(&rdp->cblist))
			show_rcu_nocb_state(rdp);
	}
	pr_info("RCU callbacks invoked since boot: %lu\n", cbs);
935 | show_rcu_tasks_gp_kthreads(); |
936 | } |
937 | EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); |
938 | |
939 | /* |
940 | * This function checks for grace-period requests that fail to motivate |
941 | * RCU to come out of its idle mode. |
942 | */ |
943 | static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, |
944 | const unsigned long gpssdelay) |
945 | { |
946 | unsigned long flags; |
947 | unsigned long j; |
948 | struct rcu_node *rnp_root = rcu_get_root(); |
949 | static atomic_t warned = ATOMIC_INIT(0); |
950 | |
951 | if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress() || |
952 | ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq), |
953 | READ_ONCE(rnp_root->gp_seq_needed)) || |
954 | !smp_load_acquire(&rcu_state.gp_kthread)) // Get stable kthread. |
955 | return; |
956 | j = jiffies; /* Expensive access, and in common case don't get here. */ |
957 | if (time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || |
958 | time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || |
	    atomic_read(&warned))
960 | return; |
961 | |
962 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
963 | j = jiffies; |
964 | if (rcu_gp_in_progress() || |
965 | ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq), |
966 | READ_ONCE(rnp_root->gp_seq_needed)) || |
967 | time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || |
968 | time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || |
	    atomic_read(&warned)) {
970 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
971 | return; |
972 | } |
973 | /* Hold onto the leaf lock to make others see warned==1. */ |
974 | |
975 | if (rnp_root != rnp) |
976 | raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ |
977 | j = jiffies; |
978 | if (rcu_gp_in_progress() || |
979 | ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq), |
980 | READ_ONCE(rnp_root->gp_seq_needed)) || |
981 | time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || |
982 | time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || |
	    atomic_xchg(&warned, 1)) {
984 | if (rnp_root != rnp) |
985 | /* irqs remain disabled. */ |
986 | raw_spin_unlock_rcu_node(rnp_root); |
987 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
988 | return; |
989 | } |
990 | WARN_ON(1); |
991 | if (rnp_root != rnp) |
992 | raw_spin_unlock_rcu_node(rnp_root); |
993 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
994 | show_rcu_gp_kthreads(); |
995 | } |
996 | |
997 | /* |
998 | * Do a forward-progress check for rcutorture. This is normally invoked |
999 | * due to an OOM event. The argument "j" gives the time period during |
1000 | * which rcutorture would like progress to have been made. |
1001 | */ |
1002 | void rcu_fwd_progress_check(unsigned long j) |
1003 | { |
1004 | unsigned long cbs; |
1005 | int cpu; |
1006 | unsigned long max_cbs = 0; |
1007 | int max_cpu = -1; |
1008 | struct rcu_data *rdp; |
1009 | |
1010 | if (rcu_gp_in_progress()) { |
		pr_info("%s: GP age %lu jiffies\n",
1012 | __func__, jiffies - data_race(READ_ONCE(rcu_state.gp_start))); |
1013 | show_rcu_gp_kthreads(); |
1014 | } else { |
		pr_info("%s: Last GP end %lu jiffies ago\n",
			__func__, jiffies - data_race(READ_ONCE(rcu_state.gp_end)));
		preempt_disable();
		rdp = this_cpu_ptr(&rcu_data);
		rcu_check_gp_start_stall(rdp->mynode, rdp, j);
1020 | preempt_enable(); |
1021 | } |
1022 | for_each_possible_cpu(cpu) { |
1023 | cbs = rcu_get_n_cbs_cpu(cpu); |
1024 | if (!cbs) |
1025 | continue; |
1026 | if (max_cpu < 0) |
			pr_info("%s: callbacks", __func__);
		pr_cont(" %d: %lu", cpu, cbs);
1029 | if (cbs <= max_cbs) |
1030 | continue; |
1031 | max_cbs = cbs; |
1032 | max_cpu = cpu; |
1033 | } |
1034 | if (max_cpu >= 0) |
		pr_cont("\n");
1036 | } |
1037 | EXPORT_SYMBOL_GPL(rcu_fwd_progress_check); |
1038 | |
1039 | /* Commandeer a sysrq key to dump RCU's tree. */ |
1040 | static bool sysrq_rcu; |
1041 | module_param(sysrq_rcu, bool, 0444); |
1042 | |
1043 | /* Dump grace-period-request information due to commandeered sysrq. */ |
1044 | static void sysrq_show_rcu(u8 key) |
1045 | { |
1046 | show_rcu_gp_kthreads(); |
1047 | } |
1048 | |
1049 | static const struct sysrq_key_op sysrq_rcudump_op = { |
1050 | .handler = sysrq_show_rcu, |
	.help_msg = "show-rcu(y)",
	.action_msg = "Show RCU tree",
1053 | .enable_mask = SYSRQ_ENABLE_DUMP, |
1054 | }; |
1055 | |
1056 | static int __init rcu_sysrq_init(void) |
1057 | { |
1058 | if (sysrq_rcu) |
		return register_sysrq_key('y', &sysrq_rcudump_op);
1060 | return 0; |
1061 | } |
1062 | early_initcall(rcu_sysrq_init); |
1063 | |
1064 | #ifdef CONFIG_RCU_CPU_STALL_NOTIFIER |
1065 | |
1066 | ////////////////////////////////////////////////////////////////////////////// |
1067 | // |
1068 | // RCU CPU stall-warning notifiers |
1069 | |
1070 | static ATOMIC_NOTIFIER_HEAD(rcu_cpu_stall_notifier_list); |
1071 | |
1072 | /** |
1073 | * rcu_stall_chain_notifier_register - Add an RCU CPU stall notifier |
1074 | * @n: Entry to add. |
1075 | * |
1076 | * Adds an RCU CPU stall notifier to an atomic notifier chain. |
1077 | * The @action passed to a notifier will be @RCU_STALL_NOTIFY_NORM or |
1078 | * friends. The @data will be the duration of the stalled grace period, |
1079 | * in jiffies, coerced to a void* pointer. |
1080 | * |
1081 | * Returns 0 on success, %-EEXIST on error. |
1082 | */ |
1083 | int rcu_stall_chain_notifier_register(struct notifier_block *n) |
1084 | { |
1085 | int rcsn = rcu_cpu_stall_notifiers; |
1086 | |
	WARN(1, "Adding %pS() to RCU stall notifier list (%s).\n", n->notifier_call,
	     rcsn ? "possibly suppressing RCU CPU stall warnings" : "failed, so all is well");
	if (rcsn)
		return atomic_notifier_chain_register(&rcu_cpu_stall_notifier_list, n);
1091 | return -EEXIST; |
1092 | } |
1093 | EXPORT_SYMBOL_GPL(rcu_stall_chain_notifier_register); |
1094 | |
1095 | /** |
1096 | * rcu_stall_chain_notifier_unregister - Remove an RCU CPU stall notifier |
 * @n: Entry to remove.
1098 | * |
1099 | * Removes an RCU CPU stall notifier from an atomic notifier chain. |
1100 | * |
1101 | * Returns zero on success, %-ENOENT on failure. |
1102 | */ |
1103 | int rcu_stall_chain_notifier_unregister(struct notifier_block *n) |
1104 | { |
	return atomic_notifier_chain_unregister(&rcu_cpu_stall_notifier_list, n);
1106 | } |
1107 | EXPORT_SYMBOL_GPL(rcu_stall_chain_notifier_unregister); |
1108 | |
1109 | /* |
1110 | * rcu_stall_notifier_call_chain - Call functions in an RCU CPU stall notifier chain |
1111 | * @val: Value passed unmodified to notifier function |
1112 | * @v: Pointer passed unmodified to notifier function |
1113 | * |
1114 | * Calls each function in the RCU CPU stall notifier chain in turn, which |
1115 | * is an atomic call chain. See atomic_notifier_call_chain() for more |
1116 | * information. |
1117 | * |
1118 | * This is for use within RCU, hence the omission of the extra asterisk |
1119 | * to indicate a non-kerneldoc format header comment. |
1120 | */ |
1121 | int rcu_stall_notifier_call_chain(unsigned long val, void *v) |
1122 | { |
	return atomic_notifier_call_chain(&rcu_cpu_stall_notifier_list, val, v);
1124 | } |
1125 | |
1126 | #endif // #ifdef CONFIG_RCU_CPU_STALL_NOTIFIER |
1127 | |