1 | /* SPDX-License-Identifier: GPL-2.0+ */ |
2 | /* |
3 | * Read-Copy Update mechanism for mutual exclusion (tree-based version) |
4 | * Internal non-public definitions that provide either classic |
5 | * or preemptible semantics. |
6 | * |
7 | * Copyright Red Hat, 2009 |
8 | * Copyright IBM Corporation, 2009 |
9 | * Copyright SUSE, 2021 |
10 | * |
11 | * Author: Ingo Molnar <mingo@elte.hu> |
12 | * Paul E. McKenney <paulmck@linux.ibm.com> |
13 | * Frederic Weisbecker <frederic@kernel.org> |
14 | */ |
15 | |
16 | #ifdef CONFIG_RCU_NOCB_CPU |
17 | static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ |
static bool __read_mostly rcu_nocb_poll; /* Offload kthreads are to poll. */
19 | static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp) |
20 | { |
21 | return lockdep_is_held(&rdp->nocb_lock); |
22 | } |
23 | |
24 | static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp) |
25 | { |
26 | /* Race on early boot between thread creation and assignment */ |
27 | if (!rdp->nocb_cb_kthread || !rdp->nocb_gp_kthread) |
28 | return true; |
29 | |
30 | if (current == rdp->nocb_cb_kthread || current == rdp->nocb_gp_kthread) |
31 | if (in_task()) |
32 | return true; |
33 | return false; |
34 | } |
35 | |
/*
 * Offload callback processing from the set of CPUs specified at boot time
 * by rcu_nocb_mask.  For the CPUs in the set, there are kthreads
 * created that pull the callbacks from the corresponding CPU, wait for
 * a grace period to elapse, and invoke the callbacks.  These kthreads
 * are organized into GP kthreads, which manage incoming callbacks, wait for
 * grace periods, and awaken CB kthreads, and the CB kthreads, which only
 * invoke callbacks.  Each GP kthread invokes its own CBs.  The no-CBs CPUs
 * do a wake_up() on their GP kthread when they insert a callback into any
 * empty list, unless the rcu_nocb_poll boot parameter has been specified,
 * in which case each kthread actively polls its CPU.  (Which isn't so great
 * for energy efficiency, but which does reduce RCU's overhead on that CPU.)
 *
 * This is intended to be used in conjunction with Frederic Weisbecker's
 * adaptive-idle work, which would seriously reduce OS jitter on CPUs
 * running CPU-bound user-mode computations.
 *
 * Offloading of callbacks can also be used as an energy-efficiency
 * measure because CPUs with no RCU callbacks queued are more aggressive
 * about entering dyntick-idle mode.
 */
57 | |
58 | |
59 | /* |
60 | * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. |
61 | * If the list is invalid, a warning is emitted and all CPUs are offloaded. |
62 | */ |
static int __init rcu_nocb_setup(char *str)
{
	alloc_bootmem_cpumask_var(&rcu_nocb_mask);
	if (*str == '=') {
		if (cpulist_parse(++str, rcu_nocb_mask)) {
			pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
			cpumask_setall(rcu_nocb_mask);
		}
	}
	rcu_state.nocb_is_setup = true;
	return 1;
}
__setup("rcu_nocbs", rcu_nocb_setup);

static int __init parse_rcu_nocb_poll(char *arg)
{
	rcu_nocb_poll = true;
	return 1;
}
__setup("rcu_nocb_poll", parse_rcu_nocb_poll);
83 | |
84 | /* |
85 | * Don't bother bypassing ->cblist if the call_rcu() rate is low. |
86 | * After all, the main point of bypassing is to avoid lock contention |
87 | * on ->nocb_lock, which only can happen at high call_rcu() rates. |
88 | */ |
89 | static int nocb_nobypass_lim_per_jiffy = 16 * 1000 / HZ; |
90 | module_param(nocb_nobypass_lim_per_jiffy, int, 0); |
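/*
 * For example, with HZ=1000 the default works out to 16 direct ->cblist
 * enqueues per jiffy (16 * 1000 / HZ); with HZ=250 it would be 64.  Only
 * call_rcu() rates above that threshold start being diverted to the
 * ->nocb_bypass list (see rcu_nocb_try_bypass() below).
 */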
91 | |
92 | /* |
93 | * Acquire the specified rcu_data structure's ->nocb_bypass_lock. If the |
94 | * lock isn't immediately available, increment ->nocb_lock_contended to |
95 | * flag the contention. |
96 | */ |
static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
	__acquires(&rdp->nocb_bypass_lock)
{
	lockdep_assert_irqs_disabled();
	if (raw_spin_trylock(&rdp->nocb_bypass_lock))
		return;
	atomic_inc(&rdp->nocb_lock_contended);
	WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
	smp_mb__after_atomic(); /* atomic_inc() before lock. */
	raw_spin_lock(&rdp->nocb_bypass_lock);
	smp_mb__before_atomic(); /* atomic_dec() after lock. */
	atomic_dec(&rdp->nocb_lock_contended);
}
110 | |
111 | /* |
112 | * Spinwait until the specified rcu_data structure's ->nocb_lock is |
113 | * not contended. Please note that this is extremely special-purpose, |
114 | * relying on the fact that at most two kthreads and one CPU contend for |
115 | * this lock, and also that the two kthreads are guaranteed to have frequent |
116 | * grace-period-duration time intervals between successive acquisitions |
117 | * of the lock. This allows us to use an extremely simple throttling |
118 | * mechanism, and further to apply it only to the CPU doing floods of |
119 | * call_rcu() invocations. Don't try this at home! |
120 | */ |
121 | static void rcu_nocb_wait_contended(struct rcu_data *rdp) |
122 | { |
123 | WARN_ON_ONCE(smp_processor_id() != rdp->cpu); |
124 | while (WARN_ON_ONCE(atomic_read(&rdp->nocb_lock_contended))) |
125 | cpu_relax(); |
126 | } |
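
/*
 * Sketch of the intended calling pattern (as used in rcu_nocb_try_bypass()
 * below): the call_rcu() path first spins in rcu_nocb_wait_contended() and
 * only then takes the bypass lock, so a flooding CPU backs off whenever one
 * of the kthreads has flagged contention:
 *
 *	rcu_nocb_wait_contended(rdp);
 *	rcu_nocb_bypass_lock(rdp);
 *	... enqueue onto ->nocb_bypass ...
 *	rcu_nocb_bypass_unlock(rdp);
 */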
127 | |
128 | /* |
129 | * Conditionally acquire the specified rcu_data structure's |
130 | * ->nocb_bypass_lock. |
131 | */ |
132 | static bool rcu_nocb_bypass_trylock(struct rcu_data *rdp) |
133 | { |
134 | lockdep_assert_irqs_disabled(); |
135 | return raw_spin_trylock(&rdp->nocb_bypass_lock); |
136 | } |
137 | |
138 | /* |
139 | * Release the specified rcu_data structure's ->nocb_bypass_lock. |
140 | */ |
141 | static void rcu_nocb_bypass_unlock(struct rcu_data *rdp) |
142 | __releases(&rdp->nocb_bypass_lock) |
143 | { |
144 | lockdep_assert_irqs_disabled(); |
145 | raw_spin_unlock(&rdp->nocb_bypass_lock); |
146 | } |
147 | |
148 | /* |
149 | * Acquire the specified rcu_data structure's ->nocb_lock, but only |
150 | * if it corresponds to a no-CBs CPU. |
151 | */ |
152 | static void rcu_nocb_lock(struct rcu_data *rdp) |
153 | { |
154 | lockdep_assert_irqs_disabled(); |
155 | if (!rcu_rdp_is_offloaded(rdp)) |
156 | return; |
157 | raw_spin_lock(&rdp->nocb_lock); |
158 | } |
159 | |
160 | /* |
161 | * Release the specified rcu_data structure's ->nocb_lock, but only |
162 | * if it corresponds to a no-CBs CPU. |
163 | */ |
164 | static void rcu_nocb_unlock(struct rcu_data *rdp) |
165 | { |
166 | if (rcu_rdp_is_offloaded(rdp)) { |
167 | lockdep_assert_irqs_disabled(); |
168 | raw_spin_unlock(&rdp->nocb_lock); |
169 | } |
170 | } |
171 | |
172 | /* |
173 | * Release the specified rcu_data structure's ->nocb_lock and restore |
174 | * interrupts, but only if it corresponds to a no-CBs CPU. |
175 | */ |
176 | static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp, |
177 | unsigned long flags) |
178 | { |
179 | if (rcu_rdp_is_offloaded(rdp)) { |
180 | lockdep_assert_irqs_disabled(); |
181 | raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); |
182 | } else { |
183 | local_irq_restore(flags); |
184 | } |
185 | } |
186 | |
187 | /* Lockdep check that ->cblist may be safely accessed. */ |
188 | static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp) |
189 | { |
190 | lockdep_assert_irqs_disabled(); |
191 | if (rcu_rdp_is_offloaded(rdp)) |
192 | lockdep_assert_held(&rdp->nocb_lock); |
193 | } |
194 | |
195 | /* |
196 | * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended |
197 | * grace period. |
198 | */ |
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
{
	swake_up_all(sq);
}

static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{
	return &rnp->nocb_gp_wq[rcu_seq_ctr(rnp->gp_seq) & 0x1];
}

static void rcu_init_one_nocb(struct rcu_node *rnp)
{
	init_swait_queue_head(&rnp->nocb_gp_wq[0]);
	init_swait_queue_head(&rnp->nocb_gp_wq[1]);
}
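
/*
 * The two wait queues above are used alternately, indexed by the low-order
 * bit of the grace-period sequence counter: an rcuog kthread waiting for
 * grace period N sleeps on nocb_gp_wq[rcu_seq_ctr(N) & 0x1] (see
 * nocb_gp_wait() below), and rcu_nocb_gp_cleanup() is invoked on the
 * corresponding queue once that grace period has ended, leaving the other
 * entry free for the following grace period.
 */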
214 | |
215 | static bool __wake_nocb_gp(struct rcu_data *rdp_gp, |
216 | struct rcu_data *rdp, |
217 | bool force, unsigned long flags) |
218 | __releases(rdp_gp->nocb_gp_lock) |
219 | { |
220 | bool needwake = false; |
221 | |
222 | if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) { |
223 | raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); |
224 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
225 | TPS("AlreadyAwake" )); |
226 | return false; |
227 | } |
228 | |
229 | if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) { |
230 | WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); |
231 | del_timer(timer: &rdp_gp->nocb_timer); |
232 | } |
233 | |
234 | if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) { |
235 | WRITE_ONCE(rdp_gp->nocb_gp_sleep, false); |
236 | needwake = true; |
237 | } |
238 | raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); |
239 | if (needwake) { |
240 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("DoWake" )); |
241 | wake_up_process(tsk: rdp_gp->nocb_gp_kthread); |
242 | } |
243 | |
244 | return needwake; |
245 | } |
246 | |
247 | /* |
248 | * Kick the GP kthread for this NOCB group. |
249 | */ |
250 | static bool wake_nocb_gp(struct rcu_data *rdp, bool force) |
251 | { |
252 | unsigned long flags; |
253 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
254 | |
255 | raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); |
256 | return __wake_nocb_gp(rdp_gp, rdp, force, flags); |
257 | } |
258 | |
259 | #ifdef CONFIG_RCU_LAZY |
/*
 * LAZY_FLUSH_JIFFIES sets the maximum amount of time that can elapse
 * before lazy callbacks are flushed.  Lazy callbacks could be flushed
 * much earlier for a number of other reasons; however, LAZY_FLUSH_JIFFIES
 * ensures that no lazy callback is left unsubmitted to RCU for longer
 * than that many jiffies.
 */
267 | #define LAZY_FLUSH_JIFFIES (10 * HZ) |
268 | static unsigned long jiffies_lazy_flush = LAZY_FLUSH_JIFFIES; |
269 | |
270 | // To be called only from test code. |
271 | void rcu_set_jiffies_lazy_flush(unsigned long jif) |
272 | { |
273 | jiffies_lazy_flush = jif; |
274 | } |
275 | EXPORT_SYMBOL(rcu_set_jiffies_lazy_flush); |
276 | |
277 | unsigned long rcu_get_jiffies_lazy_flush(void) |
278 | { |
279 | return jiffies_lazy_flush; |
280 | } |
281 | EXPORT_SYMBOL(rcu_get_jiffies_lazy_flush); |
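
/*
 * A hypothetical test-only override, for illustration: a torture module
 * could shorten the flush horizon and later restore the previous value:
 *
 *	unsigned long orig = rcu_get_jiffies_lazy_flush();
 *
 *	rcu_set_jiffies_lazy_flush(HZ / 10);
 *	... run the lazy call_rcu() test ...
 *	rcu_set_jiffies_lazy_flush(orig);
 */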
282 | #endif |
283 | |
284 | /* |
285 | * Arrange to wake the GP kthread for this NOCB group at some future |
286 | * time when it is safe to do so. |
287 | */ |
288 | static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, |
289 | const char *reason) |
290 | { |
291 | unsigned long flags; |
292 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
293 | |
294 | raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); |
295 | |
296 | /* |
297 | * Bypass wakeup overrides previous deferments. In case of |
298 | * callback storms, no need to wake up too early. |
299 | */ |
300 | if (waketype == RCU_NOCB_WAKE_LAZY && |
301 | rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) { |
302 | mod_timer(timer: &rdp_gp->nocb_timer, expires: jiffies + rcu_get_jiffies_lazy_flush()); |
303 | WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); |
304 | } else if (waketype == RCU_NOCB_WAKE_BYPASS) { |
305 | mod_timer(timer: &rdp_gp->nocb_timer, expires: jiffies + 2); |
306 | WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); |
307 | } else { |
308 | if (rdp_gp->nocb_defer_wakeup < RCU_NOCB_WAKE) |
309 | mod_timer(timer: &rdp_gp->nocb_timer, expires: jiffies + 1); |
310 | if (rdp_gp->nocb_defer_wakeup < waketype) |
311 | WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); |
312 | } |
313 | |
314 | raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); |
315 | |
316 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, reason); |
317 | } |
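
/*
 * Example of the override rules above (the RCU_NOCB_WAKE_* values form an
 * increasing scale): a lazy request only arms the long LAZY_FLUSH_JIFFIES
 * timer when no wakeup at all was pending; a bypass request unconditionally
 * rearms the timer for two jiffies ("bypass wakeup overrides previous
 * deferments"); and an ordinary or forced request arms a one-jiffy timer
 * when nothing of RCU_NOCB_WAKE strength or above is already pending, and
 * never downgrades a stronger pending level.
 */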
318 | |
319 | /* |
320 | * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL. |
321 | * However, if there is a callback to be enqueued and if ->nocb_bypass |
322 | * proves to be initially empty, just return false because the no-CB GP |
323 | * kthread may need to be awakened in this case. |
324 | * |
325 | * Return true if there was something to be flushed and it succeeded, otherwise |
326 | * false. |
327 | * |
328 | * Note that this function always returns true if rhp is NULL. |
329 | */ |
330 | static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp_in, |
331 | unsigned long j, bool lazy) |
332 | { |
333 | struct rcu_cblist rcl; |
334 | struct rcu_head *rhp = rhp_in; |
335 | |
336 | WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)); |
337 | rcu_lockdep_assert_cblist_protected(rdp); |
338 | lockdep_assert_held(&rdp->nocb_bypass_lock); |
339 | if (rhp && !rcu_cblist_n_cbs(rclp: &rdp->nocb_bypass)) { |
340 | raw_spin_unlock(&rdp->nocb_bypass_lock); |
341 | return false; |
342 | } |
343 | /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */ |
344 | if (rhp) |
345 | rcu_segcblist_inc_len(rsclp: &rdp->cblist); /* Must precede enqueue. */ |
346 | |
347 | /* |
348 | * If the new CB requested was a lazy one, queue it onto the main |
349 | * ->cblist so that we can take advantage of the grace-period that will |
350 | * happen regardless. But queue it onto the bypass list first so that |
351 | * the lazy CB is ordered with the existing CBs in the bypass list. |
352 | */ |
353 | if (lazy && rhp) { |
354 | rcu_cblist_enqueue(rclp: &rdp->nocb_bypass, rhp); |
355 | rhp = NULL; |
356 | } |
357 | rcu_cblist_flush_enqueue(drclp: &rcl, srclp: &rdp->nocb_bypass, rhp); |
358 | WRITE_ONCE(rdp->lazy_len, 0); |
359 | |
360 | rcu_segcblist_insert_pend_cbs(rsclp: &rdp->cblist, rclp: &rcl); |
361 | WRITE_ONCE(rdp->nocb_bypass_first, j); |
362 | rcu_nocb_bypass_unlock(rdp); |
363 | return true; |
364 | } |
365 | |
366 | /* |
367 | * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL. |
368 | * However, if there is a callback to be enqueued and if ->nocb_bypass |
369 | * proves to be initially empty, just return false because the no-CB GP |
370 | * kthread may need to be awakened in this case. |
371 | * |
372 | * Note that this function always returns true if rhp is NULL. |
373 | */ |
374 | static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, |
375 | unsigned long j, bool lazy) |
376 | { |
377 | if (!rcu_rdp_is_offloaded(rdp)) |
378 | return true; |
379 | rcu_lockdep_assert_cblist_protected(rdp); |
380 | rcu_nocb_bypass_lock(rdp); |
381 | return rcu_nocb_do_flush_bypass(rdp, rhp_in: rhp, j, lazy); |
382 | } |
383 | |
384 | /* |
385 | * If the ->nocb_bypass_lock is immediately available, flush the |
386 | * ->nocb_bypass queue into ->cblist. |
387 | */ |
388 | static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) |
389 | { |
390 | rcu_lockdep_assert_cblist_protected(rdp); |
391 | if (!rcu_rdp_is_offloaded(rdp) || |
392 | !rcu_nocb_bypass_trylock(rdp)) |
393 | return; |
394 | WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false)); |
395 | } |
396 | |
397 | /* |
398 | * See whether it is appropriate to use the ->nocb_bypass list in order |
399 | * to control contention on ->nocb_lock. A limited number of direct |
400 | * enqueues are permitted into ->cblist per jiffy. If ->nocb_bypass |
401 | * is non-empty, further callbacks must be placed into ->nocb_bypass, |
402 | * otherwise rcu_barrier() breaks. Use rcu_nocb_flush_bypass() to switch |
403 | * back to direct use of ->cblist. However, ->nocb_bypass should not be |
404 | * used if ->cblist is empty, because otherwise callbacks can be stranded |
405 | * on ->nocb_bypass because we cannot count on the current CPU ever again |
406 | * invoking call_rcu(). The general rule is that if ->nocb_bypass is |
407 | * non-empty, the corresponding no-CBs grace-period kthread must not be |
408 | * in an indefinite sleep state. |
409 | * |
410 | * Finally, it is not permitted to use the bypass during early boot, |
411 | * as doing so would confuse the auto-initialization code. Besides |
412 | * which, there is no point in worrying about lock contention while |
413 | * there is only one CPU in operation. |
414 | */ |
415 | static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, |
416 | bool *was_alldone, unsigned long flags, |
417 | bool lazy) |
418 | { |
419 | unsigned long c; |
420 | unsigned long cur_gp_seq; |
421 | unsigned long j = jiffies; |
422 | long ncbs = rcu_cblist_n_cbs(rclp: &rdp->nocb_bypass); |
423 | bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len)); |
424 | |
425 | lockdep_assert_irqs_disabled(); |
426 | |
427 | // Pure softirq/rcuc based processing: no bypassing, no |
428 | // locking. |
429 | if (!rcu_rdp_is_offloaded(rdp)) { |
430 | *was_alldone = !rcu_segcblist_pend_cbs(rsclp: &rdp->cblist); |
431 | return false; |
432 | } |
433 | |
434 | // In the process of (de-)offloading: no bypassing, but |
435 | // locking. |
436 | if (!rcu_segcblist_completely_offloaded(rsclp: &rdp->cblist)) { |
437 | rcu_nocb_lock(rdp); |
438 | *was_alldone = !rcu_segcblist_pend_cbs(rsclp: &rdp->cblist); |
439 | return false; /* Not offloaded, no bypassing. */ |
440 | } |
441 | |
442 | // Don't use ->nocb_bypass during early boot. |
443 | if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) { |
444 | rcu_nocb_lock(rdp); |
445 | WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); |
446 | *was_alldone = !rcu_segcblist_pend_cbs(rsclp: &rdp->cblist); |
447 | return false; |
448 | } |
449 | |
450 | // If we have advanced to a new jiffy, reset counts to allow |
451 | // moving back from ->nocb_bypass to ->cblist. |
452 | if (j == rdp->nocb_nobypass_last) { |
453 | c = rdp->nocb_nobypass_count + 1; |
454 | } else { |
455 | WRITE_ONCE(rdp->nocb_nobypass_last, j); |
456 | c = rdp->nocb_nobypass_count - nocb_nobypass_lim_per_jiffy; |
457 | if (ULONG_CMP_LT(rdp->nocb_nobypass_count, |
458 | nocb_nobypass_lim_per_jiffy)) |
459 | c = 0; |
460 | else if (c > nocb_nobypass_lim_per_jiffy) |
461 | c = nocb_nobypass_lim_per_jiffy; |
462 | } |
463 | WRITE_ONCE(rdp->nocb_nobypass_count, c); |
464 | |
465 | // If there hasn't yet been all that many ->cblist enqueues |
466 | // this jiffy, tell the caller to enqueue onto ->cblist. But flush |
467 | // ->nocb_bypass first. |
468 | // Lazy CBs throttle this back and do immediate bypass queuing. |
469 | if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) { |
470 | rcu_nocb_lock(rdp); |
471 | *was_alldone = !rcu_segcblist_pend_cbs(rsclp: &rdp->cblist); |
472 | if (*was_alldone) |
473 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
474 | TPS("FirstQ" )); |
475 | |
476 | WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false)); |
477 | WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); |
478 | return false; // Caller must enqueue the callback. |
479 | } |
480 | |
481 | // If ->nocb_bypass has been used too long or is too full, |
482 | // flush ->nocb_bypass to ->cblist. |
483 | if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) || |
484 | (ncbs && bypass_is_lazy && |
485 | (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()))) || |
486 | ncbs >= qhimark) { |
487 | rcu_nocb_lock(rdp); |
488 | *was_alldone = !rcu_segcblist_pend_cbs(rsclp: &rdp->cblist); |
489 | |
490 | if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) { |
491 | if (*was_alldone) |
492 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
493 | TPS("FirstQ" )); |
494 | WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); |
495 | return false; // Caller must enqueue the callback. |
496 | } |
497 | if (j != rdp->nocb_gp_adv_time && |
498 | rcu_segcblist_nextgp(rsclp: &rdp->cblist, lp: &cur_gp_seq) && |
499 | rcu_seq_done(sp: &rdp->mynode->gp_seq, s: cur_gp_seq)) { |
500 | rcu_advance_cbs_nowake(rnp: rdp->mynode, rdp); |
501 | rdp->nocb_gp_adv_time = j; |
502 | } |
503 | |
504 | // The flush succeeded and we moved CBs into the regular list. |
505 | // Don't wait for the wake up timer as it may be too far ahead. |
506 | // Wake up the GP thread now instead, if the cblist was empty. |
507 | __call_rcu_nocb_wake(rdp, was_empty: *was_alldone, flags); |
508 | |
509 | return true; // Callback already enqueued. |
510 | } |
511 | |
512 | // We need to use the bypass. |
513 | rcu_nocb_wait_contended(rdp); |
514 | rcu_nocb_bypass_lock(rdp); |
515 | ncbs = rcu_cblist_n_cbs(rclp: &rdp->nocb_bypass); |
516 | rcu_segcblist_inc_len(rsclp: &rdp->cblist); /* Must precede enqueue. */ |
517 | rcu_cblist_enqueue(rclp: &rdp->nocb_bypass, rhp); |
518 | |
519 | if (lazy) |
520 | WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1); |
521 | |
522 | if (!ncbs) { |
523 | WRITE_ONCE(rdp->nocb_bypass_first, j); |
524 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("FirstBQ" )); |
525 | } |
526 | rcu_nocb_bypass_unlock(rdp); |
527 | smp_mb(); /* Order enqueue before wake. */ |
528 | // A wake up of the grace period kthread or timer adjustment |
529 | // needs to be done only if: |
530 | // 1. Bypass list was fully empty before (this is the first |
531 | // bypass list entry), or: |
532 | // 2. Both of these conditions are met: |
533 | // a. The bypass list previously had only lazy CBs, and: |
534 | // b. The new CB is non-lazy. |
535 | if (!ncbs || (bypass_is_lazy && !lazy)) { |
536 | // No-CBs GP kthread might be indefinitely asleep, if so, wake. |
537 | rcu_nocb_lock(rdp); // Rare during call_rcu() flood. |
538 | if (!rcu_segcblist_pend_cbs(rsclp: &rdp->cblist)) { |
539 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
540 | TPS("FirstBQwake" )); |
541 | __call_rcu_nocb_wake(rdp, was_empty: true, flags); |
542 | } else { |
543 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
544 | TPS("FirstBQnoWake" )); |
545 | rcu_nocb_unlock(rdp); |
546 | } |
547 | } |
548 | return true; // Callback already enqueued. |
549 | } |
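
/*
 * To summarize the contract of rcu_nocb_try_bypass(): a false return means
 * the callback was NOT queued here and the caller must enqueue it onto
 * ->cblist itself, typically with ->nocb_lock held when the rdp is offloaded
 * so that __call_rcu_nocb_wake() can later release it; a true return means
 * the callback has already been queued, either onto ->nocb_bypass or, via
 * the flush path, onto ->cblist, and any required wakeup has been arranged.
 */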
550 | |
/*
 * Awaken the no-CBs grace-period kthread if needed, either due to it
 * legitimately being asleep or due to overload conditions.
 *
 * If warranted, also wake up the kthread servicing this CPU's queues.
 */
557 | static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, |
558 | unsigned long flags) |
559 | __releases(rdp->nocb_lock) |
560 | { |
561 | long bypass_len; |
562 | unsigned long cur_gp_seq; |
563 | unsigned long j; |
564 | long lazy_len; |
565 | long len; |
566 | struct task_struct *t; |
567 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
568 | |
569 | // If we are being polled or there is no kthread, just leave. |
570 | t = READ_ONCE(rdp->nocb_gp_kthread); |
571 | if (rcu_nocb_poll || !t) { |
572 | rcu_nocb_unlock(rdp); |
573 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
574 | TPS("WakeNotPoll" )); |
575 | return; |
576 | } |
// Need to actually do a wakeup.
578 | len = rcu_segcblist_n_cbs(rsclp: &rdp->cblist); |
579 | bypass_len = rcu_cblist_n_cbs(rclp: &rdp->nocb_bypass); |
580 | lazy_len = READ_ONCE(rdp->lazy_len); |
581 | if (was_alldone) { |
582 | rdp->qlen_last_fqs_check = len; |
583 | // Only lazy CBs in bypass list |
584 | if (lazy_len && bypass_len == lazy_len) { |
585 | rcu_nocb_unlock(rdp); |
586 | wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, |
587 | TPS("WakeLazy" )); |
588 | } else if (!irqs_disabled_flags(flags)) { |
589 | /* ... if queue was empty ... */ |
590 | rcu_nocb_unlock(rdp); |
591 | wake_nocb_gp(rdp, force: false); |
592 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
593 | TPS("WakeEmpty" )); |
594 | } else { |
595 | rcu_nocb_unlock(rdp); |
596 | wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE, |
597 | TPS("WakeEmptyIsDeferred" )); |
598 | } |
599 | } else if (len > rdp->qlen_last_fqs_check + qhimark) { |
600 | /* ... or if many callbacks queued. */ |
601 | rdp->qlen_last_fqs_check = len; |
602 | j = jiffies; |
603 | if (j != rdp->nocb_gp_adv_time && |
604 | rcu_segcblist_nextgp(rsclp: &rdp->cblist, lp: &cur_gp_seq) && |
605 | rcu_seq_done(sp: &rdp->mynode->gp_seq, s: cur_gp_seq)) { |
606 | rcu_advance_cbs_nowake(rnp: rdp->mynode, rdp); |
607 | rdp->nocb_gp_adv_time = j; |
608 | } |
609 | smp_mb(); /* Enqueue before timer_pending(). */ |
610 | if ((rdp->nocb_cb_sleep || |
611 | !rcu_segcblist_ready_cbs(rsclp: &rdp->cblist)) && |
612 | !timer_pending(timer: &rdp_gp->nocb_timer)) { |
613 | rcu_nocb_unlock(rdp); |
614 | wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE, |
615 | TPS("WakeOvfIsDeferred" )); |
616 | } else { |
617 | rcu_nocb_unlock(rdp); |
618 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("WakeNot" )); |
619 | } |
620 | } else { |
621 | rcu_nocb_unlock(rdp); |
622 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("WakeNot" )); |
623 | } |
624 | } |
625 | |
static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
			  rcu_callback_t func, unsigned long flags, bool lazy)
{
	bool was_alldone;

	if (!rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) {
		/* Not enqueued on bypass but locked, do regular enqueue */
		rcutree_enqueue(rdp, head, func);
		__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
	}
}
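
/*
 * This is the NOCB leg of the call_rcu() path: the caller (assumed to live
 * in tree.c) is expected to have disabled interrupts and selected this CPU's
 * rcu_data before getting here, so by the time we return the callback sits
 * either on ->nocb_bypass or on ->cblist and any needed rcuog wakeup has
 * been arranged.
 */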
637 | |
638 | static int nocb_gp_toggle_rdp(struct rcu_data *rdp, |
639 | bool *wake_state) |
640 | { |
641 | struct rcu_segcblist *cblist = &rdp->cblist; |
642 | unsigned long flags; |
643 | int ret; |
644 | |
645 | rcu_nocb_lock_irqsave(rdp, flags); |
646 | if (rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_OFFLOADED) && |
647 | !rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_GP)) { |
/*
 * Offloading.  Set our flag and notify the offload worker.
 * We will handle this rdp until it is de-offloaded.
 */
652 | rcu_segcblist_set_flags(rsclp: cblist, SEGCBLIST_KTHREAD_GP); |
653 | if (rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_CB)) |
654 | *wake_state = true; |
655 | ret = 1; |
656 | } else if (!rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_OFFLOADED) && |
657 | rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_GP)) { |
/*
 * De-offloading.  Clear our flag and notify the de-offload worker.
 * We will ignore this rdp until it is re-offloaded.
 */
662 | rcu_segcblist_clear_flags(rsclp: cblist, SEGCBLIST_KTHREAD_GP); |
663 | if (!rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_CB)) |
664 | *wake_state = true; |
665 | ret = 0; |
666 | } else { |
667 | WARN_ON_ONCE(1); |
668 | ret = -1; |
669 | } |
670 | |
671 | rcu_nocb_unlock_irqrestore(rdp, flags); |
672 | |
673 | return ret; |
674 | } |
675 | |
static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu)
{
	trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
	swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
					    !READ_ONCE(my_rdp->nocb_gp_sleep));
	trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
}
683 | |
684 | /* |
685 | * No-CBs GP kthreads come here to wait for additional callbacks to show up |
686 | * or for grace periods to end. |
687 | */ |
688 | static void nocb_gp_wait(struct rcu_data *my_rdp) |
689 | { |
690 | bool bypass = false; |
691 | int __maybe_unused cpu = my_rdp->cpu; |
692 | unsigned long cur_gp_seq; |
693 | unsigned long flags; |
694 | bool gotcbs = false; |
695 | unsigned long j = jiffies; |
696 | bool lazy = false; |
697 | bool needwait_gp = false; // This prevents actual uninitialized use. |
698 | bool needwake; |
699 | bool needwake_gp; |
700 | struct rcu_data *rdp, *rdp_toggling = NULL; |
701 | struct rcu_node *rnp; |
702 | unsigned long wait_gp_seq = 0; // Suppress "use uninitialized" warning. |
703 | bool wasempty = false; |
704 | |
705 | /* |
706 | * Each pass through the following loop checks for CBs and for the |
707 | * nearest grace period (if any) to wait for next. The CB kthreads |
708 | * and the global grace-period kthread are awakened if needed. |
709 | */ |
710 | WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp); |
711 | /* |
712 | * An rcu_data structure is removed from the list after its |
713 | * CPU is de-offloaded and added to the list before that CPU is |
714 | * (re-)offloaded. If the following loop happens to be referencing |
715 | * that rcu_data structure during the time that the corresponding |
716 | * CPU is de-offloaded and then immediately re-offloaded, this |
717 | * loop's rdp pointer will be carried to the end of the list by |
718 | * the resulting pair of list operations. This can cause the loop |
719 | * to skip over some of the rcu_data structures that were supposed |
720 | * to have been scanned. Fortunately a new iteration through the |
721 | * entire loop is forced after a given CPU's rcu_data structure |
722 | * is added to the list, so the skipped-over rcu_data structures |
723 | * won't be ignored for long. |
724 | */ |
725 | list_for_each_entry(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp) { |
726 | long bypass_ncbs; |
727 | bool flush_bypass = false; |
728 | long lazy_ncbs; |
729 | |
730 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("Check" )); |
731 | rcu_nocb_lock_irqsave(rdp, flags); |
732 | lockdep_assert_held(&rdp->nocb_lock); |
733 | bypass_ncbs = rcu_cblist_n_cbs(rclp: &rdp->nocb_bypass); |
734 | lazy_ncbs = READ_ONCE(rdp->lazy_len); |
735 | |
736 | if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) && |
737 | (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()) || |
738 | bypass_ncbs > 2 * qhimark)) { |
739 | flush_bypass = true; |
740 | } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) && |
741 | (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) || |
742 | bypass_ncbs > 2 * qhimark)) { |
743 | flush_bypass = true; |
744 | } else if (!bypass_ncbs && rcu_segcblist_empty(rsclp: &rdp->cblist)) { |
745 | rcu_nocb_unlock_irqrestore(rdp, flags); |
746 | continue; /* No callbacks here, try next. */ |
747 | } |
748 | |
749 | if (flush_bypass) { |
750 | // Bypass full or old, so flush it. |
751 | (void)rcu_nocb_try_flush_bypass(rdp, j); |
752 | bypass_ncbs = rcu_cblist_n_cbs(rclp: &rdp->nocb_bypass); |
753 | lazy_ncbs = READ_ONCE(rdp->lazy_len); |
754 | } |
755 | |
756 | if (bypass_ncbs) { |
757 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
758 | reason: bypass_ncbs == lazy_ncbs ? TPS("Lazy" ) : TPS("Bypass" )); |
759 | if (bypass_ncbs == lazy_ncbs) |
760 | lazy = true; |
761 | else |
762 | bypass = true; |
763 | } |
764 | rnp = rdp->mynode; |
765 | |
766 | // Advance callbacks if helpful and low contention. |
767 | needwake_gp = false; |
768 | if (!rcu_segcblist_restempty(rsclp: &rdp->cblist, |
769 | RCU_NEXT_READY_TAIL) || |
770 | (rcu_segcblist_nextgp(rsclp: &rdp->cblist, lp: &cur_gp_seq) && |
771 | rcu_seq_done(sp: &rnp->gp_seq, s: cur_gp_seq))) { |
772 | raw_spin_lock_rcu_node(rnp); /* irqs disabled. */ |
773 | needwake_gp = rcu_advance_cbs(rnp, rdp); |
774 | wasempty = rcu_segcblist_restempty(rsclp: &rdp->cblist, |
775 | RCU_NEXT_READY_TAIL); |
776 | raw_spin_unlock_rcu_node(rnp); /* irqs disabled. */ |
777 | } |
778 | // Need to wait on some grace period? |
779 | WARN_ON_ONCE(wasempty && |
780 | !rcu_segcblist_restempty(&rdp->cblist, |
781 | RCU_NEXT_READY_TAIL)); |
782 | if (rcu_segcblist_nextgp(rsclp: &rdp->cblist, lp: &cur_gp_seq)) { |
783 | if (!needwait_gp || |
784 | ULONG_CMP_LT(cur_gp_seq, wait_gp_seq)) |
785 | wait_gp_seq = cur_gp_seq; |
786 | needwait_gp = true; |
787 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
788 | TPS("NeedWaitGP" )); |
789 | } |
790 | if (rcu_segcblist_ready_cbs(rsclp: &rdp->cblist)) { |
791 | needwake = rdp->nocb_cb_sleep; |
792 | WRITE_ONCE(rdp->nocb_cb_sleep, false); |
793 | } else { |
794 | needwake = false; |
795 | } |
796 | rcu_nocb_unlock_irqrestore(rdp, flags); |
797 | if (needwake) { |
798 | swake_up_one(q: &rdp->nocb_cb_wq); |
799 | gotcbs = true; |
800 | } |
801 | if (needwake_gp) |
802 | rcu_gp_kthread_wake(); |
803 | } |
804 | |
805 | my_rdp->nocb_gp_bypass = bypass; |
806 | my_rdp->nocb_gp_gp = needwait_gp; |
807 | my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0; |
808 | |
809 | // At least one child with non-empty ->nocb_bypass, so set |
810 | // timer in order to avoid stranding its callbacks. |
811 | if (!rcu_nocb_poll) { |
// If the bypass list has only lazy CBs, add a deferred lazy wakeup.
813 | if (lazy && !bypass) { |
814 | wake_nocb_gp_defer(rdp: my_rdp, RCU_NOCB_WAKE_LAZY, |
815 | TPS("WakeLazyIsDeferred" )); |
816 | // Otherwise add a deferred bypass wake up. |
817 | } else if (bypass) { |
818 | wake_nocb_gp_defer(rdp: my_rdp, RCU_NOCB_WAKE_BYPASS, |
819 | TPS("WakeBypassIsDeferred" )); |
820 | } |
821 | } |
822 | |
823 | if (rcu_nocb_poll) { |
824 | /* Polling, so trace if first poll in the series. */ |
825 | if (gotcbs) |
826 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu, TPS("Poll" )); |
827 | if (list_empty(head: &my_rdp->nocb_head_rdp)) { |
828 | raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags); |
829 | if (!my_rdp->nocb_toggling_rdp) |
830 | WRITE_ONCE(my_rdp->nocb_gp_sleep, true); |
831 | raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags); |
832 | /* Wait for any offloading rdp */ |
833 | nocb_gp_sleep(my_rdp, cpu); |
834 | } else { |
835 | schedule_timeout_idle(timeout: 1); |
836 | } |
837 | } else if (!needwait_gp) { |
838 | /* Wait for callbacks to appear. */ |
839 | nocb_gp_sleep(my_rdp, cpu); |
840 | } else { |
841 | rnp = my_rdp->mynode; |
842 | trace_rcu_this_gp(rnp, rdp: my_rdp, gp_seq_req: wait_gp_seq, TPS("StartWait" )); |
843 | swait_event_interruptible_exclusive( |
844 | rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1], |
845 | rcu_seq_done(&rnp->gp_seq, wait_gp_seq) || |
846 | !READ_ONCE(my_rdp->nocb_gp_sleep)); |
847 | trace_rcu_this_gp(rnp, rdp: my_rdp, gp_seq_req: wait_gp_seq, TPS("EndWait" )); |
848 | } |
849 | |
850 | if (!rcu_nocb_poll) { |
851 | raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags); |
852 | // (De-)queue an rdp to/from the group if its nocb state is changing |
853 | rdp_toggling = my_rdp->nocb_toggling_rdp; |
854 | if (rdp_toggling) |
855 | my_rdp->nocb_toggling_rdp = NULL; |
856 | |
857 | if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) { |
858 | WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); |
859 | del_timer(timer: &my_rdp->nocb_timer); |
860 | } |
861 | WRITE_ONCE(my_rdp->nocb_gp_sleep, true); |
862 | raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags); |
863 | } else { |
864 | rdp_toggling = READ_ONCE(my_rdp->nocb_toggling_rdp); |
865 | if (rdp_toggling) { |
/*
 * Paranoid locking to make sure nocb_toggling_rdp is properly
 * reset *before* we (re)set SEGCBLIST_KTHREAD_GP, or we could
 * race with another round of nocb toggling for this rdp.
 * Nocb locking should already prevent that, but we stick
 * to paranoia, especially on this rare path.
 */
873 | raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags); |
874 | my_rdp->nocb_toggling_rdp = NULL; |
875 | raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags); |
876 | } |
877 | } |
878 | |
879 | if (rdp_toggling) { |
880 | bool wake_state = false; |
881 | int ret; |
882 | |
883 | ret = nocb_gp_toggle_rdp(rdp: rdp_toggling, wake_state: &wake_state); |
884 | if (ret == 1) |
885 | list_add_tail(new: &rdp_toggling->nocb_entry_rdp, head: &my_rdp->nocb_head_rdp); |
886 | else if (ret == 0) |
887 | list_del(entry: &rdp_toggling->nocb_entry_rdp); |
888 | if (wake_state) |
889 | swake_up_one(q: &rdp_toggling->nocb_state_wq); |
890 | } |
891 | |
892 | my_rdp->nocb_gp_seq = -1; |
893 | WARN_ON(signal_pending(current)); |
894 | } |
895 | |
/*
 * No-CBs grace-period-wait kthread.  There is one of these per group
 * of CPUs, but only after at least one CPU in that group has come online
 * at least once since boot.  This kthread checks for newly posted
 * callbacks from any of the CPUs it is responsible for, waits for a
 * grace period, then awakens all of the rcu_nocb_cb_kthread() instances
 * that then have callback-invocation work to do.
 */
static int rcu_nocb_gp_kthread(void *arg)
{
	struct rcu_data *rdp = arg;

	for (;;) {
		WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);
		nocb_gp_wait(rdp);
		cond_resched_tasks_rcu_qs();
	}
	return 0;
}
915 | |
static inline bool nocb_cb_can_run(struct rcu_data *rdp)
{
	u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_CB;

	return rcu_segcblist_test_flags(&rdp->cblist, flags);
}

static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
{
	return nocb_cb_can_run(rdp) && !READ_ONCE(rdp->nocb_cb_sleep);
}
927 | |
928 | /* |
929 | * Invoke any ready callbacks from the corresponding no-CBs CPU, |
930 | * then, if there are no more, wait for more to appear. |
931 | */ |
932 | static void nocb_cb_wait(struct rcu_data *rdp) |
933 | { |
934 | struct rcu_segcblist *cblist = &rdp->cblist; |
935 | unsigned long cur_gp_seq; |
936 | unsigned long flags; |
937 | bool needwake_state = false; |
938 | bool needwake_gp = false; |
939 | bool can_sleep = true; |
940 | struct rcu_node *rnp = rdp->mynode; |
941 | |
942 | do { |
943 | swait_event_interruptible_exclusive(rdp->nocb_cb_wq, |
944 | nocb_cb_wait_cond(rdp)); |
945 | |
946 | if (READ_ONCE(rdp->nocb_cb_sleep)) { |
947 | WARN_ON(signal_pending(current)); |
948 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("WokeEmpty" )); |
949 | } |
950 | } while (!nocb_cb_can_run(rdp)); |
951 | |
952 | |
953 | local_irq_save(flags); |
954 | rcu_momentary_dyntick_idle(); |
955 | local_irq_restore(flags); |
/*
 * Disable BH to provide the expected environment.  Also, when
 * transitioning to/from NOCB mode, a self-requeuing callback might
 * be invoked from softirq.  A short grace period could cause both
 * instances of this callback to execute concurrently.
 */
962 | local_bh_disable(); |
963 | rcu_do_batch(rdp); |
964 | local_bh_enable(); |
965 | lockdep_assert_irqs_enabled(); |
966 | rcu_nocb_lock_irqsave(rdp, flags); |
967 | if (rcu_segcblist_nextgp(rsclp: cblist, lp: &cur_gp_seq) && |
968 | rcu_seq_done(sp: &rnp->gp_seq, s: cur_gp_seq) && |
969 | raw_spin_trylock_rcu_node(rnp)) { /* irqs already disabled. */ |
970 | needwake_gp = rcu_advance_cbs(rnp: rdp->mynode, rdp); |
971 | raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ |
972 | } |
973 | |
974 | if (rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_OFFLOADED)) { |
975 | if (!rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_CB)) { |
976 | rcu_segcblist_set_flags(rsclp: cblist, SEGCBLIST_KTHREAD_CB); |
977 | if (rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_GP)) |
978 | needwake_state = true; |
979 | } |
980 | if (rcu_segcblist_ready_cbs(rsclp: cblist)) |
981 | can_sleep = false; |
982 | } else { |
/*
 * De-offloading.  Clear our flag and notify the de-offload worker.
 * We won't touch the callbacks and will keep sleeping until we are
 * re-offloaded.
 */
988 | WARN_ON_ONCE(!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB)); |
989 | rcu_segcblist_clear_flags(rsclp: cblist, SEGCBLIST_KTHREAD_CB); |
990 | if (!rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_GP)) |
991 | needwake_state = true; |
992 | } |
993 | |
994 | WRITE_ONCE(rdp->nocb_cb_sleep, can_sleep); |
995 | |
996 | if (rdp->nocb_cb_sleep) |
997 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("CBSleep" )); |
998 | |
999 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1000 | if (needwake_gp) |
1001 | rcu_gp_kthread_wake(); |
1002 | |
1003 | if (needwake_state) |
1004 | swake_up_one(q: &rdp->nocb_state_wq); |
1005 | } |
1006 | |
1007 | /* |
1008 | * Per-rcu_data kthread, but only for no-CBs CPUs. Repeatedly invoke |
1009 | * nocb_cb_wait() to do the dirty work. |
1010 | */ |
1011 | static int rcu_nocb_cb_kthread(void *arg) |
1012 | { |
1013 | struct rcu_data *rdp = arg; |
1014 | |
1015 | // Each pass through this loop does one callback batch, and, |
1016 | // if there are no more ready callbacks, waits for them. |
1017 | for (;;) { |
1018 | nocb_cb_wait(rdp); |
1019 | cond_resched_tasks_rcu_qs(); |
1020 | } |
1021 | return 0; |
1022 | } |
1023 | |
1024 | /* Is a deferred wakeup of rcu_nocb_kthread() required? */ |
1025 | static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level) |
1026 | { |
1027 | return READ_ONCE(rdp->nocb_defer_wakeup) >= level; |
1028 | } |
1029 | |
1030 | /* Do a deferred wakeup of rcu_nocb_kthread(). */ |
1031 | static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp, |
1032 | struct rcu_data *rdp, int level, |
1033 | unsigned long flags) |
1034 | __releases(rdp_gp->nocb_gp_lock) |
1035 | { |
1036 | int ndw; |
1037 | int ret; |
1038 | |
1039 | if (!rcu_nocb_need_deferred_wakeup(rdp: rdp_gp, level)) { |
1040 | raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); |
1041 | return false; |
1042 | } |
1043 | |
1044 | ndw = rdp_gp->nocb_defer_wakeup; |
1045 | ret = __wake_nocb_gp(rdp_gp, rdp, force: ndw == RCU_NOCB_WAKE_FORCE, flags); |
1046 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("DeferredWake" )); |
1047 | |
1048 | return ret; |
1049 | } |
1050 | |
1051 | /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */ |
1052 | static void do_nocb_deferred_wakeup_timer(struct timer_list *t) |
1053 | { |
1054 | unsigned long flags; |
1055 | struct rcu_data *rdp = from_timer(rdp, t, nocb_timer); |
1056 | |
1057 | WARN_ON_ONCE(rdp->nocb_gp_rdp != rdp); |
1058 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("Timer" )); |
1059 | |
1060 | raw_spin_lock_irqsave(&rdp->nocb_gp_lock, flags); |
1061 | smp_mb__after_spinlock(); /* Timer expire before wakeup. */ |
1062 | do_nocb_deferred_wakeup_common(rdp_gp: rdp, rdp, RCU_NOCB_WAKE_BYPASS, flags); |
1063 | } |
1064 | |
1065 | /* |
1066 | * Do a deferred wakeup of rcu_nocb_kthread() from fastpath. |
1067 | * This means we do an inexact common-case check. Note that if |
1068 | * we miss, ->nocb_timer will eventually clean things up. |
1069 | */ |
1070 | static bool do_nocb_deferred_wakeup(struct rcu_data *rdp) |
1071 | { |
1072 | unsigned long flags; |
1073 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
1074 | |
1075 | if (!rdp_gp || !rcu_nocb_need_deferred_wakeup(rdp: rdp_gp, RCU_NOCB_WAKE)) |
1076 | return false; |
1077 | |
1078 | raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); |
1079 | return do_nocb_deferred_wakeup_common(rdp_gp, rdp, RCU_NOCB_WAKE, flags); |
1080 | } |
1081 | |
1082 | void rcu_nocb_flush_deferred_wakeup(void) |
1083 | { |
1084 | do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data)); |
1085 | } |
1086 | EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup); |
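
/*
 * Illustrative only: a path about to enter a long quiescent stretch (the
 * idle loop, for instance) can call this beforehand so that a pending
 * deferred rcuog wakeup is issued now rather than waiting on ->nocb_timer:
 *
 *	rcu_nocb_flush_deferred_wakeup();
 *	... enter idle ...
 */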
1087 | |
1088 | static int rdp_offload_toggle(struct rcu_data *rdp, |
1089 | bool offload, unsigned long flags) |
1090 | __releases(rdp->nocb_lock) |
1091 | { |
1092 | struct rcu_segcblist *cblist = &rdp->cblist; |
1093 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
1094 | bool wake_gp = false; |
1095 | |
1096 | rcu_segcblist_offload(rsclp: cblist, offload); |
1097 | |
1098 | if (rdp->nocb_cb_sleep) |
1099 | rdp->nocb_cb_sleep = false; |
1100 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1101 | |
1102 | /* |
1103 | * Ignore former value of nocb_cb_sleep and force wake up as it could |
1104 | * have been spuriously set to false already. |
1105 | */ |
1106 | swake_up_one(q: &rdp->nocb_cb_wq); |
1107 | |
1108 | raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); |
1109 | // Queue this rdp for add/del to/from the list to iterate on rcuog |
1110 | WRITE_ONCE(rdp_gp->nocb_toggling_rdp, rdp); |
1111 | if (rdp_gp->nocb_gp_sleep) { |
1112 | rdp_gp->nocb_gp_sleep = false; |
1113 | wake_gp = true; |
1114 | } |
1115 | raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); |
1116 | |
1117 | return wake_gp; |
1118 | } |
1119 | |
1120 | static long rcu_nocb_rdp_deoffload(void *arg) |
1121 | { |
1122 | struct rcu_data *rdp = arg; |
1123 | struct rcu_segcblist *cblist = &rdp->cblist; |
1124 | unsigned long flags; |
1125 | int wake_gp; |
1126 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
1127 | |
1128 | /* |
1129 | * rcu_nocb_rdp_deoffload() may be called directly if |
1130 | * rcuog/o[p] spawn failed, because at this time the rdp->cpu |
1131 | * is not online yet. |
1132 | */ |
1133 | WARN_ON_ONCE((rdp->cpu != raw_smp_processor_id()) && cpu_online(rdp->cpu)); |
1134 | |
1135 | pr_info("De-offloading %d\n" , rdp->cpu); |
1136 | |
1137 | rcu_nocb_lock_irqsave(rdp, flags); |
1138 | /* |
1139 | * Flush once and for all now. This suffices because we are |
1140 | * running on the target CPU holding ->nocb_lock (thus having |
1141 | * interrupts disabled), and because rdp_offload_toggle() |
1142 | * invokes rcu_segcblist_offload(), which clears SEGCBLIST_OFFLOADED. |
1143 | * Thus future calls to rcu_segcblist_completely_offloaded() will |
1144 | * return false, which means that future calls to rcu_nocb_try_bypass() |
1145 | * will refuse to put anything into the bypass. |
1146 | */ |
1147 | WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); |
1148 | /* |
1149 | * Start with invoking rcu_core() early. This way if the current thread |
1150 | * happens to preempt an ongoing call to rcu_core() in the middle, |
1151 | * leaving some work dismissed because rcu_core() still thinks the rdp is |
1152 | * completely offloaded, we are guaranteed a nearby future instance of |
1153 | * rcu_core() to catch up. |
1154 | */ |
1155 | rcu_segcblist_set_flags(rsclp: cblist, SEGCBLIST_RCU_CORE); |
1156 | invoke_rcu_core(); |
1157 | wake_gp = rdp_offload_toggle(rdp, offload: false, flags); |
1158 | |
1159 | mutex_lock(&rdp_gp->nocb_gp_kthread_mutex); |
1160 | if (rdp_gp->nocb_gp_kthread) { |
1161 | if (wake_gp) |
1162 | wake_up_process(tsk: rdp_gp->nocb_gp_kthread); |
1163 | |
/*
 * If rcuo[p] kthread spawn failed, directly remove SEGCBLIST_KTHREAD_CB.
 * Just wait for SEGCBLIST_KTHREAD_GP to be cleared by rcuog.
 */
1168 | if (!rdp->nocb_cb_kthread) { |
1169 | rcu_nocb_lock_irqsave(rdp, flags); |
1170 | rcu_segcblist_clear_flags(rsclp: &rdp->cblist, SEGCBLIST_KTHREAD_CB); |
1171 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1172 | } |
1173 | |
1174 | swait_event_exclusive(rdp->nocb_state_wq, |
1175 | !rcu_segcblist_test_flags(cblist, |
1176 | SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP)); |
1177 | } else { |
/*
 * No kthread to clear the flags for us or to remove the rdp from the
 * nocb list to iterate on.  Do it here instead.  Locking doesn't look
 * strictly necessary, but we stick to paranoia on this rare path.
 */
1183 | rcu_nocb_lock_irqsave(rdp, flags); |
1184 | rcu_segcblist_clear_flags(rsclp: &rdp->cblist, |
1185 | SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP); |
1186 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1187 | |
1188 | list_del(entry: &rdp->nocb_entry_rdp); |
1189 | } |
1190 | mutex_unlock(lock: &rdp_gp->nocb_gp_kthread_mutex); |
1191 | |
1192 | /* |
1193 | * Lock one last time to acquire latest callback updates from kthreads |
1194 | * so we can later handle callbacks locally without locking. |
1195 | */ |
1196 | rcu_nocb_lock_irqsave(rdp, flags); |
1197 | /* |
1198 | * Theoretically we could clear SEGCBLIST_LOCKING after the nocb |
1199 | * lock is released but how about being paranoid for once? |
1200 | */ |
1201 | rcu_segcblist_clear_flags(rsclp: cblist, SEGCBLIST_LOCKING); |
1202 | /* |
1203 | * Without SEGCBLIST_LOCKING, we can't use |
1204 | * rcu_nocb_unlock_irqrestore() anymore. |
1205 | */ |
1206 | raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); |
1207 | |
1208 | /* Sanity check */ |
1209 | WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); |
1210 | |
1211 | |
1212 | return 0; |
1213 | } |
1214 | |
1215 | int rcu_nocb_cpu_deoffload(int cpu) |
1216 | { |
1217 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
1218 | int ret = 0; |
1219 | |
1220 | cpus_read_lock(); |
1221 | mutex_lock(&rcu_state.barrier_mutex); |
1222 | if (rcu_rdp_is_offloaded(rdp)) { |
1223 | if (cpu_online(cpu)) { |
1224 | ret = work_on_cpu(cpu, rcu_nocb_rdp_deoffload, rdp); |
1225 | if (!ret) |
1226 | cpumask_clear_cpu(cpu, dstp: rcu_nocb_mask); |
1227 | } else { |
1228 | pr_info("NOCB: Cannot CB-deoffload offline CPU %d\n" , rdp->cpu); |
1229 | ret = -EINVAL; |
1230 | } |
1231 | } |
1232 | mutex_unlock(lock: &rcu_state.barrier_mutex); |
1233 | cpus_read_unlock(); |
1234 | |
1235 | return ret; |
1236 | } |
1237 | EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload); |
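
/*
 * Hypothetical usage, for illustration only:
 *
 *	if (rcu_nocb_cpu_deoffload(cpu))
 *		pr_warn("CPU %d could not be CB-deoffloaded\n", cpu);
 *
 * A nonzero return (for example -EINVAL for an offline CPU) means the CPU's
 * offloaded state was left unchanged.
 */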
1238 | |
1239 | static long rcu_nocb_rdp_offload(void *arg) |
1240 | { |
1241 | struct rcu_data *rdp = arg; |
1242 | struct rcu_segcblist *cblist = &rdp->cblist; |
1243 | unsigned long flags; |
1244 | int wake_gp; |
1245 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
1246 | |
1247 | WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id()); |
/*
 * For now we only support re-offload, i.e., the rdp must have been
 * offloaded at boot time first.
 */
1252 | if (!rdp->nocb_gp_rdp) |
1253 | return -EINVAL; |
1254 | |
1255 | if (WARN_ON_ONCE(!rdp_gp->nocb_gp_kthread)) |
1256 | return -EINVAL; |
1257 | |
1258 | pr_info("Offloading %d\n" , rdp->cpu); |
1259 | |
1260 | /* |
1261 | * Can't use rcu_nocb_lock_irqsave() before SEGCBLIST_LOCKING |
1262 | * is set. |
1263 | */ |
1264 | raw_spin_lock_irqsave(&rdp->nocb_lock, flags); |
1265 | |
1266 | /* |
 * We didn't take the nocb lock while working on the
 * rdp->cblist with SEGCBLIST_LOCKING cleared (pure softirq/rcuc mode).
 * Every modification previously made to rdp->cblist must be visible
 * remotely to the nocb kthreads upon wakeup, after they read the
 * cblist flags.
1272 | * |
1273 | * The layout against nocb_lock enforces that ordering: |
1274 | * |
1275 | * __rcu_nocb_rdp_offload() nocb_cb_wait()/nocb_gp_wait() |
1276 | * ------------------------- ---------------------------- |
1277 | * WRITE callbacks rcu_nocb_lock() |
1278 | * rcu_nocb_lock() READ flags |
1279 | * WRITE flags READ callbacks |
1280 | * rcu_nocb_unlock() rcu_nocb_unlock() |
1281 | */ |
1282 | wake_gp = rdp_offload_toggle(rdp, offload: true, flags); |
1283 | if (wake_gp) |
1284 | wake_up_process(tsk: rdp_gp->nocb_gp_kthread); |
1285 | swait_event_exclusive(rdp->nocb_state_wq, |
1286 | rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) && |
1287 | rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)); |
1288 | |
1289 | /* |
1290 | * All kthreads are ready to work, we can finally relieve rcu_core() and |
1291 | * enable nocb bypass. |
1292 | */ |
1293 | rcu_nocb_lock_irqsave(rdp, flags); |
1294 | rcu_segcblist_clear_flags(rsclp: cblist, SEGCBLIST_RCU_CORE); |
1295 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1296 | |
1297 | return 0; |
1298 | } |
1299 | |
1300 | int rcu_nocb_cpu_offload(int cpu) |
1301 | { |
1302 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
1303 | int ret = 0; |
1304 | |
1305 | cpus_read_lock(); |
1306 | mutex_lock(&rcu_state.barrier_mutex); |
1307 | if (!rcu_rdp_is_offloaded(rdp)) { |
1308 | if (cpu_online(cpu)) { |
1309 | ret = work_on_cpu(cpu, rcu_nocb_rdp_offload, rdp); |
1310 | if (!ret) |
1311 | cpumask_set_cpu(cpu, dstp: rcu_nocb_mask); |
1312 | } else { |
1313 | pr_info("NOCB: Cannot CB-offload offline CPU %d\n" , rdp->cpu); |
1314 | ret = -EINVAL; |
1315 | } |
1316 | } |
1317 | mutex_unlock(lock: &rcu_state.barrier_mutex); |
1318 | cpus_read_unlock(); |
1319 | |
1320 | return ret; |
1321 | } |
1322 | EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload); |
1323 | |
1324 | #ifdef CONFIG_RCU_LAZY |
1325 | static unsigned long |
1326 | lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) |
1327 | { |
1328 | int cpu; |
1329 | unsigned long count = 0; |
1330 | |
1331 | if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask))) |
1332 | return 0; |
1333 | |
1334 | /* Protect rcu_nocb_mask against concurrent (de-)offloading. */ |
1335 | if (!mutex_trylock(lock: &rcu_state.barrier_mutex)) |
1336 | return 0; |
1337 | |
1338 | /* Snapshot count of all CPUs */ |
1339 | for_each_cpu(cpu, rcu_nocb_mask) { |
1340 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
1341 | |
1342 | count += READ_ONCE(rdp->lazy_len); |
1343 | } |
1344 | |
1345 | mutex_unlock(lock: &rcu_state.barrier_mutex); |
1346 | |
1347 | return count ? count : SHRINK_EMPTY; |
1348 | } |
1349 | |
1350 | static unsigned long |
1351 | lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
1352 | { |
1353 | int cpu; |
1354 | unsigned long flags; |
1355 | unsigned long count = 0; |
1356 | |
1357 | if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask))) |
1358 | return 0; |
1359 | /* |
1360 | * Protect against concurrent (de-)offloading. Otherwise nocb locking |
1361 | * may be ignored or imbalanced. |
1362 | */ |
1363 | if (!mutex_trylock(lock: &rcu_state.barrier_mutex)) { |
1364 | /* |
1365 | * But really don't insist if barrier_mutex is contended since we |
1366 | * can't guarantee that it will never engage in a dependency |
1367 | * chain involving memory allocation. The lock is seldom contended |
1368 | * anyway. |
1369 | */ |
1370 | return 0; |
1371 | } |
1372 | |
1373 | /* Snapshot count of all CPUs */ |
1374 | for_each_cpu(cpu, rcu_nocb_mask) { |
1375 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
1376 | int _count; |
1377 | |
1378 | if (WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp))) |
1379 | continue; |
1380 | |
1381 | if (!READ_ONCE(rdp->lazy_len)) |
1382 | continue; |
1383 | |
1384 | rcu_nocb_lock_irqsave(rdp, flags); |
/*
 * Recheck under the nocb lock.  Since we are not holding the bypass
 * lock, we may still race with increments from the enqueuer, but we
 * know for sure whether there is at least one lazy callback.
 */
1390 | _count = READ_ONCE(rdp->lazy_len); |
1391 | if (!_count) { |
1392 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1393 | continue; |
1394 | } |
1395 | rcu_nocb_try_flush_bypass(rdp, j: jiffies); |
1396 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1397 | wake_nocb_gp(rdp, force: false); |
1398 | sc->nr_to_scan -= _count; |
1399 | count += _count; |
1400 | if (sc->nr_to_scan <= 0) |
1401 | break; |
1402 | } |
1403 | |
1404 | mutex_unlock(lock: &rcu_state.barrier_mutex); |
1405 | |
1406 | return count ? count : SHRINK_STOP; |
1407 | } |
1408 | #endif // #ifdef CONFIG_RCU_LAZY |
1409 | |
1410 | void __init rcu_init_nohz(void) |
1411 | { |
1412 | int cpu; |
1413 | struct rcu_data *rdp; |
1414 | const struct cpumask *cpumask = NULL; |
1415 | struct shrinker * __maybe_unused lazy_rcu_shrinker; |
1416 | |
1417 | #if defined(CONFIG_NO_HZ_FULL) |
1418 | if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) |
1419 | cpumask = tick_nohz_full_mask; |
1420 | #endif |
1421 | |
1422 | if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) && |
1423 | !rcu_state.nocb_is_setup && !cpumask) |
1424 | cpumask = cpu_possible_mask; |
1425 | |
1426 | if (cpumask) { |
1427 | if (!cpumask_available(mask: rcu_nocb_mask)) { |
1428 | if (!zalloc_cpumask_var(mask: &rcu_nocb_mask, GFP_KERNEL)) { |
1429 | pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n" ); |
1430 | return; |
1431 | } |
1432 | } |
1433 | |
1434 | cpumask_or(dstp: rcu_nocb_mask, src1p: rcu_nocb_mask, src2p: cpumask); |
1435 | rcu_state.nocb_is_setup = true; |
1436 | } |
1437 | |
1438 | if (!rcu_state.nocb_is_setup) |
1439 | return; |
1440 | |
1441 | #ifdef CONFIG_RCU_LAZY |
1442 | lazy_rcu_shrinker = shrinker_alloc(flags: 0, fmt: "rcu-lazy" ); |
1443 | if (!lazy_rcu_shrinker) { |
1444 | pr_err("Failed to allocate lazy_rcu shrinker!\n" ); |
1445 | } else { |
1446 | lazy_rcu_shrinker->count_objects = lazy_rcu_shrink_count; |
1447 | lazy_rcu_shrinker->scan_objects = lazy_rcu_shrink_scan; |
1448 | |
1449 | shrinker_register(shrinker: lazy_rcu_shrinker); |
1450 | } |
1451 | #endif // #ifdef CONFIG_RCU_LAZY |
1452 | |
1453 | if (!cpumask_subset(src1p: rcu_nocb_mask, cpu_possible_mask)) { |
1454 | pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n" ); |
1455 | cpumask_and(dstp: rcu_nocb_mask, cpu_possible_mask, |
1456 | src2p: rcu_nocb_mask); |
1457 | } |
1458 | if (cpumask_empty(srcp: rcu_nocb_mask)) |
1459 | pr_info("\tOffload RCU callbacks from CPUs: (none).\n" ); |
1460 | else |
1461 | pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n" , |
1462 | cpumask_pr_args(rcu_nocb_mask)); |
1463 | if (rcu_nocb_poll) |
1464 | pr_info("\tPoll for callbacks from no-CBs CPUs.\n" ); |
1465 | |
1466 | for_each_cpu(cpu, rcu_nocb_mask) { |
1467 | rdp = per_cpu_ptr(&rcu_data, cpu); |
		if (rcu_segcblist_empty(&rdp->cblist))
			rcu_segcblist_init(&rdp->cblist);
		rcu_segcblist_offload(&rdp->cblist, true);
		rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP);
		rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_RCU_CORE);
1473 | } |
1474 | rcu_organize_nocb_kthreads(); |
1475 | } |
1476 | |
1477 | /* Initialize per-rcu_data variables for no-CBs CPUs. */ |
1478 | static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) |
1479 | { |
1480 | init_swait_queue_head(&rdp->nocb_cb_wq); |
1481 | init_swait_queue_head(&rdp->nocb_gp_wq); |
1482 | init_swait_queue_head(&rdp->nocb_state_wq); |
1483 | raw_spin_lock_init(&rdp->nocb_lock); |
1484 | raw_spin_lock_init(&rdp->nocb_bypass_lock); |
1485 | raw_spin_lock_init(&rdp->nocb_gp_lock); |
1486 | timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); |
	rcu_cblist_init(&rdp->nocb_bypass);
1488 | WRITE_ONCE(rdp->lazy_len, 0); |
1489 | mutex_init(&rdp->nocb_gp_kthread_mutex); |
1490 | } |
1491 | |
1492 | /* |
1493 | * If the specified CPU is a no-CBs CPU that does not already have its |
1494 | * rcuo CB kthread, spawn it. Additionally, if the rcuo GP kthread |
1495 | * for this CPU's group has not yet been created, spawn it as well. |
1496 | */ |
1497 | static void rcu_spawn_cpu_nocb_kthread(int cpu) |
1498 | { |
1499 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
1500 | struct rcu_data *rdp_gp; |
1501 | struct task_struct *t; |
1502 | struct sched_param sp; |
1503 | |
1504 | if (!rcu_scheduler_fully_active || !rcu_state.nocb_is_setup) |
1505 | return; |
1506 | |
1507 | /* If there already is an rcuo kthread, then nothing to do. */ |
1508 | if (rdp->nocb_cb_kthread) |
1509 | return; |
1510 | |
1511 | /* If we didn't spawn the GP kthread first, reorganize! */ |
1512 | sp.sched_priority = kthread_prio; |
1513 | rdp_gp = rdp->nocb_gp_rdp; |
1514 | mutex_lock(&rdp_gp->nocb_gp_kthread_mutex); |
1515 | if (!rdp_gp->nocb_gp_kthread) { |
		t = kthread_run(rcu_nocb_gp_kthread, rdp_gp,
				"rcuog/%d", rdp_gp->cpu);
		if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) {
			mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
1520 | goto end; |
1521 | } |
1522 | WRITE_ONCE(rdp_gp->nocb_gp_kthread, t); |
1523 | if (kthread_prio) |
1524 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1525 | } |
	mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
1527 | |
1528 | /* Spawn the kthread for this CPU. */ |
	t = kthread_run(rcu_nocb_cb_kthread, rdp,
			"rcuo%c/%d", rcu_state.abbr, cpu);
	if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
1532 | goto end; |
1533 | |
1534 | if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_CB_BOOST) && kthread_prio) |
1535 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1536 | |
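	/* These kthread pointers are read locklessly elsewhere, hence WRITE_ONCE(). */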
1537 | WRITE_ONCE(rdp->nocb_cb_kthread, t); |
1538 | WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread); |
1539 | return; |
1540 | end: |
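	/*
	 * Kthread creation failed: de-offload this CPU back to non-offloaded
	 * callback processing. Holding ->barrier_mutex keeps a concurrent
	 * rcu_barrier() from running while the offloaded state changes.
	 */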
1541 | mutex_lock(&rcu_state.barrier_mutex); |
1542 | if (rcu_rdp_is_offloaded(rdp)) { |
		rcu_nocb_rdp_deoffload(rdp);
		cpumask_clear_cpu(cpu, rcu_nocb_mask);
	}
	mutex_unlock(&rcu_state.barrier_mutex);
1547 | } |
1548 | |
1549 | /* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */ |
1550 | static int rcu_nocb_gp_stride = -1; |
1551 | module_param(rcu_nocb_gp_stride, int, 0444); |
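/*
 * For example (illustrative only), booting with this parameter set to 4,
 * typically as rcutree.rcu_nocb_gp_stride=4 on the kernel command line,
 * groups the no-CBs CPUs into rcuog domains of four CPUs each.
 */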
1552 | |
1553 | /* |
 * Initialize GP-CB relationships for all no-CBs CPUs.
1555 | */ |
1556 | static void __init rcu_organize_nocb_kthreads(void) |
1557 | { |
1558 | int cpu; |
1559 | bool firsttime = true; |
1560 | bool gotnocbs = false; |
1561 | bool gotnocbscbs = true; |
1562 | int ls = rcu_nocb_gp_stride; |
1563 | int nl = 0; /* Next GP kthread. */ |
1564 | struct rcu_data *rdp; |
1565 | struct rcu_data *rdp_gp = NULL; /* Suppress misguided gcc warn. */ |
1566 | |
	if (!cpumask_available(rcu_nocb_mask))
1568 | return; |
1569 | if (ls == -1) { |
1570 | ls = nr_cpu_ids / int_sqrt(nr_cpu_ids); |
1571 | rcu_nocb_gp_stride = ls; |
1572 | } |
1573 | |
1574 | /* |
1575 | * Each pass through this loop sets up one rcu_data structure. |
1576 | * Should the corresponding CPU come online in the future, then |
1577 | * we will spawn the needed set of rcu_nocb_kthread() kthreads. |
1578 | */ |
1579 | for_each_possible_cpu(cpu) { |
1580 | rdp = per_cpu_ptr(&rcu_data, cpu); |
1581 | if (rdp->cpu >= nl) { |
1582 | /* New GP kthread, set up for CBs & next GP. */ |
1583 | gotnocbs = true; |
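			/*
			 * Round nl up to the next stride boundary: for example,
			 * with ls == 8 and rdp->cpu == 0, nl becomes 8, so CPUs
			 * 1-7 will link to this GP kthread below.
			 */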
1584 | nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls; |
1585 | rdp_gp = rdp; |
			INIT_LIST_HEAD(&rdp->nocb_head_rdp);
			if (dump_tree) {
				if (!firsttime)
					pr_cont("%s\n", gotnocbscbs
						? "" : " (self only)");
				gotnocbscbs = false;
				firsttime = false;
				pr_alert("%s: No-CB GP kthread CPU %d:",
1594 | __func__, cpu); |
1595 | } |
1596 | } else { |
1597 | /* Another CB kthread, link to previous GP kthread. */ |
1598 | gotnocbscbs = true; |
1599 | if (dump_tree) |
				pr_cont(" %d", cpu);
1601 | } |
1602 | rdp->nocb_gp_rdp = rdp_gp; |
		if (cpumask_test_cpu(cpu, rcu_nocb_mask))
			list_add_tail(&rdp->nocb_entry_rdp, &rdp_gp->nocb_head_rdp);
1605 | } |
1606 | if (gotnocbs && dump_tree) |
		pr_cont("%s\n", gotnocbscbs ? "" : " (self only)");
1608 | } |
1609 | |
1610 | /* |
1611 | * Bind the current task to the offloaded CPUs. If there are no offloaded |
1612 | * CPUs, leave the task unbound. Splat if the bind attempt fails. |
1613 | */ |
1614 | void rcu_bind_current_to_nocb(void) |
1615 | { |
	if (cpumask_available(rcu_nocb_mask) && !cpumask_empty(rcu_nocb_mask))
1617 | WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask)); |
1618 | } |
1619 | EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb); |
1620 | |
1621 | // The ->on_cpu field is available only in CONFIG_SMP=y, so... |
1622 | #ifdef CONFIG_SMP |
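/* Return "!" if the task is runnable but not actually running on a CPU. */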
1623 | static char *show_rcu_should_be_on_cpu(struct task_struct *tsp) |
1624 | { |
	return tsp && task_is_running(tsp) && !tsp->on_cpu ? "!" : "";
1626 | } |
1627 | #else // #ifdef CONFIG_SMP |
1628 | static char *show_rcu_should_be_on_cpu(struct task_struct *tsp) |
1629 | { |
	return "";
1631 | } |
1632 | #endif // #else #ifdef CONFIG_SMP |
1633 | |
1634 | /* |
1635 | * Dump out nocb grace-period kthread state for the specified rcu_data |
1636 | * structure. |
1637 | */ |
1638 | static void show_rcu_nocb_gp_state(struct rcu_data *rdp) |
1639 | { |
1640 | struct rcu_node *rnp = rdp->mynode; |
1641 | |
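	/*
	 * For each "xX"[cond] argument below, the lowercase letter (or '.')
	 * is printed when the condition is false and the uppercase letter
	 * when it is true.
	 */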
	pr_info("nocb GP %d %c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu %c CPU %d%s\n",
		rdp->cpu,
		"kK"[!!rdp->nocb_gp_kthread],
		"lL"[raw_spin_is_locked(&rdp->nocb_gp_lock)],
		"dD"[!!rdp->nocb_defer_wakeup],
		"tT"[timer_pending(&rdp->nocb_timer)],
		"sS"[!!rdp->nocb_gp_sleep],
		".W"[swait_active(&rdp->nocb_gp_wq)],
		".W"[swait_active(&rnp->nocb_gp_wq[0])],
		".W"[swait_active(&rnp->nocb_gp_wq[1])],
		".B"[!!rdp->nocb_gp_bypass],
		".G"[!!rdp->nocb_gp_gp],
1654 | (long)rdp->nocb_gp_seq, |
1655 | rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops), |
1656 | rdp->nocb_gp_kthread ? task_state_to_char(rdp->nocb_gp_kthread) : '.', |
1657 | rdp->nocb_gp_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1, |
1658 | show_rcu_should_be_on_cpu(rdp->nocb_gp_kthread)); |
1659 | } |
1660 | |
1661 | /* Dump out nocb kthread state for the specified rcu_data structure. */ |
1662 | static void show_rcu_nocb_state(struct rcu_data *rdp) |
1663 | { |
1664 | char bufw[20]; |
1665 | char bufr[20]; |
1666 | struct rcu_data *nocb_next_rdp; |
1667 | struct rcu_segcblist *rsclp = &rdp->cblist; |
1668 | bool waslocked; |
1669 | bool wassleep; |
1670 | |
1671 | if (rdp->nocb_gp_rdp == rdp) |
1672 | show_rcu_nocb_gp_state(rdp); |
1673 | |
1674 | nocb_next_rdp = list_next_or_null_rcu(&rdp->nocb_gp_rdp->nocb_head_rdp, |
1675 | &rdp->nocb_entry_rdp, |
1676 | typeof(*rdp), |
1677 | nocb_entry_rdp); |
1678 | |
	sprintf(bufw, "%ld", rsclp->gp_seq[RCU_WAIT_TAIL]);
	sprintf(bufr, "%ld", rsclp->gp_seq[RCU_NEXT_READY_TAIL]);
	pr_info(" CB %d^%d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%s%c%s%c%c q%ld %c CPU %d%s\n",
		rdp->cpu, rdp->nocb_gp_rdp->cpu,
		nocb_next_rdp ? nocb_next_rdp->cpu : -1,
		"kK"[!!rdp->nocb_cb_kthread],
		"bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)],
		"cC"[!!atomic_read(&rdp->nocb_lock_contended)],
		"lL"[raw_spin_is_locked(&rdp->nocb_lock)],
		"sS"[!!rdp->nocb_cb_sleep],
		".W"[swait_active(&rdp->nocb_cb_wq)],
1690 | jiffies - rdp->nocb_bypass_first, |
1691 | jiffies - rdp->nocb_nobypass_last, |
		rdp->nocb_nobypass_count,
		".D"[rcu_segcblist_ready_cbs(rsclp)],
		".W"[!rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL)],
		rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL) ? "" : bufw,
		".R"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL)],
		rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL) ? "" : bufr,
		".N"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL)],
		".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)],
1700 | rcu_segcblist_n_cbs(&rdp->cblist), |
1701 | rdp->nocb_cb_kthread ? task_state_to_char(rdp->nocb_cb_kthread) : '.', |
1702 | rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_cb_kthread) : -1, |
1703 | show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread)); |
1704 | |
1705 | /* It is OK for GP kthreads to have GP state. */ |
1706 | if (rdp->nocb_gp_rdp == rdp) |
1707 | return; |
1708 | |
1709 | waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock); |
	wassleep = swait_active(&rdp->nocb_gp_wq);
1711 | if (!rdp->nocb_gp_sleep && !waslocked && !wassleep) |
1712 | return; /* Nothing untoward. */ |
1713 | |
	pr_info(" nocb GP activity on CB-only CPU!!! %c%c%c %c\n",
		"lL"[waslocked],
		"dD"[!!rdp->nocb_defer_wakeup],
		"sS"[!!rdp->nocb_gp_sleep],
		".W"[wassleep]);
1719 | } |
1720 | |
1721 | #else /* #ifdef CONFIG_RCU_NOCB_CPU */ |
1722 | |
1723 | static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp) |
1724 | { |
1725 | return 0; |
1726 | } |
1727 | |
1728 | static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp) |
1729 | { |
1730 | return false; |
1731 | } |
1732 | |
1733 | /* No ->nocb_lock to acquire. */ |
1734 | static void rcu_nocb_lock(struct rcu_data *rdp) |
1735 | { |
1736 | } |
1737 | |
1738 | /* No ->nocb_lock to release. */ |
1739 | static void rcu_nocb_unlock(struct rcu_data *rdp) |
1740 | { |
1741 | } |
1742 | |
1743 | /* No ->nocb_lock to release. */ |
1744 | static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp, |
1745 | unsigned long flags) |
1746 | { |
1747 | local_irq_restore(flags); |
1748 | } |
1749 | |
1750 | /* Lockdep check that ->cblist may be safely accessed. */ |
1751 | static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp) |
1752 | { |
1753 | lockdep_assert_irqs_disabled(); |
1754 | } |
1755 | |
1756 | static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq) |
1757 | { |
1758 | } |
1759 | |
1760 | static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) |
1761 | { |
1762 | return NULL; |
1763 | } |
1764 | |
1765 | static void rcu_init_one_nocb(struct rcu_node *rnp) |
1766 | { |
1767 | } |
1768 | |
1769 | static bool wake_nocb_gp(struct rcu_data *rdp, bool force) |
1770 | { |
1771 | return false; |
1772 | } |
1773 | |
1774 | static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, |
1775 | unsigned long j, bool lazy) |
1776 | { |
1777 | return true; |
1778 | } |
1779 | |
1780 | static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head, |
1781 | rcu_callback_t func, unsigned long flags, bool lazy) |
1782 | { |
1783 | WARN_ON_ONCE(1); /* Should be dead code! */ |
1784 | } |
1785 | |
1786 | static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, |
1787 | unsigned long flags) |
1788 | { |
1789 | WARN_ON_ONCE(1); /* Should be dead code! */ |
1790 | } |
1791 | |
1792 | static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) |
1793 | { |
1794 | } |
1795 | |
1796 | static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level) |
1797 | { |
1798 | return false; |
1799 | } |
1800 | |
1801 | static bool do_nocb_deferred_wakeup(struct rcu_data *rdp) |
1802 | { |
1803 | return false; |
1804 | } |
1805 | |
1806 | static void rcu_spawn_cpu_nocb_kthread(int cpu) |
1807 | { |
1808 | } |
1809 | |
1810 | static void show_rcu_nocb_state(struct rcu_data *rdp) |
1811 | { |
1812 | } |
1813 | |
1814 | #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ |
1815 | |