1 | /* SPDX-License-Identifier: GPL-2.0+ */ |
2 | /* |
3 | * Read-Copy Update mechanism for mutual exclusion (tree-based version) |
4 | * Internal non-public definitions that provide either classic |
5 | * or preemptible semantics. |
6 | * |
7 | * Copyright Red Hat, 2009 |
8 | * Copyright IBM Corporation, 2009 |
9 | * Copyright SUSE, 2021 |
10 | * |
11 | * Author: Ingo Molnar <mingo@elte.hu> |
12 | * Paul E. McKenney <paulmck@linux.ibm.com> |
13 | * Frederic Weisbecker <frederic@kernel.org> |
14 | */ |
15 | |
16 | #ifdef CONFIG_RCU_NOCB_CPU |
17 | static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ |
static bool __read_mostly rcu_nocb_poll; /* Offload kthreads are to poll. */
19 | static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp) |
20 | { |
21 | return lockdep_is_held(&rdp->nocb_lock); |
22 | } |
23 | |
24 | static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp) |
25 | { |
26 | /* Race on early boot between thread creation and assignment */ |
27 | if (!rdp->nocb_cb_kthread || !rdp->nocb_gp_kthread) |
28 | return true; |
29 | |
30 | if (current == rdp->nocb_cb_kthread || current == rdp->nocb_gp_kthread) |
31 | if (in_task()) |
32 | return true; |
33 | return false; |
34 | } |
35 | |
/*
 * Offload callback processing from the set of CPUs specified at boot time
 * by rcu_nocb_mask.  For the CPUs in the set, there are kthreads
 * created that pull the callbacks from the corresponding CPU, wait for
 * a grace period to elapse, and invoke the callbacks.  These kthreads
 * are organized into GP kthreads, which manage incoming callbacks, wait for
 * grace periods, and awaken CB kthreads, and the CB kthreads, which only
 * invoke callbacks.  Each GP kthread invokes its own CBs.  The no-CBs CPUs
 * do a wake_up() on their GP kthread when they insert a callback into any
 * empty list, unless the rcu_nocb_poll boot parameter has been specified,
 * in which case each kthread actively polls its CPU.  (Which isn't so great
 * for energy efficiency, but which does reduce RCU's overhead on that CPU.)
 *
 * This is intended to be used in conjunction with Frederic Weisbecker's
 * adaptive-idle work, which would seriously reduce OS jitter on CPUs
 * running CPU-bound user-mode computations.
 *
 * Offloading of callbacks can also be used as an energy-efficiency
 * measure because CPUs with no RCU callbacks queued are more aggressive
 * about entering dyntick-idle mode.
 */
57 | |
58 | |
59 | /* |
60 | * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. |
61 | * If the list is invalid, a warning is emitted and all CPUs are offloaded. |
62 | */ |
static int __init rcu_nocb_setup(char *str)
{
	alloc_bootmem_cpumask_var(&rcu_nocb_mask);
	if (*str == '=') {
		if (cpulist_parse(++str, rcu_nocb_mask)) {
			pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
			cpumask_setall(rcu_nocb_mask);
		}
	}
	rcu_state.nocb_is_setup = true;
	return 1;
}
__setup("rcu_nocbs", rcu_nocb_setup);

static int __init parse_rcu_nocb_poll(char *arg)
{
	rcu_nocb_poll = true;
	return 1;
}
__setup("rcu_nocb_poll", parse_rcu_nocb_poll);
83 | |
84 | /* |
85 | * Don't bother bypassing ->cblist if the call_rcu() rate is low. |
86 | * After all, the main point of bypassing is to avoid lock contention |
87 | * on ->nocb_lock, which only can happen at high call_rcu() rates. |
88 | */ |
89 | static int nocb_nobypass_lim_per_jiffy = 16 * 1000 / HZ; |
90 | module_param(nocb_nobypass_lim_per_jiffy, int, 0); |
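/*
 * For example, with HZ=1000 the default works out to 16 direct ->cblist
 * enqueues per jiffy (16 * 1000 / HZ); with HZ=250 it would be 64.  Only
 * call_rcu() rates above that threshold start being diverted to the
 * ->nocb_bypass list (see rcu_nocb_try_bypass() below).
 */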
91 | |
92 | /* |
93 | * Acquire the specified rcu_data structure's ->nocb_bypass_lock. If the |
94 | * lock isn't immediately available, increment ->nocb_lock_contended to |
95 | * flag the contention. |
96 | */ |
static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
	__acquires(&rdp->nocb_bypass_lock)
{
	lockdep_assert_irqs_disabled();
	if (raw_spin_trylock(&rdp->nocb_bypass_lock))
		return;
	atomic_inc(&rdp->nocb_lock_contended);
	WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
	smp_mb__after_atomic(); /* atomic_inc() before lock. */
	raw_spin_lock(&rdp->nocb_bypass_lock);
	smp_mb__before_atomic(); /* atomic_dec() after lock. */
	atomic_dec(&rdp->nocb_lock_contended);
}
110 | |
111 | /* |
112 | * Spinwait until the specified rcu_data structure's ->nocb_lock is |
113 | * not contended. Please note that this is extremely special-purpose, |
114 | * relying on the fact that at most two kthreads and one CPU contend for |
115 | * this lock, and also that the two kthreads are guaranteed to have frequent |
116 | * grace-period-duration time intervals between successive acquisitions |
117 | * of the lock. This allows us to use an extremely simple throttling |
118 | * mechanism, and further to apply it only to the CPU doing floods of |
119 | * call_rcu() invocations. Don't try this at home! |
120 | */ |
121 | static void rcu_nocb_wait_contended(struct rcu_data *rdp) |
122 | { |
123 | WARN_ON_ONCE(smp_processor_id() != rdp->cpu); |
124 | while (WARN_ON_ONCE(atomic_read(&rdp->nocb_lock_contended))) |
125 | cpu_relax(); |
126 | } |
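
/*
 * Sketch of the intended calling pattern (as used in rcu_nocb_try_bypass()
 * below): the call_rcu() path first spins in rcu_nocb_wait_contended() and
 * only then takes the bypass lock, so a flooding CPU backs off whenever one
 * of the kthreads has flagged contention:
 *
 *	rcu_nocb_wait_contended(rdp);
 *	rcu_nocb_bypass_lock(rdp);
 *	... enqueue onto ->nocb_bypass ...
 *	rcu_nocb_bypass_unlock(rdp);
 */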
127 | |
128 | /* |
129 | * Conditionally acquire the specified rcu_data structure's |
130 | * ->nocb_bypass_lock. |
131 | */ |
132 | static bool rcu_nocb_bypass_trylock(struct rcu_data *rdp) |
133 | { |
134 | lockdep_assert_irqs_disabled(); |
135 | return raw_spin_trylock(&rdp->nocb_bypass_lock); |
136 | } |
137 | |
138 | /* |
139 | * Release the specified rcu_data structure's ->nocb_bypass_lock. |
140 | */ |
141 | static void rcu_nocb_bypass_unlock(struct rcu_data *rdp) |
142 | __releases(&rdp->nocb_bypass_lock) |
143 | { |
144 | lockdep_assert_irqs_disabled(); |
145 | raw_spin_unlock(&rdp->nocb_bypass_lock); |
146 | } |
147 | |
148 | /* |
149 | * Acquire the specified rcu_data structure's ->nocb_lock, but only |
150 | * if it corresponds to a no-CBs CPU. |
151 | */ |
152 | static void rcu_nocb_lock(struct rcu_data *rdp) |
153 | { |
154 | lockdep_assert_irqs_disabled(); |
155 | if (!rcu_rdp_is_offloaded(rdp)) |
156 | return; |
157 | raw_spin_lock(&rdp->nocb_lock); |
158 | } |
159 | |
160 | /* |
161 | * Release the specified rcu_data structure's ->nocb_lock, but only |
162 | * if it corresponds to a no-CBs CPU. |
163 | */ |
164 | static void rcu_nocb_unlock(struct rcu_data *rdp) |
165 | { |
166 | if (rcu_rdp_is_offloaded(rdp)) { |
167 | lockdep_assert_irqs_disabled(); |
168 | raw_spin_unlock(&rdp->nocb_lock); |
169 | } |
170 | } |
171 | |
172 | /* |
173 | * Release the specified rcu_data structure's ->nocb_lock and restore |
174 | * interrupts, but only if it corresponds to a no-CBs CPU. |
175 | */ |
176 | static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp, |
177 | unsigned long flags) |
178 | { |
179 | if (rcu_rdp_is_offloaded(rdp)) { |
180 | lockdep_assert_irqs_disabled(); |
181 | raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); |
182 | } else { |
183 | local_irq_restore(flags); |
184 | } |
185 | } |
186 | |
187 | /* Lockdep check that ->cblist may be safely accessed. */ |
188 | static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp) |
189 | { |
190 | lockdep_assert_irqs_disabled(); |
191 | if (rcu_rdp_is_offloaded(rdp)) |
192 | lockdep_assert_held(&rdp->nocb_lock); |
193 | } |
194 | |
195 | /* |
196 | * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended |
197 | * grace period. |
198 | */ |
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
{
	swake_up_all(sq);
}

static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{
	return &rnp->nocb_gp_wq[rcu_seq_ctr(rnp->gp_seq) & 0x1];
}

static void rcu_init_one_nocb(struct rcu_node *rnp)
{
	init_swait_queue_head(&rnp->nocb_gp_wq[0]);
	init_swait_queue_head(&rnp->nocb_gp_wq[1]);
}
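
/*
 * The two wait queues above are used alternately, indexed by the low-order
 * bit of the grace-period sequence counter: an rcuog kthread waiting for
 * grace period N sleeps on nocb_gp_wq[rcu_seq_ctr(N) & 0x1] (see
 * nocb_gp_wait() below), and rcu_nocb_gp_cleanup() is invoked on the
 * corresponding queue once that grace period has ended, leaving the other
 * entry free for the following grace period.
 */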
214 | |
215 | static bool __wake_nocb_gp(struct rcu_data *rdp_gp, |
216 | struct rcu_data *rdp, |
217 | bool force, unsigned long flags) |
218 | __releases(rdp_gp->nocb_gp_lock) |
219 | { |
220 | bool needwake = false; |
221 | |
222 | if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) { |
223 | raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); |
224 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
225 | TPS("AlreadyAwake" )); |
226 | return false; |
227 | } |
228 | |
229 | if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) { |
230 | WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); |
231 | del_timer(timer: &rdp_gp->nocb_timer); |
232 | } |
233 | |
234 | if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) { |
235 | WRITE_ONCE(rdp_gp->nocb_gp_sleep, false); |
236 | needwake = true; |
237 | } |
238 | raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); |
239 | if (needwake) { |
240 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("DoWake" )); |
241 | wake_up_process(tsk: rdp_gp->nocb_gp_kthread); |
242 | } |
243 | |
244 | return needwake; |
245 | } |
246 | |
247 | /* |
248 | * Kick the GP kthread for this NOCB group. |
249 | */ |
250 | static bool wake_nocb_gp(struct rcu_data *rdp, bool force) |
251 | { |
252 | unsigned long flags; |
253 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
254 | |
255 | raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); |
256 | return __wake_nocb_gp(rdp_gp, rdp, force, flags); |
257 | } |
258 | |
259 | #ifdef CONFIG_RCU_LAZY |
/*
 * LAZY_FLUSH_JIFFIES sets the maximum amount of time that can elapse
 * before lazy callbacks are flushed.  Lazy callbacks could be flushed
 * much earlier for a number of other reasons; however, LAZY_FLUSH_JIFFIES
 * ensures that no lazy callback is left unsubmitted to RCU for longer
 * than that many jiffies.
 */
267 | #define LAZY_FLUSH_JIFFIES (10 * HZ) |
268 | static unsigned long jiffies_lazy_flush = LAZY_FLUSH_JIFFIES; |
269 | |
270 | // To be called only from test code. |
271 | void rcu_set_jiffies_lazy_flush(unsigned long jif) |
272 | { |
273 | jiffies_lazy_flush = jif; |
274 | } |
275 | EXPORT_SYMBOL(rcu_set_jiffies_lazy_flush); |
276 | |
277 | unsigned long rcu_get_jiffies_lazy_flush(void) |
278 | { |
279 | return jiffies_lazy_flush; |
280 | } |
281 | EXPORT_SYMBOL(rcu_get_jiffies_lazy_flush); |
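
/*
 * A hypothetical test-only override, for illustration: a torture module
 * could shorten the flush horizon and later restore the previous value:
 *
 *	unsigned long orig = rcu_get_jiffies_lazy_flush();
 *
 *	rcu_set_jiffies_lazy_flush(HZ / 10);
 *	... run the lazy call_rcu() test ...
 *	rcu_set_jiffies_lazy_flush(orig);
 */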
282 | #endif |
283 | |
284 | /* |
285 | * Arrange to wake the GP kthread for this NOCB group at some future |
286 | * time when it is safe to do so. |
287 | */ |
288 | static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, |
289 | const char *reason) |
290 | { |
291 | unsigned long flags; |
292 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
293 | |
294 | raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); |
295 | |
296 | /* |
297 | * Bypass wakeup overrides previous deferments. In case of |
298 | * callback storms, no need to wake up too early. |
299 | */ |
300 | if (waketype == RCU_NOCB_WAKE_LAZY && |
301 | rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) { |
302 | mod_timer(timer: &rdp_gp->nocb_timer, expires: jiffies + rcu_get_jiffies_lazy_flush()); |
303 | WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); |
304 | } else if (waketype == RCU_NOCB_WAKE_BYPASS) { |
305 | mod_timer(timer: &rdp_gp->nocb_timer, expires: jiffies + 2); |
306 | WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); |
307 | } else { |
308 | if (rdp_gp->nocb_defer_wakeup < RCU_NOCB_WAKE) |
309 | mod_timer(timer: &rdp_gp->nocb_timer, expires: jiffies + 1); |
310 | if (rdp_gp->nocb_defer_wakeup < waketype) |
311 | WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); |
312 | } |
313 | |
314 | raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); |
315 | |
316 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, reason); |
317 | } |
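
/*
 * Example of the override rules above (the RCU_NOCB_WAKE_* values form an
 * increasing scale): a lazy request only arms the long LAZY_FLUSH_JIFFIES
 * timer when no wakeup at all was pending; a bypass request unconditionally
 * rearms the timer for two jiffies ("bypass wakeup overrides previous
 * deferments"); and an ordinary or forced request arms a one-jiffy timer
 * when nothing of RCU_NOCB_WAKE strength or above is already pending, and
 * never downgrades a stronger pending level.
 */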
318 | |
319 | /* |
320 | * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL. |
321 | * However, if there is a callback to be enqueued and if ->nocb_bypass |
322 | * proves to be initially empty, just return false because the no-CB GP |
323 | * kthread may need to be awakened in this case. |
324 | * |
325 | * Return true if there was something to be flushed and it succeeded, otherwise |
326 | * false. |
327 | * |
328 | * Note that this function always returns true if rhp is NULL. |
329 | */ |
330 | static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp_in, |
331 | unsigned long j, bool lazy) |
332 | { |
333 | struct rcu_cblist rcl; |
334 | struct rcu_head *rhp = rhp_in; |
335 | |
336 | WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)); |
337 | rcu_lockdep_assert_cblist_protected(rdp); |
338 | lockdep_assert_held(&rdp->nocb_bypass_lock); |
339 | if (rhp && !rcu_cblist_n_cbs(rclp: &rdp->nocb_bypass)) { |
340 | raw_spin_unlock(&rdp->nocb_bypass_lock); |
341 | return false; |
342 | } |
343 | /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */ |
344 | if (rhp) |
345 | rcu_segcblist_inc_len(rsclp: &rdp->cblist); /* Must precede enqueue. */ |
346 | |
347 | /* |
348 | * If the new CB requested was a lazy one, queue it onto the main |
349 | * ->cblist so that we can take advantage of the grace-period that will |
350 | * happen regardless. But queue it onto the bypass list first so that |
351 | * the lazy CB is ordered with the existing CBs in the bypass list. |
352 | */ |
353 | if (lazy && rhp) { |
354 | rcu_cblist_enqueue(rclp: &rdp->nocb_bypass, rhp); |
355 | rhp = NULL; |
356 | } |
357 | rcu_cblist_flush_enqueue(drclp: &rcl, srclp: &rdp->nocb_bypass, rhp); |
358 | WRITE_ONCE(rdp->lazy_len, 0); |
359 | |
360 | rcu_segcblist_insert_pend_cbs(rsclp: &rdp->cblist, rclp: &rcl); |
361 | WRITE_ONCE(rdp->nocb_bypass_first, j); |
362 | rcu_nocb_bypass_unlock(rdp); |
363 | return true; |
364 | } |
365 | |
366 | /* |
367 | * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL. |
368 | * However, if there is a callback to be enqueued and if ->nocb_bypass |
369 | * proves to be initially empty, just return false because the no-CB GP |
370 | * kthread may need to be awakened in this case. |
371 | * |
372 | * Note that this function always returns true if rhp is NULL. |
373 | */ |
374 | static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, |
375 | unsigned long j, bool lazy) |
376 | { |
377 | if (!rcu_rdp_is_offloaded(rdp)) |
378 | return true; |
379 | rcu_lockdep_assert_cblist_protected(rdp); |
380 | rcu_nocb_bypass_lock(rdp); |
381 | return rcu_nocb_do_flush_bypass(rdp, rhp_in: rhp, j, lazy); |
382 | } |
383 | |
384 | /* |
385 | * If the ->nocb_bypass_lock is immediately available, flush the |
386 | * ->nocb_bypass queue into ->cblist. |
387 | */ |
388 | static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) |
389 | { |
390 | rcu_lockdep_assert_cblist_protected(rdp); |
391 | if (!rcu_rdp_is_offloaded(rdp) || |
392 | !rcu_nocb_bypass_trylock(rdp)) |
393 | return; |
394 | WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false)); |
395 | } |
396 | |
397 | /* |
398 | * See whether it is appropriate to use the ->nocb_bypass list in order |
399 | * to control contention on ->nocb_lock. A limited number of direct |
400 | * enqueues are permitted into ->cblist per jiffy. If ->nocb_bypass |
401 | * is non-empty, further callbacks must be placed into ->nocb_bypass, |
402 | * otherwise rcu_barrier() breaks. Use rcu_nocb_flush_bypass() to switch |
403 | * back to direct use of ->cblist. However, ->nocb_bypass should not be |
404 | * used if ->cblist is empty, because otherwise callbacks can be stranded |
405 | * on ->nocb_bypass because we cannot count on the current CPU ever again |
406 | * invoking call_rcu(). The general rule is that if ->nocb_bypass is |
407 | * non-empty, the corresponding no-CBs grace-period kthread must not be |
408 | * in an indefinite sleep state. |
409 | * |
410 | * Finally, it is not permitted to use the bypass during early boot, |
411 | * as doing so would confuse the auto-initialization code. Besides |
412 | * which, there is no point in worrying about lock contention while |
413 | * there is only one CPU in operation. |
414 | */ |
415 | static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, |
416 | bool *was_alldone, unsigned long flags, |
417 | bool lazy) |
418 | { |
419 | unsigned long c; |
420 | unsigned long cur_gp_seq; |
421 | unsigned long j = jiffies; |
422 | long ncbs = rcu_cblist_n_cbs(rclp: &rdp->nocb_bypass); |
423 | bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len)); |
424 | |
425 | lockdep_assert_irqs_disabled(); |
426 | |
427 | // Pure softirq/rcuc based processing: no bypassing, no |
428 | // locking. |
429 | if (!rcu_rdp_is_offloaded(rdp)) { |
430 | *was_alldone = !rcu_segcblist_pend_cbs(rsclp: &rdp->cblist); |
431 | return false; |
432 | } |
433 | |
434 | // In the process of (de-)offloading: no bypassing, but |
435 | // locking. |
436 | if (!rcu_segcblist_completely_offloaded(rsclp: &rdp->cblist)) { |
437 | rcu_nocb_lock(rdp); |
438 | *was_alldone = !rcu_segcblist_pend_cbs(rsclp: &rdp->cblist); |
439 | return false; /* Not offloaded, no bypassing. */ |
440 | } |
441 | |
442 | // Don't use ->nocb_bypass during early boot. |
443 | if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) { |
444 | rcu_nocb_lock(rdp); |
445 | WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); |
446 | *was_alldone = !rcu_segcblist_pend_cbs(rsclp: &rdp->cblist); |
447 | return false; |
448 | } |
449 | |
450 | // If we have advanced to a new jiffy, reset counts to allow |
451 | // moving back from ->nocb_bypass to ->cblist. |
452 | if (j == rdp->nocb_nobypass_last) { |
453 | c = rdp->nocb_nobypass_count + 1; |
454 | } else { |
455 | WRITE_ONCE(rdp->nocb_nobypass_last, j); |
456 | c = rdp->nocb_nobypass_count - nocb_nobypass_lim_per_jiffy; |
457 | if (ULONG_CMP_LT(rdp->nocb_nobypass_count, |
458 | nocb_nobypass_lim_per_jiffy)) |
459 | c = 0; |
460 | else if (c > nocb_nobypass_lim_per_jiffy) |
461 | c = nocb_nobypass_lim_per_jiffy; |
462 | } |
463 | WRITE_ONCE(rdp->nocb_nobypass_count, c); |
464 | |
465 | // If there hasn't yet been all that many ->cblist enqueues |
466 | // this jiffy, tell the caller to enqueue onto ->cblist. But flush |
467 | // ->nocb_bypass first. |
468 | // Lazy CBs throttle this back and do immediate bypass queuing. |
469 | if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) { |
470 | rcu_nocb_lock(rdp); |
471 | *was_alldone = !rcu_segcblist_pend_cbs(rsclp: &rdp->cblist); |
472 | if (*was_alldone) |
473 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
474 | TPS("FirstQ" )); |
475 | |
476 | WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false)); |
477 | WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); |
478 | return false; // Caller must enqueue the callback. |
479 | } |
480 | |
481 | // If ->nocb_bypass has been used too long or is too full, |
482 | // flush ->nocb_bypass to ->cblist. |
483 | if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) || |
484 | (ncbs && bypass_is_lazy && |
485 | (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()))) || |
486 | ncbs >= qhimark) { |
487 | rcu_nocb_lock(rdp); |
488 | *was_alldone = !rcu_segcblist_pend_cbs(rsclp: &rdp->cblist); |
489 | |
490 | if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) { |
491 | if (*was_alldone) |
492 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
493 | TPS("FirstQ" )); |
494 | WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); |
495 | return false; // Caller must enqueue the callback. |
496 | } |
497 | if (j != rdp->nocb_gp_adv_time && |
498 | rcu_segcblist_nextgp(rsclp: &rdp->cblist, lp: &cur_gp_seq) && |
499 | rcu_seq_done(sp: &rdp->mynode->gp_seq, s: cur_gp_seq)) { |
500 | rcu_advance_cbs_nowake(rnp: rdp->mynode, rdp); |
501 | rdp->nocb_gp_adv_time = j; |
502 | } |
503 | |
504 | // The flush succeeded and we moved CBs into the regular list. |
505 | // Don't wait for the wake up timer as it may be too far ahead. |
506 | // Wake up the GP thread now instead, if the cblist was empty. |
507 | __call_rcu_nocb_wake(rdp, was_empty: *was_alldone, flags); |
508 | |
509 | return true; // Callback already enqueued. |
510 | } |
511 | |
512 | // We need to use the bypass. |
513 | rcu_nocb_wait_contended(rdp); |
514 | rcu_nocb_bypass_lock(rdp); |
515 | ncbs = rcu_cblist_n_cbs(rclp: &rdp->nocb_bypass); |
516 | rcu_segcblist_inc_len(rsclp: &rdp->cblist); /* Must precede enqueue. */ |
517 | rcu_cblist_enqueue(rclp: &rdp->nocb_bypass, rhp); |
518 | |
519 | if (lazy) |
520 | WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1); |
521 | |
522 | if (!ncbs) { |
523 | WRITE_ONCE(rdp->nocb_bypass_first, j); |
524 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("FirstBQ" )); |
525 | } |
526 | rcu_nocb_bypass_unlock(rdp); |
527 | smp_mb(); /* Order enqueue before wake. */ |
528 | // A wake up of the grace period kthread or timer adjustment |
529 | // needs to be done only if: |
530 | // 1. Bypass list was fully empty before (this is the first |
531 | // bypass list entry), or: |
532 | // 2. Both of these conditions are met: |
533 | // a. The bypass list previously had only lazy CBs, and: |
534 | // b. The new CB is non-lazy. |
535 | if (!ncbs || (bypass_is_lazy && !lazy)) { |
536 | // No-CBs GP kthread might be indefinitely asleep, if so, wake. |
537 | rcu_nocb_lock(rdp); // Rare during call_rcu() flood. |
538 | if (!rcu_segcblist_pend_cbs(rsclp: &rdp->cblist)) { |
539 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
540 | TPS("FirstBQwake" )); |
541 | __call_rcu_nocb_wake(rdp, was_empty: true, flags); |
542 | } else { |
543 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
544 | TPS("FirstBQnoWake" )); |
545 | rcu_nocb_unlock(rdp); |
546 | } |
547 | } |
548 | return true; // Callback already enqueued. |
549 | } |
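
/*
 * To summarize the contract of rcu_nocb_try_bypass(): a false return means
 * the callback was NOT queued here and the caller must enqueue it onto
 * ->cblist itself, typically with ->nocb_lock held when the rdp is offloaded
 * so that __call_rcu_nocb_wake() can later release it; a true return means
 * the callback has already been queued, either onto ->nocb_bypass or, via
 * the flush path, onto ->cblist, and any required wakeup has been arranged.
 */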
550 | |
/*
 * Awaken the no-CBs grace-period kthread if needed, either due to it
 * legitimately being asleep or due to overload conditions.
 *
 * If warranted, also wake up the kthread servicing this CPU's queues.
 */
557 | static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, |
558 | unsigned long flags) |
559 | __releases(rdp->nocb_lock) |
560 | { |
561 | long bypass_len; |
562 | unsigned long cur_gp_seq; |
563 | unsigned long j; |
564 | long lazy_len; |
565 | long len; |
566 | struct task_struct *t; |
567 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
568 | |
569 | // If we are being polled or there is no kthread, just leave. |
570 | t = READ_ONCE(rdp->nocb_gp_kthread); |
571 | if (rcu_nocb_poll || !t) { |
572 | rcu_nocb_unlock(rdp); |
573 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
574 | TPS("WakeNotPoll" )); |
575 | return; |
576 | } |
// Need to actually do a wakeup.
578 | len = rcu_segcblist_n_cbs(rsclp: &rdp->cblist); |
579 | bypass_len = rcu_cblist_n_cbs(rclp: &rdp->nocb_bypass); |
580 | lazy_len = READ_ONCE(rdp->lazy_len); |
581 | if (was_alldone) { |
582 | rdp->qlen_last_fqs_check = len; |
583 | // Only lazy CBs in bypass list |
584 | if (lazy_len && bypass_len == lazy_len) { |
585 | rcu_nocb_unlock(rdp); |
586 | wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, |
587 | TPS("WakeLazy" )); |
588 | } else if (!irqs_disabled_flags(flags)) { |
589 | /* ... if queue was empty ... */ |
590 | rcu_nocb_unlock(rdp); |
591 | wake_nocb_gp(rdp, force: false); |
592 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
593 | TPS("WakeEmpty" )); |
594 | } else { |
595 | rcu_nocb_unlock(rdp); |
596 | wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE, |
597 | TPS("WakeEmptyIsDeferred" )); |
598 | } |
599 | } else if (len > rdp->qlen_last_fqs_check + qhimark) { |
600 | /* ... or if many callbacks queued. */ |
601 | rdp->qlen_last_fqs_check = len; |
602 | j = jiffies; |
603 | if (j != rdp->nocb_gp_adv_time && |
604 | rcu_segcblist_nextgp(rsclp: &rdp->cblist, lp: &cur_gp_seq) && |
605 | rcu_seq_done(sp: &rdp->mynode->gp_seq, s: cur_gp_seq)) { |
606 | rcu_advance_cbs_nowake(rnp: rdp->mynode, rdp); |
607 | rdp->nocb_gp_adv_time = j; |
608 | } |
609 | smp_mb(); /* Enqueue before timer_pending(). */ |
610 | if ((rdp->nocb_cb_sleep || |
611 | !rcu_segcblist_ready_cbs(rsclp: &rdp->cblist)) && |
612 | !timer_pending(timer: &rdp_gp->nocb_timer)) { |
613 | rcu_nocb_unlock(rdp); |
614 | wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE, |
615 | TPS("WakeOvfIsDeferred" )); |
616 | } else { |
617 | rcu_nocb_unlock(rdp); |
618 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("WakeNot" )); |
619 | } |
620 | } else { |
621 | rcu_nocb_unlock(rdp); |
622 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("WakeNot" )); |
623 | } |
624 | } |
625 | |
static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
			  rcu_callback_t func, unsigned long flags, bool lazy)
{
	bool was_alldone;

	if (!rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) {
		/* Not enqueued on bypass but locked, do regular enqueue */
		rcutree_enqueue(rdp, head, func);
		__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
	}
}
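
/*
 * This is the NOCB leg of the call_rcu() path: the caller (assumed to live
 * in tree.c) is expected to have disabled interrupts and selected this CPU's
 * rcu_data before getting here, so by the time we return the callback sits
 * either on ->nocb_bypass or on ->cblist and any needed rcuog wakeup has
 * been arranged.
 */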
637 | |
638 | static int nocb_gp_toggle_rdp(struct rcu_data *rdp, |
639 | bool *wake_state) |
640 | { |
641 | struct rcu_segcblist *cblist = &rdp->cblist; |
642 | unsigned long flags; |
643 | int ret; |
644 | |
645 | rcu_nocb_lock_irqsave(rdp, flags); |
646 | if (rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_OFFLOADED) && |
647 | !rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_GP)) { |
/*
 * Offloading.  Set our flag and notify the offload worker.
 * We will handle this rdp until it is de-offloaded.
 */
652 | rcu_segcblist_set_flags(rsclp: cblist, SEGCBLIST_KTHREAD_GP); |
653 | if (rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_CB)) |
654 | *wake_state = true; |
655 | ret = 1; |
656 | } else if (!rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_OFFLOADED) && |
657 | rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_GP)) { |
/*
 * De-offloading.  Clear our flag and notify the de-offload worker.
 * We will ignore this rdp until it is re-offloaded.
 */
662 | rcu_segcblist_clear_flags(rsclp: cblist, SEGCBLIST_KTHREAD_GP); |
663 | if (!rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_CB)) |
664 | *wake_state = true; |
665 | ret = 0; |
666 | } else { |
667 | WARN_ON_ONCE(1); |
668 | ret = -1; |
669 | } |
670 | |
671 | rcu_nocb_unlock_irqrestore(rdp, flags); |
672 | |
673 | return ret; |
674 | } |
675 | |
static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu)
{
	trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
	swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
					    !READ_ONCE(my_rdp->nocb_gp_sleep));
	trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
}
683 | |
684 | /* |
685 | * No-CBs GP kthreads come here to wait for additional callbacks to show up |
686 | * or for grace periods to end. |
687 | */ |
688 | static void nocb_gp_wait(struct rcu_data *my_rdp) |
689 | { |
690 | bool bypass = false; |
691 | int __maybe_unused cpu = my_rdp->cpu; |
692 | unsigned long cur_gp_seq; |
693 | unsigned long flags; |
694 | bool gotcbs = false; |
695 | unsigned long j = jiffies; |
696 | bool lazy = false; |
697 | bool needwait_gp = false; // This prevents actual uninitialized use. |
698 | bool needwake; |
699 | bool needwake_gp; |
700 | struct rcu_data *rdp, *rdp_toggling = NULL; |
701 | struct rcu_node *rnp; |
702 | unsigned long wait_gp_seq = 0; // Suppress "use uninitialized" warning. |
703 | bool wasempty = false; |
704 | |
705 | /* |
706 | * Each pass through the following loop checks for CBs and for the |
707 | * nearest grace period (if any) to wait for next. The CB kthreads |
708 | * and the global grace-period kthread are awakened if needed. |
709 | */ |
710 | WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp); |
711 | /* |
712 | * An rcu_data structure is removed from the list after its |
713 | * CPU is de-offloaded and added to the list before that CPU is |
714 | * (re-)offloaded. If the following loop happens to be referencing |
715 | * that rcu_data structure during the time that the corresponding |
716 | * CPU is de-offloaded and then immediately re-offloaded, this |
717 | * loop's rdp pointer will be carried to the end of the list by |
718 | * the resulting pair of list operations. This can cause the loop |
719 | * to skip over some of the rcu_data structures that were supposed |
720 | * to have been scanned. Fortunately a new iteration through the |
721 | * entire loop is forced after a given CPU's rcu_data structure |
722 | * is added to the list, so the skipped-over rcu_data structures |
723 | * won't be ignored for long. |
724 | */ |
725 | list_for_each_entry(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp) { |
726 | long bypass_ncbs; |
727 | bool flush_bypass = false; |
728 | long lazy_ncbs; |
729 | |
730 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("Check" )); |
731 | rcu_nocb_lock_irqsave(rdp, flags); |
732 | lockdep_assert_held(&rdp->nocb_lock); |
733 | bypass_ncbs = rcu_cblist_n_cbs(rclp: &rdp->nocb_bypass); |
734 | lazy_ncbs = READ_ONCE(rdp->lazy_len); |
735 | |
736 | if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) && |
737 | (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()) || |
738 | bypass_ncbs > 2 * qhimark)) { |
739 | flush_bypass = true; |
740 | } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) && |
741 | (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) || |
742 | bypass_ncbs > 2 * qhimark)) { |
743 | flush_bypass = true; |
744 | } else if (!bypass_ncbs && rcu_segcblist_empty(rsclp: &rdp->cblist)) { |
745 | rcu_nocb_unlock_irqrestore(rdp, flags); |
746 | continue; /* No callbacks here, try next. */ |
747 | } |
748 | |
749 | if (flush_bypass) { |
750 | // Bypass full or old, so flush it. |
751 | (void)rcu_nocb_try_flush_bypass(rdp, j); |
752 | bypass_ncbs = rcu_cblist_n_cbs(rclp: &rdp->nocb_bypass); |
753 | lazy_ncbs = READ_ONCE(rdp->lazy_len); |
754 | } |
755 | |
756 | if (bypass_ncbs) { |
757 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
758 | reason: bypass_ncbs == lazy_ncbs ? TPS("Lazy" ) : TPS("Bypass" )); |
759 | if (bypass_ncbs == lazy_ncbs) |
760 | lazy = true; |
761 | else |
762 | bypass = true; |
763 | } |
764 | rnp = rdp->mynode; |
765 | |
766 | // Advance callbacks if helpful and low contention. |
767 | needwake_gp = false; |
768 | if (!rcu_segcblist_restempty(rsclp: &rdp->cblist, |
769 | RCU_NEXT_READY_TAIL) || |
770 | (rcu_segcblist_nextgp(rsclp: &rdp->cblist, lp: &cur_gp_seq) && |
771 | rcu_seq_done(sp: &rnp->gp_seq, s: cur_gp_seq))) { |
772 | raw_spin_lock_rcu_node(rnp); /* irqs disabled. */ |
773 | needwake_gp = rcu_advance_cbs(rnp, rdp); |
774 | wasempty = rcu_segcblist_restempty(rsclp: &rdp->cblist, |
775 | RCU_NEXT_READY_TAIL); |
776 | raw_spin_unlock_rcu_node(rnp); /* irqs disabled. */ |
777 | } |
778 | // Need to wait on some grace period? |
779 | WARN_ON_ONCE(wasempty && |
780 | !rcu_segcblist_restempty(&rdp->cblist, |
781 | RCU_NEXT_READY_TAIL)); |
782 | if (rcu_segcblist_nextgp(rsclp: &rdp->cblist, lp: &cur_gp_seq)) { |
783 | if (!needwait_gp || |
784 | ULONG_CMP_LT(cur_gp_seq, wait_gp_seq)) |
785 | wait_gp_seq = cur_gp_seq; |
786 | needwait_gp = true; |
787 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, |
788 | TPS("NeedWaitGP" )); |
789 | } |
790 | if (rcu_segcblist_ready_cbs(rsclp: &rdp->cblist)) { |
791 | needwake = rdp->nocb_cb_sleep; |
792 | WRITE_ONCE(rdp->nocb_cb_sleep, false); |
793 | } else { |
794 | needwake = false; |
795 | } |
796 | rcu_nocb_unlock_irqrestore(rdp, flags); |
797 | if (needwake) { |
798 | swake_up_one(q: &rdp->nocb_cb_wq); |
799 | gotcbs = true; |
800 | } |
801 | if (needwake_gp) |
802 | rcu_gp_kthread_wake(); |
803 | } |
804 | |
805 | my_rdp->nocb_gp_bypass = bypass; |
806 | my_rdp->nocb_gp_gp = needwait_gp; |
807 | my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0; |
808 | |
809 | // At least one child with non-empty ->nocb_bypass, so set |
810 | // timer in order to avoid stranding its callbacks. |
811 | if (!rcu_nocb_poll) { |
// If the bypass list has only lazy CBs, add a deferred lazy wakeup.
813 | if (lazy && !bypass) { |
814 | wake_nocb_gp_defer(rdp: my_rdp, RCU_NOCB_WAKE_LAZY, |
815 | TPS("WakeLazyIsDeferred" )); |
816 | // Otherwise add a deferred bypass wake up. |
817 | } else if (bypass) { |
818 | wake_nocb_gp_defer(rdp: my_rdp, RCU_NOCB_WAKE_BYPASS, |
819 | TPS("WakeBypassIsDeferred" )); |
820 | } |
821 | } |
822 | |
823 | if (rcu_nocb_poll) { |
824 | /* Polling, so trace if first poll in the series. */ |
825 | if (gotcbs) |
826 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu, TPS("Poll" )); |
827 | if (list_empty(head: &my_rdp->nocb_head_rdp)) { |
828 | raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags); |
829 | if (!my_rdp->nocb_toggling_rdp) |
830 | WRITE_ONCE(my_rdp->nocb_gp_sleep, true); |
831 | raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags); |
832 | /* Wait for any offloading rdp */ |
833 | nocb_gp_sleep(my_rdp, cpu); |
834 | } else { |
835 | schedule_timeout_idle(timeout: 1); |
836 | } |
837 | } else if (!needwait_gp) { |
838 | /* Wait for callbacks to appear. */ |
839 | nocb_gp_sleep(my_rdp, cpu); |
840 | } else { |
841 | rnp = my_rdp->mynode; |
842 | trace_rcu_this_gp(rnp, rdp: my_rdp, gp_seq_req: wait_gp_seq, TPS("StartWait" )); |
843 | swait_event_interruptible_exclusive( |
844 | rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1], |
845 | rcu_seq_done(&rnp->gp_seq, wait_gp_seq) || |
846 | !READ_ONCE(my_rdp->nocb_gp_sleep)); |
847 | trace_rcu_this_gp(rnp, rdp: my_rdp, gp_seq_req: wait_gp_seq, TPS("EndWait" )); |
848 | } |
849 | |
850 | if (!rcu_nocb_poll) { |
851 | raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags); |
852 | // (De-)queue an rdp to/from the group if its nocb state is changing |
853 | rdp_toggling = my_rdp->nocb_toggling_rdp; |
854 | if (rdp_toggling) |
855 | my_rdp->nocb_toggling_rdp = NULL; |
856 | |
857 | if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) { |
858 | WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); |
859 | del_timer(timer: &my_rdp->nocb_timer); |
860 | } |
861 | WRITE_ONCE(my_rdp->nocb_gp_sleep, true); |
862 | raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags); |
863 | } else { |
864 | rdp_toggling = READ_ONCE(my_rdp->nocb_toggling_rdp); |
865 | if (rdp_toggling) { |
/*
 * Paranoid locking to make sure nocb_toggling_rdp is properly
 * reset *before* we (re)set SEGCBLIST_KTHREAD_GP, or we could
 * race with another round of nocb toggling for this rdp.
 * Nocb locking should already prevent that, but we stick
 * to paranoia, especially on this rare path.
 */
873 | raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags); |
874 | my_rdp->nocb_toggling_rdp = NULL; |
875 | raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags); |
876 | } |
877 | } |
878 | |
879 | if (rdp_toggling) { |
880 | bool wake_state = false; |
881 | int ret; |
882 | |
883 | ret = nocb_gp_toggle_rdp(rdp: rdp_toggling, wake_state: &wake_state); |
884 | if (ret == 1) |
885 | list_add_tail(new: &rdp_toggling->nocb_entry_rdp, head: &my_rdp->nocb_head_rdp); |
886 | else if (ret == 0) |
887 | list_del(entry: &rdp_toggling->nocb_entry_rdp); |
888 | if (wake_state) |
889 | swake_up_one(q: &rdp_toggling->nocb_state_wq); |
890 | } |
891 | |
892 | my_rdp->nocb_gp_seq = -1; |
893 | WARN_ON(signal_pending(current)); |
894 | } |
895 | |
/*
 * No-CBs grace-period-wait kthread.  There is one of these per group
 * of CPUs, but only after at least one CPU in that group has come online
 * at least once since boot.  This kthread checks for newly posted
 * callbacks from any of the CPUs it is responsible for, waits for a
 * grace period, then awakens all of the rcu_nocb_cb_kthread() instances
 * that then have callback-invocation work to do.
 */
static int rcu_nocb_gp_kthread(void *arg)
{
	struct rcu_data *rdp = arg;

	for (;;) {
		WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);
		nocb_gp_wait(rdp);
		cond_resched_tasks_rcu_qs();
	}
	return 0;
}
915 | |
static inline bool nocb_cb_can_run(struct rcu_data *rdp)
{
	u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_CB;

	return rcu_segcblist_test_flags(&rdp->cblist, flags);
}

static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
{
	return nocb_cb_can_run(rdp) && !READ_ONCE(rdp->nocb_cb_sleep);
}
927 | |
928 | /* |
929 | * Invoke any ready callbacks from the corresponding no-CBs CPU, |
930 | * then, if there are no more, wait for more to appear. |
931 | */ |
932 | static void nocb_cb_wait(struct rcu_data *rdp) |
933 | { |
934 | struct rcu_segcblist *cblist = &rdp->cblist; |
935 | unsigned long cur_gp_seq; |
936 | unsigned long flags; |
937 | bool needwake_state = false; |
938 | bool needwake_gp = false; |
939 | bool can_sleep = true; |
940 | struct rcu_node *rnp = rdp->mynode; |
941 | |
942 | do { |
943 | swait_event_interruptible_exclusive(rdp->nocb_cb_wq, |
944 | nocb_cb_wait_cond(rdp)); |
945 | |
946 | if (READ_ONCE(rdp->nocb_cb_sleep)) { |
947 | WARN_ON(signal_pending(current)); |
948 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("WokeEmpty" )); |
949 | } |
950 | } while (!nocb_cb_can_run(rdp)); |
951 | |
952 | |
953 | local_irq_save(flags); |
954 | rcu_momentary_dyntick_idle(); |
955 | local_irq_restore(flags); |
/*
 * Disable BH to provide the expected environment.  Also, when
 * transitioning to/from NOCB mode, a self-requeuing callback might
 * be invoked from softirq.  A short grace period could cause both
 * instances of this callback to execute concurrently.
 */
962 | local_bh_disable(); |
963 | rcu_do_batch(rdp); |
964 | local_bh_enable(); |
965 | lockdep_assert_irqs_enabled(); |
966 | rcu_nocb_lock_irqsave(rdp, flags); |
967 | if (rcu_segcblist_nextgp(rsclp: cblist, lp: &cur_gp_seq) && |
968 | rcu_seq_done(sp: &rnp->gp_seq, s: cur_gp_seq) && |
969 | raw_spin_trylock_rcu_node(rnp)) { /* irqs already disabled. */ |
970 | needwake_gp = rcu_advance_cbs(rnp: rdp->mynode, rdp); |
971 | raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ |
972 | } |
973 | |
974 | if (rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_OFFLOADED)) { |
975 | if (!rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_CB)) { |
976 | rcu_segcblist_set_flags(rsclp: cblist, SEGCBLIST_KTHREAD_CB); |
977 | if (rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_GP)) |
978 | needwake_state = true; |
979 | } |
980 | if (rcu_segcblist_ready_cbs(rsclp: cblist)) |
981 | can_sleep = false; |
982 | } else { |
/*
 * De-offloading.  Clear our flag and notify the de-offload worker.
 * We won't touch the callbacks and will keep sleeping until we are
 * re-offloaded.
 */
988 | WARN_ON_ONCE(!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB)); |
989 | rcu_segcblist_clear_flags(rsclp: cblist, SEGCBLIST_KTHREAD_CB); |
990 | if (!rcu_segcblist_test_flags(rsclp: cblist, SEGCBLIST_KTHREAD_GP)) |
991 | needwake_state = true; |
992 | } |
993 | |
994 | WRITE_ONCE(rdp->nocb_cb_sleep, can_sleep); |
995 | |
996 | if (rdp->nocb_cb_sleep) |
997 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("CBSleep" )); |
998 | |
999 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1000 | if (needwake_gp) |
1001 | rcu_gp_kthread_wake(); |
1002 | |
1003 | if (needwake_state) |
1004 | swake_up_one(q: &rdp->nocb_state_wq); |
1005 | } |
1006 | |
1007 | /* |
1008 | * Per-rcu_data kthread, but only for no-CBs CPUs. Repeatedly invoke |
1009 | * nocb_cb_wait() to do the dirty work. |
1010 | */ |
1011 | static int rcu_nocb_cb_kthread(void *arg) |
1012 | { |
1013 | struct rcu_data *rdp = arg; |
1014 | |
1015 | // Each pass through this loop does one callback batch, and, |
1016 | // if there are no more ready callbacks, waits for them. |
1017 | for (;;) { |
1018 | nocb_cb_wait(rdp); |
1019 | cond_resched_tasks_rcu_qs(); |
1020 | } |
1021 | return 0; |
1022 | } |
1023 | |
1024 | /* Is a deferred wakeup of rcu_nocb_kthread() required? */ |
1025 | static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level) |
1026 | { |
1027 | return READ_ONCE(rdp->nocb_defer_wakeup) >= level; |
1028 | } |
1029 | |
1030 | /* Do a deferred wakeup of rcu_nocb_kthread(). */ |
1031 | static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp, |
1032 | struct rcu_data *rdp, int level, |
1033 | unsigned long flags) |
1034 | __releases(rdp_gp->nocb_gp_lock) |
1035 | { |
1036 | int ndw; |
1037 | int ret; |
1038 | |
1039 | if (!rcu_nocb_need_deferred_wakeup(rdp: rdp_gp, level)) { |
1040 | raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); |
1041 | return false; |
1042 | } |
1043 | |
1044 | ndw = rdp_gp->nocb_defer_wakeup; |
1045 | ret = __wake_nocb_gp(rdp_gp, rdp, force: ndw == RCU_NOCB_WAKE_FORCE, flags); |
1046 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("DeferredWake" )); |
1047 | |
1048 | return ret; |
1049 | } |
1050 | |
1051 | /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */ |
1052 | static void do_nocb_deferred_wakeup_timer(struct timer_list *t) |
1053 | { |
1054 | unsigned long flags; |
1055 | struct rcu_data *rdp = from_timer(rdp, t, nocb_timer); |
1056 | |
1057 | WARN_ON_ONCE(rdp->nocb_gp_rdp != rdp); |
1058 | trace_rcu_nocb_wake(rcuname: rcu_state.name, cpu: rdp->cpu, TPS("Timer" )); |
1059 | |
1060 | raw_spin_lock_irqsave(&rdp->nocb_gp_lock, flags); |
1061 | smp_mb__after_spinlock(); /* Timer expire before wakeup. */ |
1062 | do_nocb_deferred_wakeup_common(rdp_gp: rdp, rdp, RCU_NOCB_WAKE_BYPASS, flags); |
1063 | } |
1064 | |
1065 | /* |
1066 | * Do a deferred wakeup of rcu_nocb_kthread() from fastpath. |
1067 | * This means we do an inexact common-case check. Note that if |
1068 | * we miss, ->nocb_timer will eventually clean things up. |
1069 | */ |
1070 | static bool do_nocb_deferred_wakeup(struct rcu_data *rdp) |
1071 | { |
1072 | unsigned long flags; |
1073 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
1074 | |
1075 | if (!rdp_gp || !rcu_nocb_need_deferred_wakeup(rdp: rdp_gp, RCU_NOCB_WAKE)) |
1076 | return false; |
1077 | |
1078 | raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); |
1079 | return do_nocb_deferred_wakeup_common(rdp_gp, rdp, RCU_NOCB_WAKE, flags); |
1080 | } |
1081 | |
1082 | void rcu_nocb_flush_deferred_wakeup(void) |
1083 | { |
1084 | do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data)); |
1085 | } |
1086 | EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup); |
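
/*
 * Illustrative only: a path about to enter a long quiescent stretch (the
 * idle loop, for instance) can call this beforehand so that a pending
 * deferred rcuog wakeup is issued now rather than waiting on ->nocb_timer:
 *
 *	rcu_nocb_flush_deferred_wakeup();
 *	... enter idle ...
 */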
1087 | |
1088 | static int rdp_offload_toggle(struct rcu_data *rdp, |
1089 | bool offload, unsigned long flags) |
1090 | __releases(rdp->nocb_lock) |
1091 | { |
1092 | struct rcu_segcblist *cblist = &rdp->cblist; |
1093 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
1094 | bool wake_gp = false; |
1095 | |
1096 | rcu_segcblist_offload(rsclp: cblist, offload); |
1097 | |
1098 | if (rdp->nocb_cb_sleep) |
1099 | rdp->nocb_cb_sleep = false; |
1100 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1101 | |
1102 | /* |
1103 | * Ignore former value of nocb_cb_sleep and force wake up as it could |
1104 | * have been spuriously set to false already. |
1105 | */ |
1106 | swake_up_one(q: &rdp->nocb_cb_wq); |
1107 | |
1108 | raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); |
1109 | // Queue this rdp for add/del to/from the list to iterate on rcuog |
1110 | WRITE_ONCE(rdp_gp->nocb_toggling_rdp, rdp); |
1111 | if (rdp_gp->nocb_gp_sleep) { |
1112 | rdp_gp->nocb_gp_sleep = false; |
1113 | wake_gp = true; |
1114 | } |
1115 | raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); |
1116 | |
1117 | return wake_gp; |
1118 | } |
1119 | |
1120 | static long rcu_nocb_rdp_deoffload(void *arg) |
1121 | { |
1122 | struct rcu_data *rdp = arg; |
1123 | struct rcu_segcblist *cblist = &rdp->cblist; |
1124 | unsigned long flags; |
1125 | int wake_gp; |
1126 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
1127 | |
1128 | /* |
1129 | * rcu_nocb_rdp_deoffload() may be called directly if |
1130 | * rcuog/o[p] spawn failed, because at this time the rdp->cpu |
1131 | * is not online yet. |
1132 | */ |
1133 | WARN_ON_ONCE((rdp->cpu != raw_smp_processor_id()) && cpu_online(rdp->cpu)); |
1134 | |
1135 | pr_info("De-offloading %d\n" , rdp->cpu); |
1136 | |
1137 | rcu_nocb_lock_irqsave(rdp, flags); |
1138 | /* |
1139 | * Flush once and for all now. This suffices because we are |
1140 | * running on the target CPU holding ->nocb_lock (thus having |
1141 | * interrupts disabled), and because rdp_offload_toggle() |
1142 | * invokes rcu_segcblist_offload(), which clears SEGCBLIST_OFFLOADED. |
1143 | * Thus future calls to rcu_segcblist_completely_offloaded() will |
1144 | * return false, which means that future calls to rcu_nocb_try_bypass() |
1145 | * will refuse to put anything into the bypass. |
1146 | */ |
1147 | WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); |
1148 | /* |
1149 | * Start with invoking rcu_core() early. This way if the current thread |
1150 | * happens to preempt an ongoing call to rcu_core() in the middle, |
1151 | * leaving some work dismissed because rcu_core() still thinks the rdp is |
1152 | * completely offloaded, we are guaranteed a nearby future instance of |
1153 | * rcu_core() to catch up. |
1154 | */ |
1155 | rcu_segcblist_set_flags(rsclp: cblist, SEGCBLIST_RCU_CORE); |
1156 | invoke_rcu_core(); |
1157 | wake_gp = rdp_offload_toggle(rdp, offload: false, flags); |
1158 | |
1159 | mutex_lock(&rdp_gp->nocb_gp_kthread_mutex); |
1160 | if (rdp_gp->nocb_gp_kthread) { |
1161 | if (wake_gp) |
1162 | wake_up_process(tsk: rdp_gp->nocb_gp_kthread); |
1163 | |
/*
 * If rcuo[p] kthread spawn failed, directly remove SEGCBLIST_KTHREAD_CB.
 * Just wait for SEGCBLIST_KTHREAD_GP to be cleared by rcuog.
 */
1168 | if (!rdp->nocb_cb_kthread) { |
1169 | rcu_nocb_lock_irqsave(rdp, flags); |
1170 | rcu_segcblist_clear_flags(rsclp: &rdp->cblist, SEGCBLIST_KTHREAD_CB); |
1171 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1172 | } |
1173 | |
1174 | swait_event_exclusive(rdp->nocb_state_wq, |
1175 | !rcu_segcblist_test_flags(cblist, |
1176 | SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP)); |
1177 | } else { |
/*
 * No kthread to clear the flags for us or to remove the rdp from the
 * nocb list to iterate on.  Do it here instead.  Locking doesn't look
 * strictly necessary, but we stick to paranoia on this rare path.
 */
1183 | rcu_nocb_lock_irqsave(rdp, flags); |
1184 | rcu_segcblist_clear_flags(rsclp: &rdp->cblist, |
1185 | SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP); |
1186 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1187 | |
1188 | list_del(entry: &rdp->nocb_entry_rdp); |
1189 | } |
1190 | mutex_unlock(lock: &rdp_gp->nocb_gp_kthread_mutex); |
1191 | |
1192 | /* |
1193 | * Lock one last time to acquire latest callback updates from kthreads |
1194 | * so we can later handle callbacks locally without locking. |
1195 | */ |
1196 | rcu_nocb_lock_irqsave(rdp, flags); |
1197 | /* |
1198 | * Theoretically we could clear SEGCBLIST_LOCKING after the nocb |
1199 | * lock is released but how about being paranoid for once? |
1200 | */ |
1201 | rcu_segcblist_clear_flags(rsclp: cblist, SEGCBLIST_LOCKING); |
1202 | /* |
1203 | * Without SEGCBLIST_LOCKING, we can't use |
1204 | * rcu_nocb_unlock_irqrestore() anymore. |
1205 | */ |
1206 | raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); |
1207 | |
1208 | /* Sanity check */ |
1209 | WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); |
1210 | |
1211 | |
1212 | return 0; |
1213 | } |
1214 | |
1215 | int rcu_nocb_cpu_deoffload(int cpu) |
1216 | { |
1217 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
1218 | int ret = 0; |
1219 | |
1220 | cpus_read_lock(); |
1221 | mutex_lock(&rcu_state.barrier_mutex); |
1222 | if (rcu_rdp_is_offloaded(rdp)) { |
1223 | if (cpu_online(cpu)) { |
1224 | ret = work_on_cpu(cpu, rcu_nocb_rdp_deoffload, rdp); |
1225 | if (!ret) |
1226 | cpumask_clear_cpu(cpu, dstp: rcu_nocb_mask); |
1227 | } else { |
1228 | pr_info("NOCB: Cannot CB-deoffload offline CPU %d\n" , rdp->cpu); |
1229 | ret = -EINVAL; |
1230 | } |
1231 | } |
1232 | mutex_unlock(lock: &rcu_state.barrier_mutex); |
1233 | cpus_read_unlock(); |
1234 | |
1235 | return ret; |
1236 | } |
1237 | EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload); |
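
/*
 * Hypothetical usage, for illustration only:
 *
 *	if (rcu_nocb_cpu_deoffload(cpu))
 *		pr_warn("CPU %d could not be CB-deoffloaded\n", cpu);
 *
 * A nonzero return (for example -EINVAL for an offline CPU) means the CPU's
 * offloaded state was left unchanged.
 */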
1238 | |
1239 | static long rcu_nocb_rdp_offload(void *arg) |
1240 | { |
1241 | struct rcu_data *rdp = arg; |
1242 | struct rcu_segcblist *cblist = &rdp->cblist; |
1243 | unsigned long flags; |
1244 | int wake_gp; |
1245 | struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; |
1246 | |
1247 | WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id()); |
/*
 * For now we only support re-offload, i.e., the rdp must have been
 * offloaded at boot time first.
 */
1252 | if (!rdp->nocb_gp_rdp) |
1253 | return -EINVAL; |
1254 | |
1255 | if (WARN_ON_ONCE(!rdp_gp->nocb_gp_kthread)) |
1256 | return -EINVAL; |
1257 | |
1258 | pr_info("Offloading %d\n" , rdp->cpu); |
1259 | |
1260 | /* |
1261 | * Can't use rcu_nocb_lock_irqsave() before SEGCBLIST_LOCKING |
1262 | * is set. |
1263 | */ |
1264 | raw_spin_lock_irqsave(&rdp->nocb_lock, flags); |
1265 | |
1266 | /* |
 * We didn't take the nocb lock while working on the
 * rdp->cblist with SEGCBLIST_LOCKING cleared (pure softirq/rcuc mode).
 * Every modification previously made to rdp->cblist must be visible
 * remotely to the nocb kthreads upon wakeup, after they read the
 * cblist flags.
1272 | * |
1273 | * The layout against nocb_lock enforces that ordering: |
1274 | * |
1275 | * __rcu_nocb_rdp_offload() nocb_cb_wait()/nocb_gp_wait() |
1276 | * ------------------------- ---------------------------- |
1277 | * WRITE callbacks rcu_nocb_lock() |
1278 | * rcu_nocb_lock() READ flags |
1279 | * WRITE flags READ callbacks |
1280 | * rcu_nocb_unlock() rcu_nocb_unlock() |
1281 | */ |
1282 | wake_gp = rdp_offload_toggle(rdp, offload: true, flags); |
1283 | if (wake_gp) |
1284 | wake_up_process(tsk: rdp_gp->nocb_gp_kthread); |
1285 | swait_event_exclusive(rdp->nocb_state_wq, |
1286 | rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) && |
1287 | rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)); |
1288 | |
1289 | /* |
1290 | * All kthreads are ready to work, we can finally relieve rcu_core() and |
1291 | * enable nocb bypass. |
1292 | */ |
1293 | rcu_nocb_lock_irqsave(rdp, flags); |
1294 | rcu_segcblist_clear_flags(rsclp: cblist, SEGCBLIST_RCU_CORE); |
1295 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1296 | |
1297 | return 0; |
1298 | } |
1299 | |
1300 | int rcu_nocb_cpu_offload(int cpu) |
1301 | { |
1302 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
1303 | int ret = 0; |
1304 | |
1305 | cpus_read_lock(); |
1306 | mutex_lock(&rcu_state.barrier_mutex); |
1307 | if (!rcu_rdp_is_offloaded(rdp)) { |
1308 | if (cpu_online(cpu)) { |
1309 | ret = work_on_cpu(cpu, rcu_nocb_rdp_offload, rdp); |
1310 | if (!ret) |
1311 | cpumask_set_cpu(cpu, dstp: rcu_nocb_mask); |
1312 | } else { |
1313 | pr_info("NOCB: Cannot CB-offload offline CPU %d\n" , rdp->cpu); |
1314 | ret = -EINVAL; |
1315 | } |
1316 | } |
1317 | mutex_unlock(lock: &rcu_state.barrier_mutex); |
1318 | cpus_read_unlock(); |
1319 | |
1320 | return ret; |
1321 | } |
1322 | EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload); |
1323 | |
1324 | #ifdef CONFIG_RCU_LAZY |
1325 | static unsigned long |
1326 | lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) |
1327 | { |
1328 | int cpu; |
1329 | unsigned long count = 0; |
1330 | |
1331 | if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask))) |
1332 | return 0; |
1333 | |
1334 | /* Protect rcu_nocb_mask against concurrent (de-)offloading. */ |
1335 | if (!mutex_trylock(lock: &rcu_state.barrier_mutex)) |
1336 | return 0; |
1337 | |
1338 | /* Snapshot count of all CPUs */ |
1339 | for_each_cpu(cpu, rcu_nocb_mask) { |
1340 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
1341 | |
1342 | count += READ_ONCE(rdp->lazy_len); |
1343 | } |
1344 | |
1345 | mutex_unlock(lock: &rcu_state.barrier_mutex); |
1346 | |
1347 | return count ? count : SHRINK_EMPTY; |
1348 | } |
1349 | |
1350 | static unsigned long |
1351 | lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
1352 | { |
1353 | int cpu; |
1354 | unsigned long flags; |
1355 | unsigned long count = 0; |
1356 | |
1357 | if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask))) |
1358 | return 0; |
1359 | /* |
1360 | * Protect against concurrent (de-)offloading. Otherwise nocb locking |
1361 | * may be ignored or imbalanced. |
1362 | */ |
1363 | if (!mutex_trylock(lock: &rcu_state.barrier_mutex)) { |
1364 | /* |
1365 | * But really don't insist if barrier_mutex is contended since we |
1366 | * can't guarantee that it will never engage in a dependency |
1367 | * chain involving memory allocation. The lock is seldom contended |
1368 | * anyway. |
1369 | */ |
1370 | return 0; |
1371 | } |
1372 | |
1373 | /* Snapshot count of all CPUs */ |
1374 | for_each_cpu(cpu, rcu_nocb_mask) { |
1375 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
1376 | int _count; |
1377 | |
1378 | if (WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp))) |
1379 | continue; |
1380 | |
1381 | if (!READ_ONCE(rdp->lazy_len)) |
1382 | continue; |
1383 | |
1384 | rcu_nocb_lock_irqsave(rdp, flags); |
/*
 * Recheck under the nocb lock.  Since we are not holding the bypass
 * lock, we may still race with increments from the enqueuer, but we
 * know for sure whether there is at least one lazy callback.
 */
1390 | _count = READ_ONCE(rdp->lazy_len); |
1391 | if (!_count) { |
1392 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1393 | continue; |
1394 | } |
1395 | rcu_nocb_try_flush_bypass(rdp, j: jiffies); |
1396 | rcu_nocb_unlock_irqrestore(rdp, flags); |
1397 | wake_nocb_gp(rdp, force: false); |
1398 | sc->nr_to_scan -= _count; |
1399 | count += _count; |
1400 | if (sc->nr_to_scan <= 0) |
1401 | break; |
1402 | } |
1403 | |
1404 | mutex_unlock(lock: &rcu_state.barrier_mutex); |
1405 | |
1406 | return count ? count : SHRINK_STOP; |
1407 | } |
1408 | #endif // #ifdef CONFIG_RCU_LAZY |
1409 | |
1410 | void __init rcu_init_nohz(void) |
1411 | { |
1412 | int cpu; |
1413 | struct rcu_data *rdp; |
1414 | const struct cpumask *cpumask = NULL; |
1415 | struct shrinker * __maybe_unused lazy_rcu_shrinker; |
1416 | |
1417 | #if defined(CONFIG_NO_HZ_FULL) |
1418 | if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) |
1419 | cpumask = tick_nohz_full_mask; |
1420 | #endif |
1421 | |
1422 | if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) && |
1423 | !rcu_state.nocb_is_setup && !cpumask) |
1424 | cpumask = cpu_possible_mask; |
1425 | |
1426 | if (cpumask) { |
1427 | if (!cpumask_available(mask: rcu_nocb_mask)) { |
1428 | if (!zalloc_cpumask_var(mask: &rcu_nocb_mask, GFP_KERNEL)) { |
1429 | pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n" ); |
1430 | return; |
1431 | } |
1432 | } |
1433 | |
1434 | cpumask_or(dstp: rcu_nocb_mask, src1p: rcu_nocb_mask, src2p: cpumask); |
1435 | rcu_state.nocb_is_setup = true; |
1436 | } |
1437 | |
1438 | if (!rcu_state.nocb_is_setup) |
1439 | return; |
1440 | |
1441 | #ifdef CONFIG_RCU_LAZY |
1442 | lazy_rcu_shrinker = shrinker_alloc(flags: 0, fmt: "rcu-lazy" ); |
1443 | if (!lazy_rcu_shrinker) { |
1444 | pr_err("Failed to allocate lazy_rcu shrinker!\n" ); |
1445 | } else { |
1446 | lazy_rcu_shrinker->count_objects = lazy_rcu_shrink_count; |
1447 | lazy_rcu_shrinker->scan_objects = lazy_rcu_shrink_scan; |
1448 | |
1449 | shrinker_register(shrinker: lazy_rcu_shrinker); |
1450 | } |
1451 | #endif // #ifdef CONFIG_RCU_LAZY |
1452 | |
1453 | if (!cpumask_subset(src1p: rcu_nocb_mask, cpu_possible_mask)) { |
1454 | pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n" ); |
1455 | cpumask_and(dstp: rcu_nocb_mask, cpu_possible_mask, |
1456 | src2p: rcu_nocb_mask); |
1457 | } |
1458 | if (cpumask_empty(srcp: rcu_nocb_mask)) |
1459 | pr_info("\tOffload RCU callbacks from CPUs: (none).\n" ); |
1460 | else |
1461 | pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n" , |
1462 | cpumask_pr_args(rcu_nocb_mask)); |
1463 | if (rcu_nocb_poll) |
1464 | pr_info("\tPoll for callbacks from no-CBs CPUs.\n" ); |
1465 | |
1466 | for_each_cpu(cpu, rcu_nocb_mask) { |
1467 | rdp = per_cpu_ptr(&rcu_data, cpu); |
		if (rcu_segcblist_empty(&rdp->cblist))
			rcu_segcblist_init(&rdp->cblist);
		rcu_segcblist_offload(&rdp->cblist, true);
		rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP);
		rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_RCU_CORE);
1473 | } |
1474 | rcu_organize_nocb_kthreads(); |
1475 | } |
1476 | |
1477 | /* Initialize per-rcu_data variables for no-CBs CPUs. */ |
1478 | static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) |
1479 | { |
1480 | init_swait_queue_head(&rdp->nocb_cb_wq); |
1481 | init_swait_queue_head(&rdp->nocb_gp_wq); |
1482 | init_swait_queue_head(&rdp->nocb_state_wq); |
1483 | raw_spin_lock_init(&rdp->nocb_lock); |
1484 | raw_spin_lock_init(&rdp->nocb_bypass_lock); |
1485 | raw_spin_lock_init(&rdp->nocb_gp_lock); |
1486 | timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); |
	rcu_cblist_init(&rdp->nocb_bypass);
1488 | WRITE_ONCE(rdp->lazy_len, 0); |
1489 | mutex_init(&rdp->nocb_gp_kthread_mutex); |
1490 | } |
1491 | |
1492 | /* |
1493 | * If the specified CPU is a no-CBs CPU that does not already have its |
1494 | * rcuo CB kthread, spawn it. Additionally, if the rcuo GP kthread |
1495 | * for this CPU's group has not yet been created, spawn it as well. |
1496 | */ |
1497 | static void rcu_spawn_cpu_nocb_kthread(int cpu) |
1498 | { |
1499 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
1500 | struct rcu_data *rdp_gp; |
1501 | struct task_struct *t; |
1502 | struct sched_param sp; |
1503 | |
1504 | if (!rcu_scheduler_fully_active || !rcu_state.nocb_is_setup) |
1505 | return; |
1506 | |
1507 | /* If there already is an rcuo kthread, then nothing to do. */ |
1508 | if (rdp->nocb_cb_kthread) |
1509 | return; |
1510 | |
1511 | /* If we didn't spawn the GP kthread first, reorganize! */ |
1512 | sp.sched_priority = kthread_prio; |
1513 | rdp_gp = rdp->nocb_gp_rdp; |
1514 | mutex_lock(&rdp_gp->nocb_gp_kthread_mutex); |
1515 | if (!rdp_gp->nocb_gp_kthread) { |
		t = kthread_run(rcu_nocb_gp_kthread, rdp_gp,
				"rcuog/%d", rdp_gp->cpu);
		if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) {
			mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
1520 | goto end; |
1521 | } |
1522 | WRITE_ONCE(rdp_gp->nocb_gp_kthread, t); |
1523 | if (kthread_prio) |
1524 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1525 | } |
	mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
1527 | |
1528 | /* Spawn the kthread for this CPU. */ |
	t = kthread_run(rcu_nocb_cb_kthread, rdp,
			"rcuo%c/%d", rcu_state.abbr, cpu);
	if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
1532 | goto end; |
1533 | |
1534 | if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_CB_BOOST) && kthread_prio) |
1535 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1536 | |
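	/* These kthread pointers are read locklessly elsewhere, hence WRITE_ONCE(). */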
1537 | WRITE_ONCE(rdp->nocb_cb_kthread, t); |
1538 | WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread); |
1539 | return; |
1540 | end: |
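	/*
	 * Kthread creation failed: de-offload this CPU back to non-offloaded
	 * callback processing. Holding ->barrier_mutex keeps a concurrent
	 * rcu_barrier() from running while the offloaded state changes.
	 */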
1541 | mutex_lock(&rcu_state.barrier_mutex); |
1542 | if (rcu_rdp_is_offloaded(rdp)) { |
		rcu_nocb_rdp_deoffload(rdp);
		cpumask_clear_cpu(cpu, rcu_nocb_mask);
	}
	mutex_unlock(&rcu_state.barrier_mutex);
1547 | } |
1548 | |
1549 | /* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */ |
1550 | static int rcu_nocb_gp_stride = -1; |
1551 | module_param(rcu_nocb_gp_stride, int, 0444); |
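/*
 * For example (illustrative only), booting with this parameter set to 4,
 * typically as rcutree.rcu_nocb_gp_stride=4 on the kernel command line,
 * groups the no-CBs CPUs into rcuog domains of four CPUs each.
 */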
1552 | |
1553 | /* |
 * Initialize GP-CB relationships for all no-CBs CPUs.
1555 | */ |
1556 | static void __init rcu_organize_nocb_kthreads(void) |
1557 | { |
1558 | int cpu; |
1559 | bool firsttime = true; |
1560 | bool gotnocbs = false; |
1561 | bool gotnocbscbs = true; |
1562 | int ls = rcu_nocb_gp_stride; |
1563 | int nl = 0; /* Next GP kthread. */ |
1564 | struct rcu_data *rdp; |
1565 | struct rcu_data *rdp_gp = NULL; /* Suppress misguided gcc warn. */ |
1566 | |
	if (!cpumask_available(rcu_nocb_mask))
1568 | return; |
1569 | if (ls == -1) { |
1570 | ls = nr_cpu_ids / int_sqrt(nr_cpu_ids); |
1571 | rcu_nocb_gp_stride = ls; |
1572 | } |
1573 | |
1574 | /* |
1575 | * Each pass through this loop sets up one rcu_data structure. |
1576 | * Should the corresponding CPU come online in the future, then |
1577 | * we will spawn the needed set of rcu_nocb_kthread() kthreads. |
1578 | */ |
1579 | for_each_possible_cpu(cpu) { |
1580 | rdp = per_cpu_ptr(&rcu_data, cpu); |
1581 | if (rdp->cpu >= nl) { |
1582 | /* New GP kthread, set up for CBs & next GP. */ |
1583 | gotnocbs = true; |
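			/*
			 * Round nl up to the next stride boundary: for example,
			 * with ls == 8 and rdp->cpu == 0, nl becomes 8, so CPUs
			 * 1-7 will link to this GP kthread below.
			 */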
1584 | nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls; |
1585 | rdp_gp = rdp; |
			INIT_LIST_HEAD(&rdp->nocb_head_rdp);
			if (dump_tree) {
				if (!firsttime)
					pr_cont("%s\n", gotnocbscbs
						? "" : " (self only)");
				gotnocbscbs = false;
				firsttime = false;
				pr_alert("%s: No-CB GP kthread CPU %d:",
1594 | __func__, cpu); |
1595 | } |
1596 | } else { |
1597 | /* Another CB kthread, link to previous GP kthread. */ |
1598 | gotnocbscbs = true; |
1599 | if (dump_tree) |
				pr_cont(" %d", cpu);
1601 | } |
1602 | rdp->nocb_gp_rdp = rdp_gp; |
		if (cpumask_test_cpu(cpu, rcu_nocb_mask))
			list_add_tail(&rdp->nocb_entry_rdp, &rdp_gp->nocb_head_rdp);
1605 | } |
1606 | if (gotnocbs && dump_tree) |
		pr_cont("%s\n", gotnocbscbs ? "" : " (self only)");
1608 | } |
1609 | |
1610 | /* |
1611 | * Bind the current task to the offloaded CPUs. If there are no offloaded |
1612 | * CPUs, leave the task unbound. Splat if the bind attempt fails. |
1613 | */ |
1614 | void rcu_bind_current_to_nocb(void) |
1615 | { |
	if (cpumask_available(rcu_nocb_mask) && !cpumask_empty(rcu_nocb_mask))
1617 | WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask)); |
1618 | } |
1619 | EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb); |
1620 | |
1621 | // The ->on_cpu field is available only in CONFIG_SMP=y, so... |
1622 | #ifdef CONFIG_SMP |
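/* Return "!" if the task is runnable but not actually running on a CPU. */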
1623 | static char *show_rcu_should_be_on_cpu(struct task_struct *tsp) |
1624 | { |
	return tsp && task_is_running(tsp) && !tsp->on_cpu ? "!" : "";
1626 | } |
1627 | #else // #ifdef CONFIG_SMP |
1628 | static char *show_rcu_should_be_on_cpu(struct task_struct *tsp) |
1629 | { |
	return "";
1631 | } |
1632 | #endif // #else #ifdef CONFIG_SMP |
1633 | |
1634 | /* |
1635 | * Dump out nocb grace-period kthread state for the specified rcu_data |
1636 | * structure. |
1637 | */ |
1638 | static void show_rcu_nocb_gp_state(struct rcu_data *rdp) |
1639 | { |
1640 | struct rcu_node *rnp = rdp->mynode; |
1641 | |
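	/*
	 * For each "xX"[cond] argument below, the lowercase letter (or '.')
	 * is printed when the condition is false and the uppercase letter
	 * when it is true.
	 */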
	pr_info("nocb GP %d %c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu %c CPU %d%s\n",
		rdp->cpu,
		"kK"[!!rdp->nocb_gp_kthread],
		"lL"[raw_spin_is_locked(&rdp->nocb_gp_lock)],
		"dD"[!!rdp->nocb_defer_wakeup],
		"tT"[timer_pending(&rdp->nocb_timer)],
		"sS"[!!rdp->nocb_gp_sleep],
		".W"[swait_active(&rdp->nocb_gp_wq)],
		".W"[swait_active(&rnp->nocb_gp_wq[0])],
		".W"[swait_active(&rnp->nocb_gp_wq[1])],
		".B"[!!rdp->nocb_gp_bypass],
		".G"[!!rdp->nocb_gp_gp],
1654 | (long)rdp->nocb_gp_seq, |
1655 | rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops), |
1656 | rdp->nocb_gp_kthread ? task_state_to_char(rdp->nocb_gp_kthread) : '.', |
1657 | rdp->nocb_gp_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1, |
1658 | show_rcu_should_be_on_cpu(rdp->nocb_gp_kthread)); |
1659 | } |
1660 | |
1661 | /* Dump out nocb kthread state for the specified rcu_data structure. */ |
1662 | static void show_rcu_nocb_state(struct rcu_data *rdp) |
1663 | { |
1664 | char bufw[20]; |
1665 | char bufr[20]; |
1666 | struct rcu_data *nocb_next_rdp; |
1667 | struct rcu_segcblist *rsclp = &rdp->cblist; |
1668 | bool waslocked; |
1669 | bool wassleep; |
1670 | |
1671 | if (rdp->nocb_gp_rdp == rdp) |
1672 | show_rcu_nocb_gp_state(rdp); |
1673 | |
1674 | nocb_next_rdp = list_next_or_null_rcu(&rdp->nocb_gp_rdp->nocb_head_rdp, |
1675 | &rdp->nocb_entry_rdp, |
1676 | typeof(*rdp), |
1677 | nocb_entry_rdp); |
1678 | |
	sprintf(bufw, "%ld", rsclp->gp_seq[RCU_WAIT_TAIL]);
	sprintf(bufr, "%ld", rsclp->gp_seq[RCU_NEXT_READY_TAIL]);
	pr_info(" CB %d^%d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%s%c%s%c%c q%ld %c CPU %d%s\n",
		rdp->cpu, rdp->nocb_gp_rdp->cpu,
		nocb_next_rdp ? nocb_next_rdp->cpu : -1,
		"kK"[!!rdp->nocb_cb_kthread],
		"bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)],
		"cC"[!!atomic_read(&rdp->nocb_lock_contended)],
		"lL"[raw_spin_is_locked(&rdp->nocb_lock)],
		"sS"[!!rdp->nocb_cb_sleep],
		".W"[swait_active(&rdp->nocb_cb_wq)],
1690 | jiffies - rdp->nocb_bypass_first, |
1691 | jiffies - rdp->nocb_nobypass_last, |
		rdp->nocb_nobypass_count,
		".D"[rcu_segcblist_ready_cbs(rsclp)],
		".W"[!rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL)],
		rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL) ? "" : bufw,
		".R"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL)],
		rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL) ? "" : bufr,
		".N"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL)],
		".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)],
1700 | rcu_segcblist_n_cbs(&rdp->cblist), |
1701 | rdp->nocb_cb_kthread ? task_state_to_char(rdp->nocb_cb_kthread) : '.', |
1702 | rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_cb_kthread) : -1, |
1703 | show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread)); |
1704 | |
1705 | /* It is OK for GP kthreads to have GP state. */ |
1706 | if (rdp->nocb_gp_rdp == rdp) |
1707 | return; |
1708 | |
1709 | waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock); |
	wassleep = swait_active(&rdp->nocb_gp_wq);
1711 | if (!rdp->nocb_gp_sleep && !waslocked && !wassleep) |
1712 | return; /* Nothing untoward. */ |
1713 | |
	pr_info(" nocb GP activity on CB-only CPU!!! %c%c%c %c\n",
		"lL"[waslocked],
		"dD"[!!rdp->nocb_defer_wakeup],
		"sS"[!!rdp->nocb_gp_sleep],
		".W"[wassleep]);
1719 | } |
1720 | |
1721 | #else /* #ifdef CONFIG_RCU_NOCB_CPU */ |
1722 | |
1723 | static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp) |
1724 | { |
1725 | return 0; |
1726 | } |
1727 | |
1728 | static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp) |
1729 | { |
1730 | return false; |
1731 | } |
1732 | |
1733 | /* No ->nocb_lock to acquire. */ |
1734 | static void rcu_nocb_lock(struct rcu_data *rdp) |
1735 | { |
1736 | } |
1737 | |
1738 | /* No ->nocb_lock to release. */ |
1739 | static void rcu_nocb_unlock(struct rcu_data *rdp) |
1740 | { |
1741 | } |
1742 | |
1743 | /* No ->nocb_lock to release. */ |
1744 | static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp, |
1745 | unsigned long flags) |
1746 | { |
1747 | local_irq_restore(flags); |
1748 | } |
1749 | |
1750 | /* Lockdep check that ->cblist may be safely accessed. */ |
1751 | static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp) |
1752 | { |
1753 | lockdep_assert_irqs_disabled(); |
1754 | } |
1755 | |
1756 | static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq) |
1757 | { |
1758 | } |
1759 | |
1760 | static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) |
1761 | { |
1762 | return NULL; |
1763 | } |
1764 | |
1765 | static void rcu_init_one_nocb(struct rcu_node *rnp) |
1766 | { |
1767 | } |
1768 | |
1769 | static bool wake_nocb_gp(struct rcu_data *rdp, bool force) |
1770 | { |
1771 | return false; |
1772 | } |
1773 | |
1774 | static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, |
1775 | unsigned long j, bool lazy) |
1776 | { |
1777 | return true; |
1778 | } |
1779 | |
1780 | static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head, |
1781 | rcu_callback_t func, unsigned long flags, bool lazy) |
1782 | { |
1783 | WARN_ON_ONCE(1); /* Should be dead code! */ |
1784 | } |
1785 | |
1786 | static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, |
1787 | unsigned long flags) |
1788 | { |
1789 | WARN_ON_ONCE(1); /* Should be dead code! */ |
1790 | } |
1791 | |
1792 | static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) |
1793 | { |
1794 | } |
1795 | |
1796 | static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level) |
1797 | { |
1798 | return false; |
1799 | } |
1800 | |
1801 | static bool do_nocb_deferred_wakeup(struct rcu_data *rdp) |
1802 | { |
1803 | return false; |
1804 | } |
1805 | |
1806 | static void rcu_spawn_cpu_nocb_kthread(int cpu) |
1807 | { |
1808 | } |
1809 | |
1810 | static void show_rcu_nocb_state(struct rcu_data *rdp) |
1811 | { |
1812 | } |
1813 | |
1814 | #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ |
1815 | |