1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* |
3 | * Sleepable Read-Copy Update mechanism for mutual exclusion. |
4 | * |
5 | * Copyright (C) IBM Corporation, 2006 |
6 | * Copyright (C) Fujitsu, 2012 |
7 | * |
8 | * Authors: Paul McKenney <paulmck@linux.ibm.com> |
9 | * Lai Jiangshan <laijs@cn.fujitsu.com> |
10 | * |
11 | * For detailed explanation of Read-Copy Update mechanism see - |
12 | * Documentation/RCU/ *.txt |
13 | * |
14 | */ |
15 | |
16 | #define pr_fmt(fmt) "rcu: " fmt |
17 | |
18 | #include <linux/export.h> |
19 | #include <linux/mutex.h> |
20 | #include <linux/percpu.h> |
21 | #include <linux/preempt.h> |
22 | #include <linux/rcupdate_wait.h> |
23 | #include <linux/sched.h> |
24 | #include <linux/smp.h> |
25 | #include <linux/delay.h> |
26 | #include <linux/module.h> |
27 | #include <linux/slab.h> |
28 | #include <linux/srcu.h> |
29 | |
30 | #include "rcu.h" |
31 | #include "rcu_segcblist.h" |
32 | |
33 | /* Holdoff in nanoseconds for auto-expediting. */ |
34 | #define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000) |
35 | static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF; |
36 | module_param(exp_holdoff, ulong, 0444); |
37 | |
38 | /* Overflow-check frequency. N bits roughly says every 2**N grace periods. */ |
39 | static ulong counter_wrap_check = (ULONG_MAX >> 2); |
40 | module_param(counter_wrap_check, ulong, 0444); |
41 | |
42 | /* |
43 | * Control conversion to SRCU_SIZE_BIG: |
44 | * 0: Don't convert at all. |
45 | * 1: Convert at init_srcu_struct() time. |
46 | * 2: Convert when rcutorture invokes srcu_torture_stats_print(). |
47 | * 3: Decide at boot time based on system shape (default). |
48 | * 0x1x: Convert when excessive contention encountered. |
49 | */ |
50 | #define SRCU_SIZING_NONE 0 |
51 | #define SRCU_SIZING_INIT 1 |
52 | #define SRCU_SIZING_TORTURE 2 |
53 | #define SRCU_SIZING_AUTO 3 |
54 | #define SRCU_SIZING_CONTEND 0x10 |
55 | #define SRCU_SIZING_IS(x) ((convert_to_big & ~SRCU_SIZING_CONTEND) == x) |
56 | #define SRCU_SIZING_IS_NONE() (SRCU_SIZING_IS(SRCU_SIZING_NONE)) |
57 | #define SRCU_SIZING_IS_INIT() (SRCU_SIZING_IS(SRCU_SIZING_INIT)) |
58 | #define SRCU_SIZING_IS_TORTURE() (SRCU_SIZING_IS(SRCU_SIZING_TORTURE)) |
59 | #define SRCU_SIZING_IS_CONTEND() (convert_to_big & SRCU_SIZING_CONTEND) |
60 | static int convert_to_big = SRCU_SIZING_AUTO; |
61 | module_param(convert_to_big, int, 0444); |
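
/*
 * Purely illustrative sketch: because convert_to_big is a 0444 module
 * parameter, it is normally set on the kernel command line, for example
 * (hypothetical settings, using the values defined above):
 *
 *	srcutree.convert_to_big=0	(SRCU_SIZING_NONE: never convert)
 *	srcutree.convert_to_big=2	(SRCU_SIZING_TORTURE)
 *	srcutree.convert_to_big=0x10	(convert only on lock contention)
 *
 * The default of 3 (SRCU_SIZING_AUTO) defers the decision to boot time,
 * based on the number of CPUs (see big_cpu_lim below).
 */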
62 | |
63 | /* Number of CPUs to trigger init_srcu_struct()-time transition to big. */ |
64 | static int big_cpu_lim __read_mostly = 128; |
65 | module_param(big_cpu_lim, int, 0444); |
66 | |
67 | /* Contention events per jiffy to initiate transition to big. */ |
68 | static int small_contention_lim __read_mostly = 100; |
69 | module_param(small_contention_lim, int, 0444); |
70 | |
71 | /* Early-boot callback-management, so early that no lock is required! */ |
72 | static LIST_HEAD(srcu_boot_list); |
73 | static bool __read_mostly srcu_init_done; |
74 | |
75 | static void srcu_invoke_callbacks(struct work_struct *work); |
76 | static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay); |
77 | static void process_srcu(struct work_struct *work); |
78 | static void srcu_delay_timer(struct timer_list *t); |
79 | |
80 | /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ |
81 | #define spin_lock_rcu_node(p) \ |
82 | do { \ |
83 | spin_lock(&ACCESS_PRIVATE(p, lock)); \ |
84 | smp_mb__after_unlock_lock(); \ |
85 | } while (0) |
86 | |
87 | #define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock)) |
88 | |
89 | #define spin_lock_irq_rcu_node(p) \ |
90 | do { \ |
91 | spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ |
92 | smp_mb__after_unlock_lock(); \ |
93 | } while (0) |
94 | |
95 | #define spin_unlock_irq_rcu_node(p) \ |
96 | spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) |
97 | |
98 | #define spin_lock_irqsave_rcu_node(p, flags) \ |
99 | do { \ |
100 | spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ |
101 | smp_mb__after_unlock_lock(); \ |
102 | } while (0) |
103 | |
104 | #define spin_trylock_irqsave_rcu_node(p, flags) \ |
105 | ({ \ |
106 | bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ |
107 | \ |
108 | if (___locked) \ |
109 | smp_mb__after_unlock_lock(); \ |
110 | ___locked; \ |
111 | }) |
112 | |
113 | #define spin_unlock_irqrestore_rcu_node(p, flags) \ |
114 | spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ |
115 | |
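/*
 * A minimal usage sketch (illustrative only): these wrappers are used like
 * the plain spinlock primitives, with smp_mb__after_unlock_lock() supplying
 * the extra ordering that makes a prior unlock plus this lock act as a full
 * memory barrier:
 *
 *	unsigned long flags;
 *
 *	spin_lock_irqsave_rcu_node(sdp, flags);
 *	... update the srcu_data structure's fields ...
 *	spin_unlock_irqrestore_rcu_node(sdp, flags);
 */
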
116 | /* |
117 | * Initialize SRCU per-CPU data. Note that statically allocated |
118 | * srcu_struct structures might already have srcu_read_lock() and |
119 | * srcu_read_unlock() running against them. So if the is_static parameter |
120 | * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[]. |
121 | */ |
122 | static void init_srcu_struct_data(struct srcu_struct *ssp) |
123 | { |
124 | int cpu; |
125 | struct srcu_data *sdp; |
126 | |
127 | /* |
128 | * Initialize the per-CPU srcu_data array, which feeds into the |
129 | * leaves of the srcu_node tree. |
130 | */ |
131 | WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) != |
132 | ARRAY_SIZE(sdp->srcu_unlock_count)); |
133 | for_each_possible_cpu(cpu) { |
134 | sdp = per_cpu_ptr(ssp->sda, cpu); |
135 | spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); |
		rcu_segcblist_init(&sdp->srcu_cblist);
137 | sdp->srcu_cblist_invoking = false; |
138 | sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq; |
139 | sdp->srcu_gp_seq_needed_exp = ssp->srcu_sup->srcu_gp_seq; |
140 | sdp->mynode = NULL; |
141 | sdp->cpu = cpu; |
142 | INIT_WORK(&sdp->work, srcu_invoke_callbacks); |
143 | timer_setup(&sdp->delay_work, srcu_delay_timer, 0); |
144 | sdp->ssp = ssp; |
145 | } |
146 | } |
147 | |
148 | /* Invalid seq state, used during snp node initialization */ |
149 | #define SRCU_SNP_INIT_SEQ 0x2 |
150 | |
151 | /* |
 * Check whether the sequence number corresponding to the snp node
 * is invalid.
154 | */ |
155 | static inline bool srcu_invl_snp_seq(unsigned long s) |
156 | { |
157 | return s == SRCU_SNP_INIT_SEQ; |
158 | } |
159 | |
160 | /* |
 * Allocate and initialize the SRCU combining tree. Returns @true if
162 | * allocation succeeded and @false otherwise. |
163 | */ |
164 | static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) |
165 | { |
166 | int cpu; |
167 | int i; |
168 | int level = 0; |
169 | int levelspread[RCU_NUM_LVLS]; |
170 | struct srcu_data *sdp; |
171 | struct srcu_node *snp; |
172 | struct srcu_node *snp_first; |
173 | |
174 | /* Initialize geometry if it has not already been initialized. */ |
175 | rcu_init_geometry(); |
	ssp->srcu_sup->node = kcalloc(rcu_num_nodes, sizeof(*ssp->srcu_sup->node), gfp_flags);
177 | if (!ssp->srcu_sup->node) |
178 | return false; |
179 | |
180 | /* Work out the overall tree geometry. */ |
181 | ssp->srcu_sup->level[0] = &ssp->srcu_sup->node[0]; |
182 | for (i = 1; i < rcu_num_lvls; i++) |
183 | ssp->srcu_sup->level[i] = ssp->srcu_sup->level[i - 1] + num_rcu_lvl[i - 1]; |
	rcu_init_levelspread(levelspread, num_rcu_lvl);
185 | |
186 | /* Each pass through this loop initializes one srcu_node structure. */ |
187 | srcu_for_each_node_breadth_first(ssp, snp) { |
188 | spin_lock_init(&ACCESS_PRIVATE(snp, lock)); |
189 | WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != |
190 | ARRAY_SIZE(snp->srcu_data_have_cbs)); |
191 | for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { |
192 | snp->srcu_have_cbs[i] = SRCU_SNP_INIT_SEQ; |
193 | snp->srcu_data_have_cbs[i] = 0; |
194 | } |
195 | snp->srcu_gp_seq_needed_exp = SRCU_SNP_INIT_SEQ; |
196 | snp->grplo = -1; |
197 | snp->grphi = -1; |
198 | if (snp == &ssp->srcu_sup->node[0]) { |
199 | /* Root node, special case. */ |
200 | snp->srcu_parent = NULL; |
201 | continue; |
202 | } |
203 | |
204 | /* Non-root node. */ |
205 | if (snp == ssp->srcu_sup->level[level + 1]) |
206 | level++; |
207 | snp->srcu_parent = ssp->srcu_sup->level[level - 1] + |
208 | (snp - ssp->srcu_sup->level[level]) / |
209 | levelspread[level - 1]; |
210 | } |
211 | |
212 | /* |
213 | * Initialize the per-CPU srcu_data array, which feeds into the |
214 | * leaves of the srcu_node tree. |
215 | */ |
216 | level = rcu_num_lvls - 1; |
217 | snp_first = ssp->srcu_sup->level[level]; |
218 | for_each_possible_cpu(cpu) { |
219 | sdp = per_cpu_ptr(ssp->sda, cpu); |
220 | sdp->mynode = &snp_first[cpu / levelspread[level]]; |
221 | for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) { |
222 | if (snp->grplo < 0) |
223 | snp->grplo = cpu; |
224 | snp->grphi = cpu; |
225 | } |
226 | sdp->grpmask = 1UL << (cpu - sdp->mynode->grplo); |
227 | } |
228 | smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_WAIT_BARRIER); |
229 | return true; |
230 | } |
231 | |
232 | /* |
233 | * Initialize non-compile-time initialized fields, including the |
234 | * associated srcu_node and srcu_data structures. The is_static parameter |
235 | * tells us that ->sda has already been wired up to srcu_data. |
236 | */ |
237 | static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) |
238 | { |
239 | if (!is_static) |
		ssp->srcu_sup = kzalloc(sizeof(*ssp->srcu_sup), GFP_KERNEL);
241 | if (!ssp->srcu_sup) |
242 | return -ENOMEM; |
243 | if (!is_static) |
244 | spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); |
245 | ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL; |
246 | ssp->srcu_sup->node = NULL; |
247 | mutex_init(&ssp->srcu_sup->srcu_cb_mutex); |
248 | mutex_init(&ssp->srcu_sup->srcu_gp_mutex); |
249 | ssp->srcu_idx = 0; |
250 | ssp->srcu_sup->srcu_gp_seq = 0; |
251 | ssp->srcu_sup->srcu_barrier_seq = 0; |
252 | mutex_init(&ssp->srcu_sup->srcu_barrier_mutex); |
	atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
254 | INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu); |
255 | ssp->srcu_sup->sda_is_static = is_static; |
256 | if (!is_static) |
257 | ssp->sda = alloc_percpu(struct srcu_data); |
258 | if (!ssp->sda) |
259 | goto err_free_sup; |
260 | init_srcu_struct_data(ssp); |
261 | ssp->srcu_sup->srcu_gp_seq_needed_exp = 0; |
262 | ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns(); |
263 | if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { |
264 | if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) |
265 | goto err_free_sda; |
266 | WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); |
267 | } |
268 | ssp->srcu_sup->srcu_ssp = ssp; |
269 | smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, 0); /* Init done. */ |
270 | return 0; |
271 | |
272 | err_free_sda: |
273 | if (!is_static) { |
		free_percpu(ssp->sda);
		ssp->sda = NULL;
	}
err_free_sup:
	if (!is_static) {
		kfree(ssp->srcu_sup);
280 | ssp->srcu_sup = NULL; |
281 | } |
282 | return -ENOMEM; |
283 | } |
284 | |
285 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
286 | |
287 | int __init_srcu_struct(struct srcu_struct *ssp, const char *name, |
288 | struct lock_class_key *key) |
289 | { |
290 | /* Don't re-initialize a lock while it is held. */ |
	debug_check_no_locks_freed((void *)ssp, sizeof(*ssp));
	lockdep_init_map(&ssp->dep_map, name, key, 0);
	return init_srcu_struct_fields(ssp, false);
294 | } |
295 | EXPORT_SYMBOL_GPL(__init_srcu_struct); |
296 | |
297 | #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
298 | |
299 | /** |
300 | * init_srcu_struct - initialize a sleep-RCU structure |
301 | * @ssp: structure to initialize. |
302 | * |
303 | * Must invoke this on a given srcu_struct before passing that srcu_struct |
304 | * to any other function. Each srcu_struct represents a separate domain |
305 | * of SRCU protection. |
306 | */ |
307 | int init_srcu_struct(struct srcu_struct *ssp) |
308 | { |
309 | return init_srcu_struct_fields(ssp, false); |
310 | } |
311 | EXPORT_SYMBOL_GPL(init_srcu_struct); |
312 | |
313 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
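
/*
 * Illustrative usage sketch, not part of this file's logic: a dynamically
 * initialized SRCU domain (the name "my_srcu" is hypothetical) is set up
 * and torn down roughly as follows:
 *
 *	static struct srcu_struct my_srcu;
 *
 *	if (init_srcu_struct(&my_srcu))
 *		return -ENOMEM;
 *	...
 *	cleanup_srcu_struct(&my_srcu);
 *
 * Statically allocated domains instead use DEFINE_SRCU() or
 * DEFINE_STATIC_SRCU() and rely on the first-use initialization performed
 * by check_init_srcu_struct() below.
 */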
314 | |
315 | /* |
316 | * Initiate a transition to SRCU_SIZE_BIG with lock held. |
317 | */ |
318 | static void __srcu_transition_to_big(struct srcu_struct *ssp) |
319 | { |
320 | lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); |
321 | smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC); |
322 | } |
323 | |
324 | /* |
325 | * Initiate an idempotent transition to SRCU_SIZE_BIG. |
326 | */ |
327 | static void srcu_transition_to_big(struct srcu_struct *ssp) |
328 | { |
329 | unsigned long flags; |
330 | |
	/* Double-checked locking on ->srcu_size_state. */
332 | if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) |
333 | return; |
334 | spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); |
335 | if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) { |
336 | spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); |
337 | return; |
338 | } |
339 | __srcu_transition_to_big(ssp); |
340 | spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); |
341 | } |
342 | |
343 | /* |
344 | * Check to see if the just-encountered contention event justifies |
345 | * a transition to SRCU_SIZE_BIG. |
346 | */ |
347 | static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp) |
348 | { |
349 | unsigned long j; |
350 | |
351 | if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_sup->srcu_size_state) |
352 | return; |
353 | j = jiffies; |
354 | if (ssp->srcu_sup->srcu_size_jiffies != j) { |
355 | ssp->srcu_sup->srcu_size_jiffies = j; |
356 | ssp->srcu_sup->srcu_n_lock_retries = 0; |
357 | } |
358 | if (++ssp->srcu_sup->srcu_n_lock_retries <= small_contention_lim) |
359 | return; |
360 | __srcu_transition_to_big(ssp); |
361 | } |
362 | |
363 | /* |
364 | * Acquire the specified srcu_data structure's ->lock, but check for |
365 | * excessive contention, which results in initiation of a transition |
366 | * to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module |
367 | * parameter permits this. |
368 | */ |
369 | static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags) |
370 | { |
371 | struct srcu_struct *ssp = sdp->ssp; |
372 | |
373 | if (spin_trylock_irqsave_rcu_node(sdp, *flags)) |
374 | return; |
375 | spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); |
376 | spin_lock_irqsave_check_contention(ssp); |
377 | spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags); |
378 | spin_lock_irqsave_rcu_node(sdp, *flags); |
379 | } |
380 | |
381 | /* |
382 | * Acquire the specified srcu_struct structure's ->lock, but check for |
383 | * excessive contention, which results in initiation of a transition |
384 | * to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module |
385 | * parameter permits this. |
386 | */ |
387 | static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags) |
388 | { |
389 | if (spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags)) |
390 | return; |
391 | spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); |
392 | spin_lock_irqsave_check_contention(ssp); |
393 | } |
394 | |
395 | /* |
396 | * First-use initialization of statically allocated srcu_struct |
397 | * structure. Wiring up the combining tree is more than can be |
398 | * done with compile-time initialization, so this check is added |
399 | * to each update-side SRCU primitive. Use ssp->lock, which -is- |
400 | * compile-time initialized, to resolve races involving multiple |
401 | * CPUs trying to garner first-use privileges. |
402 | */ |
403 | static void check_init_srcu_struct(struct srcu_struct *ssp) |
404 | { |
405 | unsigned long flags; |
406 | |
407 | /* The smp_load_acquire() pairs with the smp_store_release(). */ |
408 | if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/ |
409 | return; /* Already initialized. */ |
410 | spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); |
	if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) {
		spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
		return;
	}
	init_srcu_struct_fields(ssp, true);
416 | spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); |
417 | } |
418 | |
419 | /* |
420 | * Returns approximate total of the readers' ->srcu_lock_count[] values |
421 | * for the rank of per-CPU counters specified by idx. |
422 | */ |
423 | static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx) |
424 | { |
425 | int cpu; |
426 | unsigned long sum = 0; |
427 | |
428 | for_each_possible_cpu(cpu) { |
429 | struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); |
430 | |
		sum += atomic_long_read(&cpuc->srcu_lock_count[idx]);
432 | } |
433 | return sum; |
434 | } |
435 | |
436 | /* |
437 | * Returns approximate total of the readers' ->srcu_unlock_count[] values |
438 | * for the rank of per-CPU counters specified by idx. |
439 | */ |
440 | static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx) |
441 | { |
442 | int cpu; |
443 | unsigned long mask = 0; |
444 | unsigned long sum = 0; |
445 | |
446 | for_each_possible_cpu(cpu) { |
447 | struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); |
448 | |
		sum += atomic_long_read(&cpuc->srcu_unlock_count[idx]);
		if (IS_ENABLED(CONFIG_PROVE_RCU))
			mask = mask | READ_ONCE(cpuc->srcu_nmi_safety);
	}
	WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask >> 1)),
		  "Mixed NMI-safe readers for srcu_struct at %ps.\n", ssp);
455 | return sum; |
456 | } |
457 | |
458 | /* |
459 | * Return true if the number of pre-existing readers is determined to |
460 | * be zero. |
461 | */ |
462 | static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx) |
463 | { |
464 | unsigned long unlocks; |
465 | |
466 | unlocks = srcu_readers_unlock_idx(ssp, idx); |
467 | |
468 | /* |
469 | * Make sure that a lock is always counted if the corresponding |
470 | * unlock is counted. Needs to be a smp_mb() as the read side may |
471 | * contain a read from a variable that is written to before the |
472 | * synchronize_srcu() in the write side. In this case smp_mb()s |
473 | * A and B act like the store buffering pattern. |
474 | * |
475 | * This smp_mb() also pairs with smp_mb() C to prevent accesses |
476 | * after the synchronize_srcu() from being executed before the |
477 | * grace period ends. |
478 | */ |
479 | smp_mb(); /* A */ |
480 | |
481 | /* |
482 | * If the locks are the same as the unlocks, then there must have |
483 | * been no readers on this index at some point in this function. |
484 | * But there might be more readers, as a task might have read |
485 | * the current ->srcu_idx but not yet have incremented its CPU's |
486 | * ->srcu_lock_count[idx] counter. In fact, it is possible |
487 | * that most of the tasks have been preempted between fetching |
488 | * ->srcu_idx and incrementing ->srcu_lock_count[idx]. And there |
489 | * could be almost (ULONG_MAX / sizeof(struct task_struct)) tasks |
490 | * in a system whose address space was fully populated with memory. |
491 | * Call this quantity Nt. |
492 | * |
493 | * So suppose that the updater is preempted at this point in the |
494 | * code for a long time. That now-preempted updater has already |
495 | * flipped ->srcu_idx (possibly during the preceding grace period), |
496 | * done an smp_mb() (again, possibly during the preceding grace |
497 | * period), and summed up the ->srcu_unlock_count[idx] counters. |
498 | * How many times can a given one of the aforementioned Nt tasks |
499 | * increment the old ->srcu_idx value's ->srcu_lock_count[idx] |
500 | * counter, in the absence of nesting? |
501 | * |
502 | * It can clearly do so once, given that it has already fetched |
503 | * the old value of ->srcu_idx and is just about to use that value |
504 | * to index its increment of ->srcu_lock_count[idx]. But as soon as |
505 | * it leaves that SRCU read-side critical section, it will increment |
506 | * ->srcu_unlock_count[idx], which must follow the updater's above |
507 | * read from that same value. Thus, as soon the reading task does |
508 | * an smp_mb() and a later fetch from ->srcu_idx, that task will be |
509 | * guaranteed to get the new index. Except that the increment of |
510 | * ->srcu_unlock_count[idx] in __srcu_read_unlock() is after the |
511 | * smp_mb(), and the fetch from ->srcu_idx in __srcu_read_lock() |
512 | * is before the smp_mb(). Thus, that task might not see the new |
513 | * value of ->srcu_idx until the -second- __srcu_read_lock(), |
514 | * which in turn means that this task might well increment |
515 | * ->srcu_lock_count[idx] for the old value of ->srcu_idx twice, |
516 | * not just once. |
517 | * |
518 | * However, it is important to note that a given smp_mb() takes |
519 | * effect not just for the task executing it, but also for any |
520 | * later task running on that same CPU. |
521 | * |
522 | * That is, there can be almost Nt + Nc further increments of |
523 | * ->srcu_lock_count[idx] for the old index, where Nc is the number |
524 | * of CPUs. But this is OK because the size of the task_struct |
525 | * structure limits the value of Nt and current systems limit Nc |
526 | * to a few thousand. |
527 | * |
528 | * OK, but what about nesting? This does impose a limit on |
529 | * nesting of half of the size of the task_struct structure |
530 | * (measured in bytes), which should be sufficient. A late 2022 |
531 | * TREE01 rcutorture run reported this size to be no less than |
532 | * 9408 bytes, allowing up to 4704 levels of nesting, which is |
533 | * comfortably beyond excessive. Especially on 64-bit systems, |
534 | * which are unlikely to be configured with an address space fully |
535 | * populated with memory, at least not anytime soon. |
536 | */ |
537 | return srcu_readers_lock_idx(ssp, idx) == unlocks; |
538 | } |
539 | |
540 | /** |
 * srcu_readers_active - returns true if there are readers, and false
542 | * otherwise |
543 | * @ssp: which srcu_struct to count active readers (holding srcu_read_lock). |
544 | * |
545 | * Note that this is not an atomic primitive, and can therefore suffer |
546 | * severe errors when invoked on an active srcu_struct. That said, it |
547 | * can be useful as an error check at cleanup time. |
548 | */ |
549 | static bool srcu_readers_active(struct srcu_struct *ssp) |
550 | { |
551 | int cpu; |
552 | unsigned long sum = 0; |
553 | |
554 | for_each_possible_cpu(cpu) { |
555 | struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); |
556 | |
		sum += atomic_long_read(&cpuc->srcu_lock_count[0]);
		sum += atomic_long_read(&cpuc->srcu_lock_count[1]);
		sum -= atomic_long_read(&cpuc->srcu_unlock_count[0]);
		sum -= atomic_long_read(&cpuc->srcu_unlock_count[1]);
561 | } |
562 | return sum; |
563 | } |
564 | |
565 | /* |
566 | * We use an adaptive strategy for synchronize_srcu() and especially for |
567 | * synchronize_srcu_expedited(). We spin for a fixed time period |
568 | * (defined below, boot time configurable) to allow SRCU readers to exit |
569 | * their read-side critical sections. If there are still some readers |
570 | * after one jiffy, we repeatedly block for one jiffy time periods. |
571 | * The blocking time is increased as the grace-period age increases, |
572 | * with max blocking time capped at 10 jiffies. |
573 | */ |
574 | #define SRCU_DEFAULT_RETRY_CHECK_DELAY 5 |
575 | |
576 | static ulong srcu_retry_check_delay = SRCU_DEFAULT_RETRY_CHECK_DELAY; |
577 | module_param(srcu_retry_check_delay, ulong, 0444); |
578 | |
579 | #define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending. |
580 | #define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers. |
581 | |
582 | #define SRCU_DEFAULT_MAX_NODELAY_PHASE_LO 3UL // Lowmark on default per-GP-phase |
583 | // no-delay instances. |
584 | #define SRCU_DEFAULT_MAX_NODELAY_PHASE_HI 1000UL // Highmark on default per-GP-phase |
585 | // no-delay instances. |
586 | |
587 | #define SRCU_UL_CLAMP_LO(val, low) ((val) > (low) ? (val) : (low)) |
588 | #define SRCU_UL_CLAMP_HI(val, high) ((val) < (high) ? (val) : (high)) |
589 | #define SRCU_UL_CLAMP(val, low, high) SRCU_UL_CLAMP_HI(SRCU_UL_CLAMP_LO((val), (low)), (high)) |
// Per-GP-phase no-delay instances adjusted to allow a non-sleeping poll of up
// to one jiffy.  The multiplication by 2 factors in the srcu_get_delay()
// called from process_srcu().
593 | #define SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED \ |
594 | (2UL * USEC_PER_SEC / HZ / SRCU_DEFAULT_RETRY_CHECK_DELAY) |
595 | |
596 | // Maximum per-GP-phase consecutive no-delay instances. |
597 | #define SRCU_DEFAULT_MAX_NODELAY_PHASE \ |
598 | SRCU_UL_CLAMP(SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED, \ |
599 | SRCU_DEFAULT_MAX_NODELAY_PHASE_LO, \ |
600 | SRCU_DEFAULT_MAX_NODELAY_PHASE_HI) |
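
/*
 * Worked example, assuming the default SRCU_DEFAULT_RETRY_CHECK_DELAY of 5
 * microseconds: with HZ=1000 the adjusted value is 2 * 1000000 / 1000 / 5
 * = 400, which lies within the [3, 1000] clamp, so
 * SRCU_DEFAULT_MAX_NODELAY_PHASE is 400.  With HZ=100 the adjusted value
 * of 4000 is instead clamped down to 1000.
 */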
601 | |
602 | static ulong srcu_max_nodelay_phase = SRCU_DEFAULT_MAX_NODELAY_PHASE; |
603 | module_param(srcu_max_nodelay_phase, ulong, 0444); |
604 | |
605 | // Maximum consecutive no-delay instances. |
606 | #define SRCU_DEFAULT_MAX_NODELAY (SRCU_DEFAULT_MAX_NODELAY_PHASE > 100 ? \ |
607 | SRCU_DEFAULT_MAX_NODELAY_PHASE : 100) |
608 | |
609 | static ulong srcu_max_nodelay = SRCU_DEFAULT_MAX_NODELAY; |
610 | module_param(srcu_max_nodelay, ulong, 0444); |
611 | |
612 | /* |
613 | * Return grace-period delay, zero if there are expedited grace |
614 | * periods pending, SRCU_INTERVAL otherwise. |
615 | */ |
616 | static unsigned long srcu_get_delay(struct srcu_struct *ssp) |
617 | { |
618 | unsigned long gpstart; |
619 | unsigned long j; |
620 | unsigned long jbase = SRCU_INTERVAL; |
621 | struct srcu_usage *sup = ssp->srcu_sup; |
622 | |
623 | if (ULONG_CMP_LT(READ_ONCE(sup->srcu_gp_seq), READ_ONCE(sup->srcu_gp_seq_needed_exp))) |
624 | jbase = 0; |
625 | if (rcu_seq_state(READ_ONCE(sup->srcu_gp_seq))) { |
626 | j = jiffies - 1; |
627 | gpstart = READ_ONCE(sup->srcu_gp_start); |
628 | if (time_after(j, gpstart)) |
629 | jbase += j - gpstart; |
630 | if (!jbase) { |
631 | WRITE_ONCE(sup->srcu_n_exp_nodelay, READ_ONCE(sup->srcu_n_exp_nodelay) + 1); |
632 | if (READ_ONCE(sup->srcu_n_exp_nodelay) > srcu_max_nodelay_phase) |
633 | jbase = 1; |
634 | } |
635 | } |
636 | return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase; |
637 | } |
638 | |
639 | /** |
640 | * cleanup_srcu_struct - deconstruct a sleep-RCU structure |
641 | * @ssp: structure to clean up. |
642 | * |
643 | * Must invoke this after you are finished using a given srcu_struct that |
644 | * was initialized via init_srcu_struct(), else you leak memory. |
645 | */ |
646 | void cleanup_srcu_struct(struct srcu_struct *ssp) |
647 | { |
648 | int cpu; |
649 | struct srcu_usage *sup = ssp->srcu_sup; |
650 | |
651 | if (WARN_ON(!srcu_get_delay(ssp))) |
652 | return; /* Just leak it! */ |
653 | if (WARN_ON(srcu_readers_active(ssp))) |
654 | return; /* Just leak it! */ |
	flush_delayed_work(&sup->work);
	for_each_possible_cpu(cpu) {
		struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);

		del_timer_sync(&sdp->delay_work);
		flush_work(&sdp->work);
661 | if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist))) |
662 | return; /* Forgot srcu_barrier(), so just leak it! */ |
663 | } |
664 | if (WARN_ON(rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)) != SRCU_STATE_IDLE) || |
665 | WARN_ON(rcu_seq_current(&sup->srcu_gp_seq) != sup->srcu_gp_seq_needed) || |
666 | WARN_ON(srcu_readers_active(ssp))) { |
		pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n",
668 | __func__, ssp, rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)), |
669 | rcu_seq_current(&sup->srcu_gp_seq), sup->srcu_gp_seq_needed); |
670 | return; /* Caller forgot to stop doing call_srcu()? */ |
671 | } |
	kfree(sup->node);
	sup->node = NULL;
	sup->srcu_size_state = SRCU_SIZE_SMALL;
	if (!sup->sda_is_static) {
		free_percpu(ssp->sda);
		ssp->sda = NULL;
		kfree(sup);
679 | ssp->srcu_sup = NULL; |
680 | } |
681 | } |
682 | EXPORT_SYMBOL_GPL(cleanup_srcu_struct); |
683 | |
684 | #ifdef CONFIG_PROVE_RCU |
685 | /* |
686 | * Check for consistent NMI safety. |
687 | */ |
688 | void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe) |
689 | { |
690 | int nmi_safe_mask = 1 << nmi_safe; |
691 | int old_nmi_safe_mask; |
692 | struct srcu_data *sdp; |
693 | |
694 | /* NMI-unsafe use in NMI is a bad sign */ |
695 | WARN_ON_ONCE(!nmi_safe && in_nmi()); |
696 | sdp = raw_cpu_ptr(ssp->sda); |
697 | old_nmi_safe_mask = READ_ONCE(sdp->srcu_nmi_safety); |
698 | if (!old_nmi_safe_mask) { |
699 | WRITE_ONCE(sdp->srcu_nmi_safety, nmi_safe_mask); |
700 | return; |
701 | } |
	WARN_ONCE(old_nmi_safe_mask != nmi_safe_mask, "CPU %d old state %d new state %d\n", sdp->cpu, old_nmi_safe_mask, nmi_safe_mask);
703 | } |
704 | EXPORT_SYMBOL_GPL(srcu_check_nmi_safety); |
705 | #endif /* CONFIG_PROVE_RCU */ |
706 | |
707 | /* |
708 | * Counts the new reader in the appropriate per-CPU element of the |
709 | * srcu_struct. |
710 | * Returns an index that must be passed to the matching srcu_read_unlock(). |
711 | */ |
712 | int __srcu_read_lock(struct srcu_struct *ssp) |
713 | { |
714 | int idx; |
715 | |
716 | idx = READ_ONCE(ssp->srcu_idx) & 0x1; |
717 | this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); |
718 | smp_mb(); /* B */ /* Avoid leaking the critical section. */ |
719 | return idx; |
720 | } |
721 | EXPORT_SYMBOL_GPL(__srcu_read_lock); |
722 | |
723 | /* |
724 | * Removes the count for the old reader from the appropriate per-CPU |
725 | * element of the srcu_struct. Note that this may well be a different |
726 | * CPU than that which was incremented by the corresponding srcu_read_lock(). |
727 | */ |
728 | void __srcu_read_unlock(struct srcu_struct *ssp, int idx) |
729 | { |
730 | smp_mb(); /* C */ /* Avoid leaking the critical section. */ |
731 | this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); |
732 | } |
733 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); |
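
/*
 * Reader-side usage sketch (illustrative only; my_srcu, gp, struct foo, and
 * do_something_with() are hypothetical).  Callers normally go through the
 * srcu_read_lock() and srcu_read_unlock() wrappers, which invoke the two
 * functions above:
 *
 *	int idx;
 *	struct foo *p;
 *
 *	idx = srcu_read_lock(&my_srcu);
 *	p = srcu_dereference(gp, &my_srcu);
 *	if (p)
 *		do_something_with(p);
 *	srcu_read_unlock(&my_srcu, idx);
 *
 * The index returned by srcu_read_lock() must be passed to the matching
 * srcu_read_unlock(), and the critical section is permitted to sleep.
 */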
734 | |
735 | #ifdef CONFIG_NEED_SRCU_NMI_SAFE |
736 | |
737 | /* |
738 | * Counts the new reader in the appropriate per-CPU element of the |
739 | * srcu_struct, but in an NMI-safe manner using RMW atomics. |
740 | * Returns an index that must be passed to the matching srcu_read_unlock(). |
741 | */ |
742 | int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) |
743 | { |
744 | int idx; |
745 | struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); |
746 | |
747 | idx = READ_ONCE(ssp->srcu_idx) & 0x1; |
748 | atomic_long_inc(&sdp->srcu_lock_count[idx]); |
749 | smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */ |
750 | return idx; |
751 | } |
752 | EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe); |
753 | |
754 | /* |
755 | * Removes the count for the old reader from the appropriate per-CPU |
756 | * element of the srcu_struct. Note that this may well be a different |
757 | * CPU than that which was incremented by the corresponding srcu_read_lock(). |
758 | */ |
759 | void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) |
760 | { |
761 | struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); |
762 | |
763 | smp_mb__before_atomic(); /* C */ /* Avoid leaking the critical section. */ |
764 | atomic_long_inc(&sdp->srcu_unlock_count[idx]); |
765 | } |
766 | EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe); |
767 | |
768 | #endif // CONFIG_NEED_SRCU_NMI_SAFE |
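
/*
 * NMI-safe reader sketch (illustrative only, with a hypothetical my_srcu):
 * code that may run in NMI context uses the srcu_read_lock_nmisafe() and
 * srcu_read_unlock_nmisafe() wrappers, which on CONFIG_NEED_SRCU_NMI_SAFE
 * kernels call the functions above.  srcu_check_nmi_safety() warns if a
 * given srcu_struct mixes NMI-safe and non-NMI-safe readers:
 *
 *	int idx;
 *
 *	idx = srcu_read_lock_nmisafe(&my_srcu);
 *	... access data protected by my_srcu ...
 *	srcu_read_unlock_nmisafe(&my_srcu, idx);
 */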
769 | |
770 | /* |
771 | * Start an SRCU grace period. |
772 | */ |
773 | static void srcu_gp_start(struct srcu_struct *ssp) |
774 | { |
775 | struct srcu_data *sdp; |
776 | int state; |
777 | |
778 | if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) |
779 | sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id()); |
780 | else |
781 | sdp = this_cpu_ptr(ssp->sda); |
782 | lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); |
783 | WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)); |
784 | spin_lock_rcu_node(sdp); /* Interrupts already disabled. */ |
	rcu_segcblist_advance(&sdp->srcu_cblist,
			      rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
	WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL));
	spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */
	WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies);
	WRITE_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay, 0);
	smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
	rcu_seq_start(&ssp->srcu_sup->srcu_gp_seq);
	state = rcu_seq_state(ssp->srcu_sup->srcu_gp_seq);
794 | WARN_ON_ONCE(state != SRCU_STATE_SCAN1); |
795 | } |
796 | |
797 | |
798 | static void srcu_delay_timer(struct timer_list *t) |
799 | { |
800 | struct srcu_data *sdp = container_of(t, struct srcu_data, delay_work); |
801 | |
	queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
803 | } |
804 | |
805 | static void srcu_queue_delayed_work_on(struct srcu_data *sdp, |
806 | unsigned long delay) |
807 | { |
808 | if (!delay) { |
		queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
810 | return; |
811 | } |
812 | |
	timer_reduce(&sdp->delay_work, jiffies + delay);
814 | } |
815 | |
816 | /* |
817 | * Schedule callback invocation for the specified srcu_data structure, |
818 | * if possible, on the corresponding CPU. |
819 | */ |
820 | static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay) |
821 | { |
822 | srcu_queue_delayed_work_on(sdp, delay); |
823 | } |
824 | |
825 | /* |
826 | * Schedule callback invocation for all srcu_data structures associated |
827 | * with the specified srcu_node structure that have callbacks for the |
828 | * just-completed grace period, the one corresponding to idx. If possible, |
829 | * schedule this invocation on the corresponding CPUs. |
830 | */ |
831 | static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp, |
832 | unsigned long mask, unsigned long delay) |
833 | { |
834 | int cpu; |
835 | |
836 | for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { |
837 | if (!(mask & (1UL << (cpu - snp->grplo)))) |
838 | continue; |
839 | srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay); |
840 | } |
841 | } |
842 | |
843 | /* |
844 | * Note the end of an SRCU grace period. Initiates callback invocation |
845 | * and starts a new grace period if needed. |
846 | * |
847 | * The ->srcu_cb_mutex acquisition does not protect any data, but |
848 | * instead prevents more than one grace period from starting while we |
849 | * are initiating callback invocation. This allows the ->srcu_have_cbs[] |
850 | * array to have a finite number of elements. |
851 | */ |
852 | static void srcu_gp_end(struct srcu_struct *ssp) |
853 | { |
854 | unsigned long cbdelay = 1; |
855 | bool cbs; |
856 | bool last_lvl; |
857 | int cpu; |
858 | unsigned long flags; |
859 | unsigned long gpseq; |
860 | int idx; |
861 | unsigned long mask; |
862 | struct srcu_data *sdp; |
863 | unsigned long sgsne; |
864 | struct srcu_node *snp; |
865 | int ss_state; |
866 | struct srcu_usage *sup = ssp->srcu_sup; |
867 | |
868 | /* Prevent more than one additional grace period. */ |
869 | mutex_lock(&sup->srcu_cb_mutex); |
870 | |
871 | /* End the current grace period. */ |
872 | spin_lock_irq_rcu_node(sup); |
	idx = rcu_seq_state(sup->srcu_gp_seq);
874 | WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); |
875 | if (ULONG_CMP_LT(READ_ONCE(sup->srcu_gp_seq), READ_ONCE(sup->srcu_gp_seq_needed_exp))) |
876 | cbdelay = 0; |
877 | |
878 | WRITE_ONCE(sup->srcu_last_gp_end, ktime_get_mono_fast_ns()); |
	rcu_seq_end(&sup->srcu_gp_seq);
	gpseq = rcu_seq_current(&sup->srcu_gp_seq);
	if (ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, gpseq))
		WRITE_ONCE(sup->srcu_gp_seq_needed_exp, gpseq);
	spin_unlock_irq_rcu_node(sup);
	mutex_unlock(&sup->srcu_gp_mutex);
885 | /* A new grace period can start at this point. But only one. */ |
886 | |
887 | /* Initiate callback invocation as needed. */ |
888 | ss_state = smp_load_acquire(&sup->srcu_size_state); |
889 | if (ss_state < SRCU_SIZE_WAIT_BARRIER) { |
		srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, get_boot_cpu_id()),
				      cbdelay);
	} else {
		idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
894 | srcu_for_each_node_breadth_first(ssp, snp) { |
895 | spin_lock_irq_rcu_node(snp); |
896 | cbs = false; |
897 | last_lvl = snp >= sup->level[rcu_num_lvls - 1]; |
898 | if (last_lvl) |
899 | cbs = ss_state < SRCU_SIZE_BIG || snp->srcu_have_cbs[idx] == gpseq; |
900 | snp->srcu_have_cbs[idx] = gpseq; |
			rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
			sgsne = snp->srcu_gp_seq_needed_exp;
			if (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, gpseq))
904 | WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq); |
905 | if (ss_state < SRCU_SIZE_BIG) |
906 | mask = ~0; |
907 | else |
908 | mask = snp->srcu_data_have_cbs[idx]; |
909 | snp->srcu_data_have_cbs[idx] = 0; |
910 | spin_unlock_irq_rcu_node(snp); |
911 | if (cbs) |
				srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay);
913 | } |
914 | } |
915 | |
916 | /* Occasionally prevent srcu_data counter wrap. */ |
917 | if (!(gpseq & counter_wrap_check)) |
918 | for_each_possible_cpu(cpu) { |
919 | sdp = per_cpu_ptr(ssp->sda, cpu); |
920 | spin_lock_irqsave_rcu_node(sdp, flags); |
921 | if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100)) |
922 | sdp->srcu_gp_seq_needed = gpseq; |
923 | if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100)) |
924 | sdp->srcu_gp_seq_needed_exp = gpseq; |
925 | spin_unlock_irqrestore_rcu_node(sdp, flags); |
926 | } |
927 | |
928 | /* Callback initiation done, allow grace periods after next. */ |
	mutex_unlock(&sup->srcu_cb_mutex);
930 | |
931 | /* Start a new grace period if needed. */ |
932 | spin_lock_irq_rcu_node(sup); |
	gpseq = rcu_seq_current(&sup->srcu_gp_seq);
	if (!rcu_seq_state(gpseq) &&
	    ULONG_CMP_LT(gpseq, sup->srcu_gp_seq_needed)) {
		srcu_gp_start(ssp);
		spin_unlock_irq_rcu_node(sup);
		srcu_reschedule(ssp, 0);
939 | } else { |
940 | spin_unlock_irq_rcu_node(sup); |
941 | } |
942 | |
943 | /* Transition to big if needed. */ |
944 | if (ss_state != SRCU_SIZE_SMALL && ss_state != SRCU_SIZE_BIG) { |
945 | if (ss_state == SRCU_SIZE_ALLOC) |
946 | init_srcu_struct_nodes(ssp, GFP_KERNEL); |
947 | else |
948 | smp_store_release(&sup->srcu_size_state, ss_state + 1); |
949 | } |
950 | } |
951 | |
952 | /* |
953 | * Funnel-locking scheme to scalably mediate many concurrent expedited |
954 | * grace-period requests. This function is invoked for the first known |
955 | * expedited request for a grace period that has already been requested, |
956 | * but without expediting. To start a completely new grace period, |
957 | * whether expedited or not, use srcu_funnel_gp_start() instead. |
958 | */ |
959 | static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp, |
960 | unsigned long s) |
961 | { |
962 | unsigned long flags; |
963 | unsigned long sgsne; |
964 | |
965 | if (snp) |
966 | for (; snp != NULL; snp = snp->srcu_parent) { |
967 | sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp); |
968 | if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) || |
			    (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)))
				return;
			spin_lock_irqsave_rcu_node(snp, flags);
			sgsne = snp->srcu_gp_seq_needed_exp;
			if (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)) {
974 | spin_unlock_irqrestore_rcu_node(snp, flags); |
975 | return; |
976 | } |
977 | WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); |
978 | spin_unlock_irqrestore_rcu_node(snp, flags); |
979 | } |
	spin_lock_irqsave_ssp_contention(ssp, &flags);
981 | if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s)) |
982 | WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s); |
983 | spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); |
984 | } |
985 | |
986 | /* |
987 | * Funnel-locking scheme to scalably mediate many concurrent grace-period |
988 | * requests. The winner has to do the work of actually starting grace |
989 | * period s. Losers must either ensure that their desired grace-period |
990 | * number is recorded on at least their leaf srcu_node structure, or they |
991 | * must take steps to invoke their own callbacks. |
992 | * |
993 | * Note that this function also does the work of srcu_funnel_exp_start(), |
994 | * in some cases by directly invoking it. |
995 | * |
 * The SRCU read lock should be held across this function, and s is the
 * grace-period sequence snapshot taken after that lock was acquired.
998 | */ |
999 | static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, |
1000 | unsigned long s, bool do_norm) |
1001 | { |
1002 | unsigned long flags; |
1003 | int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs); |
1004 | unsigned long sgsne; |
1005 | struct srcu_node *snp; |
1006 | struct srcu_node *snp_leaf; |
1007 | unsigned long snp_seq; |
1008 | struct srcu_usage *sup = ssp->srcu_sup; |
1009 | |
1010 | /* Ensure that snp node tree is fully initialized before traversing it */ |
1011 | if (smp_load_acquire(&sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) |
1012 | snp_leaf = NULL; |
1013 | else |
1014 | snp_leaf = sdp->mynode; |
1015 | |
1016 | if (snp_leaf) |
1017 | /* Each pass through the loop does one level of the srcu_node tree. */ |
1018 | for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) { |
1019 | if (WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && snp != snp_leaf) |
1020 | return; /* GP already done and CBs recorded. */ |
1021 | spin_lock_irqsave_rcu_node(snp, flags); |
1022 | snp_seq = snp->srcu_have_cbs[idx]; |
			if (!srcu_invl_snp_seq(snp_seq) && ULONG_CMP_GE(snp_seq, s)) {
				if (snp == snp_leaf && snp_seq == s)
					snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
				spin_unlock_irqrestore_rcu_node(snp, flags);
				if (snp == snp_leaf && snp_seq != s) {
					srcu_schedule_cbs_sdp(sdp, do_norm ? SRCU_INTERVAL : 0);
1029 | return; |
1030 | } |
1031 | if (!do_norm) |
1032 | srcu_funnel_exp_start(ssp, snp, s); |
1033 | return; |
1034 | } |
1035 | snp->srcu_have_cbs[idx] = s; |
1036 | if (snp == snp_leaf) |
1037 | snp->srcu_data_have_cbs[idx] |= sdp->grpmask; |
1038 | sgsne = snp->srcu_gp_seq_needed_exp; |
			if (!do_norm && (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, s)))
1040 | WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); |
1041 | spin_unlock_irqrestore_rcu_node(snp, flags); |
1042 | } |
1043 | |
1044 | /* Top of tree, must ensure the grace period will be started. */ |
	spin_lock_irqsave_ssp_contention(ssp, &flags);
1046 | if (ULONG_CMP_LT(sup->srcu_gp_seq_needed, s)) { |
1047 | /* |
1048 | * Record need for grace period s. Pair with load |
1049 | * acquire setting up for initialization. |
1050 | */ |
1051 | smp_store_release(&sup->srcu_gp_seq_needed, s); /*^^^*/ |
1052 | } |
1053 | if (!do_norm && ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, s)) |
1054 | WRITE_ONCE(sup->srcu_gp_seq_needed_exp, s); |
1055 | |
1056 | /* If grace period not already in progress, start it. */ |
1057 | if (!WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && |
	    rcu_seq_state(sup->srcu_gp_seq) == SRCU_STATE_IDLE) {
1059 | WARN_ON_ONCE(ULONG_CMP_GE(sup->srcu_gp_seq, sup->srcu_gp_seq_needed)); |
1060 | srcu_gp_start(ssp); |
1061 | |
1062 | // And how can that list_add() in the "else" clause |
1063 | // possibly be safe for concurrent execution? Well, |
1064 | // it isn't. And it does not have to be. After all, it |
1065 | // can only be executed during early boot when there is only |
1066 | // the one boot CPU running with interrupts still disabled. |
1067 | if (likely(srcu_init_done)) |
			queue_delayed_work(rcu_gp_wq, &sup->work,
					   !!srcu_get_delay(ssp));
		else if (list_empty(&sup->work.work.entry))
			list_add(&sup->work.work.entry, &srcu_boot_list);
1072 | } |
1073 | spin_unlock_irqrestore_rcu_node(sup, flags); |
1074 | } |
1075 | |
1076 | /* |
1077 | * Wait until all readers counted by array index idx complete, but |
1078 | * loop an additional time if there is an expedited grace period pending. |
1079 | * The caller must ensure that ->srcu_idx is not changed while checking. |
1080 | */ |
1081 | static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) |
1082 | { |
1083 | unsigned long curdelay; |
1084 | |
1085 | curdelay = !srcu_get_delay(ssp); |
1086 | |
1087 | for (;;) { |
1088 | if (srcu_readers_active_idx_check(ssp, idx)) |
1089 | return true; |
1090 | if ((--trycount + curdelay) <= 0) |
1091 | return false; |
1092 | udelay(srcu_retry_check_delay); |
1093 | } |
1094 | } |
1095 | |
1096 | /* |
1097 | * Increment the ->srcu_idx counter so that future SRCU readers will |
1098 | * use the other rank of the ->srcu_(un)lock_count[] arrays. This allows |
1099 | * us to wait for pre-existing readers in a starvation-free manner. |
1100 | */ |
1101 | static void srcu_flip(struct srcu_struct *ssp) |
1102 | { |
1103 | /* |
1104 | * Because the flip of ->srcu_idx is executed only if the |
1105 | * preceding call to srcu_readers_active_idx_check() found that |
1106 | * the ->srcu_unlock_count[] and ->srcu_lock_count[] sums matched |
1107 | * and because that summing uses atomic_long_read(), there is |
1108 | * ordering due to a control dependency between that summing and |
1109 | * the WRITE_ONCE() in this call to srcu_flip(). This ordering |
1110 | * ensures that if this updater saw a given reader's increment from |
1111 | * __srcu_read_lock(), that reader was using a value of ->srcu_idx |
1112 | * from before the previous call to srcu_flip(), which should be |
1113 | * quite rare. This ordering thus helps forward progress because |
1114 | * the grace period could otherwise be delayed by additional |
1115 | * calls to __srcu_read_lock() using that old (soon to be new) |
1116 | * value of ->srcu_idx. |
1117 | * |
1118 | * This sum-equality check and ordering also ensures that if |
1119 | * a given call to __srcu_read_lock() uses the new value of |
1120 | * ->srcu_idx, this updater's earlier scans cannot have seen |
1121 | * that reader's increments, which is all to the good, because |
1122 | * this grace period need not wait on that reader. After all, |
1123 | * if those earlier scans had seen that reader, there would have |
1124 | * been a sum mismatch and this code would not be reached. |
1125 | * |
1126 | * This means that the following smp_mb() is redundant, but |
1127 | * it stays until either (1) Compilers learn about this sort of |
1128 | * control dependency or (2) Some production workload running on |
1129 | * a production system is unduly delayed by this slowpath smp_mb(). |
1130 | */ |
1131 | smp_mb(); /* E */ /* Pairs with B and C. */ |
1132 | |
1133 | WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter. |
1134 | |
1135 | /* |
1136 | * Ensure that if the updater misses an __srcu_read_unlock() |
1137 | * increment, that task's __srcu_read_lock() following its next |
1138 | * __srcu_read_lock() or __srcu_read_unlock() will see the above |
1139 | * counter update. Note that both this memory barrier and the |
1140 | * one in srcu_readers_active_idx_check() provide the guarantee |
1141 | * for __srcu_read_lock(). |
1142 | */ |
1143 | smp_mb(); /* D */ /* Pairs with C. */ |
1144 | } |
1145 | |
1146 | /* |
1147 | * If SRCU is likely idle, return true, otherwise return false. |
1148 | * |
 * Note that it is OK for several concurrent from-idle requests for a new
 * grace period to specify expediting because they will all end
1151 | * up requesting the same grace period anyhow. So no loss. |
1152 | * |
1153 | * Note also that if any CPU (including the current one) is still invoking |
1154 | * callbacks, this function will nevertheless say "idle". This is not |
1155 | * ideal, but the overhead of checking all CPUs' callback lists is even |
1156 | * less ideal, especially on large systems. Furthermore, the wakeup |
1157 | * can happen before the callback is fully removed, so we have no choice |
1158 | * but to accept this type of error. |
1159 | * |
1160 | * This function is also subject to counter-wrap errors, but let's face |
1161 | * it, if this function was preempted for enough time for the counters |
1162 | * to wrap, it really doesn't matter whether or not we expedite the grace |
1163 | * period. The extra overhead of a needlessly expedited grace period is |
1164 | * negligible when amortized over that time period, and the extra latency |
1165 | * of a needlessly non-expedited grace period is similarly negligible. |
1166 | */ |
1167 | static bool srcu_might_be_idle(struct srcu_struct *ssp) |
1168 | { |
1169 | unsigned long curseq; |
1170 | unsigned long flags; |
1171 | struct srcu_data *sdp; |
1172 | unsigned long t; |
1173 | unsigned long tlast; |
1174 | |
1175 | check_init_srcu_struct(ssp); |
1176 | /* If the local srcu_data structure has callbacks, not idle. */ |
1177 | sdp = raw_cpu_ptr(ssp->sda); |
1178 | spin_lock_irqsave_rcu_node(sdp, flags); |
	if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
1180 | spin_unlock_irqrestore_rcu_node(sdp, flags); |
1181 | return false; /* Callbacks already present, so not idle. */ |
1182 | } |
1183 | spin_unlock_irqrestore_rcu_node(sdp, flags); |
1184 | |
1185 | /* |
1186 | * No local callbacks, so probabilistically probe global state. |
1187 | * Exact information would require acquiring locks, which would |
1188 | * kill scalability, hence the probabilistic nature of the probe. |
1189 | */ |
1190 | |
1191 | /* First, see if enough time has passed since the last GP. */ |
1192 | t = ktime_get_mono_fast_ns(); |
1193 | tlast = READ_ONCE(ssp->srcu_sup->srcu_last_gp_end); |
1194 | if (exp_holdoff == 0 || |
1195 | time_in_range_open(t, tlast, tlast + exp_holdoff)) |
1196 | return false; /* Too soon after last GP. */ |
1197 | |
1198 | /* Next, check for probable idleness. */ |
	curseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
	smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
	if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed)))
		return false; /* Grace period in progress, so not idle. */
	smp_mb(); /* Order ->srcu_gp_seq with prior access. */
	if (curseq != rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq))
1205 | return false; /* GP # changed, so not idle. */ |
1206 | return true; /* With reasonable probability, idle! */ |
1207 | } |
1208 | |
1209 | /* |
1210 | * SRCU callback function to leak a callback. |
1211 | */ |
1212 | static void srcu_leak_callback(struct rcu_head *rhp) |
1213 | { |
1214 | } |
1215 | |
1216 | /* |
1217 | * Start an SRCU grace period, and also queue the callback if non-NULL. |
1218 | */ |
1219 | static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, |
1220 | struct rcu_head *rhp, bool do_norm) |
1221 | { |
1222 | unsigned long flags; |
1223 | int idx; |
1224 | bool needexp = false; |
1225 | bool needgp = false; |
1226 | unsigned long s; |
1227 | struct srcu_data *sdp; |
1228 | struct srcu_node *sdp_mynode; |
1229 | int ss_state; |
1230 | |
1231 | check_init_srcu_struct(ssp); |
1232 | /* |
1233 | * While starting a new grace period, make sure we are in an |
1234 | * SRCU read-side critical section so that the grace-period |
1235 | * sequence number cannot wrap around in the meantime. |
1236 | */ |
1237 | idx = __srcu_read_lock_nmisafe(ssp); |
1238 | ss_state = smp_load_acquire(&ssp->srcu_sup->srcu_size_state); |
1239 | if (ss_state < SRCU_SIZE_WAIT_CALL) |
1240 | sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id()); |
1241 | else |
1242 | sdp = raw_cpu_ptr(ssp->sda); |
1243 | spin_lock_irqsave_sdp_contention(sdp, flags: &flags); |
1244 | if (rhp) |
		rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
1246 | /* |
1247 | * The snapshot for acceleration must be taken _before_ the read of the |
1248 | * current gp sequence used for advancing, otherwise advancing may fail |
1249 | * and acceleration may then fail too. |
1250 | * |
1251 | * This could happen if: |
1252 | * |
1253 | * 1) The RCU_WAIT_TAIL segment has callbacks (gp_num = X + 4) and the |
1254 | * RCU_NEXT_READY_TAIL also has callbacks (gp_num = X + 8). |
1255 | * |
1256 | * 2) The grace period for RCU_WAIT_TAIL is seen as started but not |
1257 | * completed so rcu_seq_current() returns X + SRCU_STATE_SCAN1. |
1258 | * |
1259 | * 3) This value is passed to rcu_segcblist_advance() which can't move |
1260 | * any segment forward and fails. |
1261 | * |
1262 | * 4) srcu_gp_start_if_needed() still proceeds with callback acceleration. |
1263 | * But then the call to rcu_seq_snap() observes the grace period for the |
1264 | * RCU_WAIT_TAIL segment as completed and the subsequent one for the |
1265 | * RCU_NEXT_READY_TAIL segment as started (ie: X + 4 + SRCU_STATE_SCAN1) |
1266 | * so it returns a snapshot of the next grace period, which is X + 12. |
1267 | * |
1268 | * 5) The value of X + 12 is passed to rcu_segcblist_accelerate() but the |
1269 | * freshly enqueued callback in RCU_NEXT_TAIL can't move to |
1270 | * RCU_NEXT_READY_TAIL which already has callbacks for a previous grace |
1271 | * period (gp_num = X + 8). So acceleration fails. |
1272 | */ |
	s = rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);
	rcu_segcblist_advance(&sdp->srcu_cblist,
			      rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
1276 | WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s) && rhp); |
1277 | if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { |
1278 | sdp->srcu_gp_seq_needed = s; |
1279 | needgp = true; |
1280 | } |
1281 | if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) { |
1282 | sdp->srcu_gp_seq_needed_exp = s; |
1283 | needexp = true; |
1284 | } |
1285 | spin_unlock_irqrestore_rcu_node(sdp, flags); |
1286 | |
1287 | /* Ensure that snp node tree is fully initialized before traversing it */ |
1288 | if (ss_state < SRCU_SIZE_WAIT_BARRIER) |
1289 | sdp_mynode = NULL; |
1290 | else |
1291 | sdp_mynode = sdp->mynode; |
1292 | |
1293 | if (needgp) |
1294 | srcu_funnel_gp_start(ssp, sdp, s, do_norm); |
1295 | else if (needexp) |
		srcu_funnel_exp_start(ssp, sdp_mynode, s);
1297 | __srcu_read_unlock_nmisafe(ssp, idx); |
1298 | return s; |
1299 | } |
1300 | |
1301 | /* |
1302 | * Enqueue an SRCU callback on the srcu_data structure associated with |
1303 | * the current CPU and the specified srcu_struct structure, initiating |
1304 | * grace-period processing if it is not already running. |
1305 | * |
1306 | * Note that all CPUs must agree that the grace period extended beyond |
 * all pre-existing SRCU read-side critical sections. On systems with
1308 | * more than one CPU, this means that when "func()" is invoked, each CPU |
1309 | * is guaranteed to have executed a full memory barrier since the end of |
1310 | * its last corresponding SRCU read-side critical section whose beginning |
1311 | * preceded the call to call_srcu(). It also means that each CPU executing |
1312 | * an SRCU read-side critical section that continues beyond the start of |
1313 | * "func()" must have executed a memory barrier after the call_srcu() |
1314 | * but before the beginning of that SRCU read-side critical section. |
1315 | * Note that these guarantees include CPUs that are offline, idle, or |
1316 | * executing in user mode, as well as CPUs that are executing in the kernel. |
1317 | * |
1318 | * Furthermore, if CPU A invoked call_srcu() and CPU B invoked the |
1319 | * resulting SRCU callback function "func()", then both CPU A and CPU |
1320 | * B are guaranteed to execute a full memory barrier during the time |
1321 | * interval between the call to call_srcu() and the invocation of "func()". |
1322 | * This guarantee applies even if CPU A and CPU B are the same CPU (but |
1323 | * again only if the system has more than one CPU). |
1324 | * |
1325 | * Of course, these guarantees apply only for invocations of call_srcu(), |
1326 | * srcu_read_lock(), and srcu_read_unlock() that are all passed the same |
1327 | * srcu_struct structure. |
1328 | */ |
1329 | static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, |
1330 | rcu_callback_t func, bool do_norm) |
1331 | { |
	if (debug_rcu_head_queue(rhp)) {
		/* Probable double call_srcu(), so leak the callback. */
		WRITE_ONCE(rhp->func, srcu_leak_callback);
		WARN_ONCE(1, "call_srcu(): Leaked duplicate callback\n");
1336 | return; |
1337 | } |
1338 | rhp->func = func; |
1339 | (void)srcu_gp_start_if_needed(ssp, rhp, do_norm); |
1340 | } |
1341 | |
1342 | /** |
1343 | * call_srcu() - Queue a callback for invocation after an SRCU grace period |
 * @ssp: srcu_struct on which to queue the callback
1345 | * @rhp: structure to be used for queueing the SRCU callback. |
1346 | * @func: function to be invoked after the SRCU grace period |
1347 | * |
1348 | * The callback function will be invoked some time after a full SRCU |
1349 | * grace period elapses, in other words after all pre-existing SRCU |
1350 | * read-side critical sections have completed. However, the callback |
1351 | * function might well execute concurrently with other SRCU read-side |
1352 | * critical sections that started after call_srcu() was invoked. SRCU |
1353 | * read-side critical sections are delimited by srcu_read_lock() and |
1354 | * srcu_read_unlock(), and may be nested. |
1355 | * |
1356 | * The callback will be invoked from process context, but must nevertheless |
1357 | * be fast and must not block. |
1358 | */ |
1359 | void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, |
1360 | rcu_callback_t func) |
1361 | { |
	__call_srcu(ssp, rhp, func, true);
1363 | } |
1364 | EXPORT_SYMBOL_GPL(call_srcu); |
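
/*
 * Illustrative usage sketch (not built here; the names my_srcu, struct foo,
 * foo_free_cb(), and foo_remove() are assumptions for the example): a
 * structure embedding an rcu_head is unlinked and then freed only after
 * all pre-existing SRCU readers of my_srcu have completed.
 *
 *	struct foo {
 *		struct list_head list;
 *		struct rcu_head rh;
 *	};
 *
 *	static void foo_free_cb(struct rcu_head *rhp)
 *	{
 *		kfree(container_of(rhp, struct foo, rh));
 *	}
 *
 *	static void foo_remove(struct foo *fp)
 *	{
 *		list_del_rcu(&fp->list);
 *		call_srcu(&my_srcu, &fp->rh, foo_free_cb);
 *	}
 */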
1365 | |
1366 | /* |
1367 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). |
1368 | */ |
1369 | static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm) |
1370 | { |
1371 | struct rcu_synchronize rcu; |
1372 | |
	srcu_lock_sync(&ssp->dep_map);
1374 | |
1375 | RCU_LOCKDEP_WARN(lockdep_is_held(ssp) || |
1376 | lock_is_held(&rcu_bh_lock_map) || |
1377 | lock_is_held(&rcu_lock_map) || |
1378 | lock_is_held(&rcu_sched_lock_map), |
1379 | "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section" ); |
1380 | |
1381 | if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) |
1382 | return; |
1383 | might_sleep(); |
1384 | check_init_srcu_struct(ssp); |
	init_completion(&rcu.completion);
	init_rcu_head_on_stack(&rcu.head);
	__call_srcu(ssp, &rcu.head, wakeme_after_rcu, do_norm);
	wait_for_completion(&rcu.completion);
	destroy_rcu_head_on_stack(&rcu.head);
1390 | |
1391 | /* |
1392 | * Make sure that later code is ordered after the SRCU grace |
1393 | * period. This pairs with the spin_lock_irq_rcu_node() |
1394 | * in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed |
1395 | * because the current CPU might have been totally uninvolved with |
1396 | * (and thus unordered against) that grace period. |
1397 | */ |
1398 | smp_mb(); |
1399 | } |
1400 | |
1401 | /** |
1402 | * synchronize_srcu_expedited - Brute-force SRCU grace period |
1403 | * @ssp: srcu_struct with which to synchronize. |
1404 | * |
1405 | * Wait for an SRCU grace period to elapse, but be more aggressive about |
1406 | * spinning rather than blocking when waiting. |
1407 | * |
1408 | * Note that synchronize_srcu_expedited() has the same deadlock and |
1409 | * memory-ordering properties as does synchronize_srcu(). |
1410 | */ |
1411 | void synchronize_srcu_expedited(struct srcu_struct *ssp) |
1412 | { |
	__synchronize_srcu(ssp, rcu_gp_is_normal());
1414 | } |
1415 | EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); |
1416 | |
1417 | /** |
1418 | * synchronize_srcu - wait for prior SRCU read-side critical-section completion |
1419 | * @ssp: srcu_struct with which to synchronize. |
1420 | * |
 * Wait for the counts of both indexes to drain to zero.  To avoid
 * possible starvation of synchronize_srcu(), it first waits for the
 * count of index ((->srcu_idx & 1) ^ 1) to drain to zero, then flips
 * ->srcu_idx and waits for the count of the other index to drain.
1425 | * |
1426 | * Can block; must be called from process context. |
1427 | * |
1428 | * Note that it is illegal to call synchronize_srcu() from the corresponding |
1429 | * SRCU read-side critical section; doing so will result in deadlock. |
1430 | * However, it is perfectly legal to call synchronize_srcu() on one |
1431 | * srcu_struct from some other srcu_struct's read-side critical section, |
1432 | * as long as the resulting graph of srcu_structs is acyclic. |
1433 | * |
1434 | * There are memory-ordering constraints implied by synchronize_srcu(). |
1435 | * On systems with more than one CPU, when synchronize_srcu() returns, |
1436 | * each CPU is guaranteed to have executed a full memory barrier since |
1437 | * the end of its last corresponding SRCU read-side critical section |
1438 | * whose beginning preceded the call to synchronize_srcu(). In addition, |
1439 | * each CPU having an SRCU read-side critical section that extends beyond |
1440 | * the return from synchronize_srcu() is guaranteed to have executed a |
1441 | * full memory barrier after the beginning of synchronize_srcu() and before |
1442 | * the beginning of that SRCU read-side critical section. Note that these |
1443 | * guarantees include CPUs that are offline, idle, or executing in user mode, |
1444 | * as well as CPUs that are executing in the kernel. |
1445 | * |
1446 | * Furthermore, if CPU A invoked synchronize_srcu(), which returned |
1447 | * to its caller on CPU B, then both CPU A and CPU B are guaranteed |
1448 | * to have executed a full memory barrier during the execution of |
1449 | * synchronize_srcu(). This guarantee applies even if CPU A and CPU B |
1450 | * are the same CPU, but again only if the system has more than one CPU. |
1451 | * |
1452 | * Of course, these memory-ordering guarantees apply only when |
1453 | * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are |
1454 | * passed the same srcu_struct structure. |
1455 | * |
1456 | * Implementation of these memory-ordering guarantees is similar to |
1457 | * that of synchronize_rcu(). |
1458 | * |
1459 | * If SRCU is likely idle, expedite the first request. This semantic |
1460 | * was provided by Classic SRCU, and is relied upon by its users, so TREE |
1461 | * SRCU must also provide it. Note that detecting idleness is heuristic |
1462 | * and subject to both false positives and negatives. |
1463 | */ |
1464 | void synchronize_srcu(struct srcu_struct *ssp) |
1465 | { |
1466 | if (srcu_might_be_idle(ssp) || rcu_gp_is_expedited()) |
1467 | synchronize_srcu_expedited(ssp); |
1468 | else |
		__synchronize_srcu(ssp, true);
1470 | } |
1471 | EXPORT_SYMBOL_GPL(synchronize_srcu); |
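
/*
 * Illustrative update-side sketch (not built here; my_srcu, foo_mutex,
 * global_foo, and struct foo are assumptions for the example): unpublish
 * the old version, wait for pre-existing readers, then free it.  An
 * expedited variant would simply call synchronize_srcu_expedited().
 *
 *	static void foo_replace(struct foo *newfp)
 *	{
 *		struct foo *oldfp;
 *
 *		lockdep_assert_held(&foo_mutex);
 *		oldfp = rcu_dereference_protected(global_foo,
 *						  lockdep_is_held(&foo_mutex));
 *		rcu_assign_pointer(global_foo, newfp);
 *		synchronize_srcu(&my_srcu);	// Wait for pre-existing readers.
 *		kfree(oldfp);
 *	}
 */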
1472 | |
1473 | /** |
1474 | * get_state_synchronize_srcu - Provide an end-of-grace-period cookie |
1475 | * @ssp: srcu_struct to provide cookie for. |
1476 | * |
1477 | * This function returns a cookie that can be passed to |
1478 | * poll_state_synchronize_srcu(), which will return true if a full grace |
1479 | * period has elapsed in the meantime. It is the caller's responsibility |
1480 | * to make sure that grace period happens, for example, by invoking |
1481 | * call_srcu() after return from get_state_synchronize_srcu(). |
1482 | */ |
1483 | unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp) |
1484 | { |
1485 | // Any prior manipulation of SRCU-protected data must happen |
1486 | // before the load from ->srcu_gp_seq. |
1487 | smp_mb(); |
	return rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);
1489 | } |
1490 | EXPORT_SYMBOL_GPL(get_state_synchronize_srcu); |
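
/*
 * Illustrative sketch (not built here; my_srcu, fp->rh, and foo_cleanup_cb()
 * are assumptions for the example): take a cookie, then use call_srcu() to
 * make sure the corresponding grace period is actually driven, as the
 * comment above requires of the caller.
 *
 *	cookie = get_state_synchronize_srcu(&my_srcu);
 *	call_srcu(&my_srcu, &fp->rh, foo_cleanup_cb);	// Ensures a GP happens.
 *	...
 *	if (poll_state_synchronize_srcu(&my_srcu, cookie))
 *		;	// A full grace period has elapsed since the cookie.
 */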
1491 | |
1492 | /** |
1493 | * start_poll_synchronize_srcu - Provide cookie and start grace period |
1494 | * @ssp: srcu_struct to provide cookie for. |
1495 | * |
1496 | * This function returns a cookie that can be passed to |
1497 | * poll_state_synchronize_srcu(), which will return true if a full grace |
1498 | * period has elapsed in the meantime. Unlike get_state_synchronize_srcu(), |
1499 | * this function also ensures that any needed SRCU grace period will be |
1500 | * started. This convenience does come at a cost in terms of CPU overhead. |
1501 | */ |
1502 | unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp) |
1503 | { |
	return srcu_gp_start_if_needed(ssp, NULL, true);
1505 | } |
1506 | EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu); |
1507 | |
1508 | /** |
1509 | * poll_state_synchronize_srcu - Has cookie's grace period ended? |
1510 | * @ssp: srcu_struct to provide cookie for. |
1511 | * @cookie: Return value from get_state_synchronize_srcu() or start_poll_synchronize_srcu(). |
1512 | * |
1513 | * This function takes the cookie that was returned from either |
1514 | * get_state_synchronize_srcu() or start_poll_synchronize_srcu(), and |
1515 | * returns @true if an SRCU grace period elapsed since the time that the |
1516 | * cookie was created. |
1517 | * |
1518 | * Because cookies are finite in size, wrapping/overflow is possible. |
1519 | * This is more pronounced on 32-bit systems where cookies are 32 bits, |
1520 | * where in theory wrapping could happen in about 14 hours assuming |
1521 | * 25-microsecond expedited SRCU grace periods. However, a more likely |
1522 | * overflow lower bound is on the order of 24 days in the case of |
1523 | * one-millisecond SRCU grace periods. Of course, wrapping in a 64-bit |
1524 | * system requires geologic timespans, as in more than seven million years |
1525 | * even for expedited SRCU grace periods. |
1526 | * |
1527 | * Wrapping/overflow is much more of an issue for CONFIG_SMP=n systems |
1528 | * that also have CONFIG_PREEMPTION=n, which selects Tiny SRCU. This uses |
1529 | * a 16-bit cookie, which rcutorture routinely wraps in a matter of a |
1530 | * few minutes. If this proves to be a problem, this counter will be |
1531 | * expanded to the same size as for Tree SRCU. |
1532 | */ |
1533 | bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) |
1534 | { |
	if (!rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie))
1536 | return false; |
1537 | // Ensure that the end of the SRCU grace period happens before |
1538 | // any subsequent code that the caller might execute. |
1539 | smp_mb(); // ^^^ |
1540 | return true; |
1541 | } |
1542 | EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu); |
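
/*
 * Illustrative polling sketch (not built here; my_srcu is an assumption
 * for the example): start a grace period, then poll for its completion
 * instead of blocking in synchronize_srcu().
 *
 *	unsigned long cookie;
 *
 *	cookie = start_poll_synchronize_srcu(&my_srcu);
 *	while (!poll_state_synchronize_srcu(&my_srcu, cookie))
 *		schedule_timeout_uninterruptible(1);	// Or do other work.
 *	// All readers that started before the cookie was taken have finished.
 */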
1543 | |
1544 | /* |
1545 | * Callback function for srcu_barrier() use. |
1546 | */ |
1547 | static void srcu_barrier_cb(struct rcu_head *rhp) |
1548 | { |
1549 | struct srcu_data *sdp; |
1550 | struct srcu_struct *ssp; |
1551 | |
1552 | sdp = container_of(rhp, struct srcu_data, srcu_barrier_head); |
1553 | ssp = sdp->ssp; |
	if (atomic_dec_and_test(&ssp->srcu_sup->srcu_barrier_cpu_cnt))
1555 | complete(&ssp->srcu_sup->srcu_barrier_completion); |
1556 | } |
1557 | |
1558 | /* |
1559 | * Enqueue an srcu_barrier() callback on the specified srcu_data |
 * structure's ->cblist, but only if that ->cblist already has at least one
 * callback enqueued.  Note that if a CPU already has callbacks enqueued,
1562 | * it must have already registered the need for a future grace period, |
1563 | * so all we need do is enqueue a callback that will use the same grace |
1564 | * period as the last callback already in the queue. |
1565 | */ |
1566 | static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp) |
1567 | { |
1568 | spin_lock_irq_rcu_node(sdp); |
	atomic_inc(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
	sdp->srcu_barrier_head.func = srcu_barrier_cb;
	debug_rcu_head_queue(&sdp->srcu_barrier_head);
	if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
				   &sdp->srcu_barrier_head)) {
		debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
		atomic_dec(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
1576 | } |
1577 | spin_unlock_irq_rcu_node(sdp); |
1578 | } |
1579 | |
1580 | /** |
1581 | * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. |
1582 | * @ssp: srcu_struct on which to wait for in-flight callbacks. |
1583 | */ |
1584 | void srcu_barrier(struct srcu_struct *ssp) |
1585 | { |
1586 | int cpu; |
1587 | int idx; |
	unsigned long s = rcu_seq_snap(&ssp->srcu_sup->srcu_barrier_seq);
1589 | |
1590 | check_init_srcu_struct(ssp); |
1591 | mutex_lock(&ssp->srcu_sup->srcu_barrier_mutex); |
	if (rcu_seq_done(&ssp->srcu_sup->srcu_barrier_seq, s)) {
		smp_mb(); /* Force ordering following return. */
		mutex_unlock(&ssp->srcu_sup->srcu_barrier_mutex);
1595 | return; /* Someone else did our work for us. */ |
1596 | } |
	rcu_seq_start(&ssp->srcu_sup->srcu_barrier_seq);
	init_completion(&ssp->srcu_sup->srcu_barrier_completion);
1599 | |
1600 | /* Initial count prevents reaching zero until all CBs are posted. */ |
	atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 1);
1602 | |
1603 | idx = __srcu_read_lock_nmisafe(ssp); |
1604 | if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) |
1605 | srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, get_boot_cpu_id())); |
1606 | else |
1607 | for_each_possible_cpu(cpu) |
1608 | srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu)); |
1609 | __srcu_read_unlock_nmisafe(ssp, idx); |
1610 | |
1611 | /* Remove the initial count, at which point reaching zero can happen. */ |
	if (atomic_dec_and_test(&ssp->srcu_sup->srcu_barrier_cpu_cnt))
1613 | complete(&ssp->srcu_sup->srcu_barrier_completion); |
1614 | wait_for_completion(&ssp->srcu_sup->srcu_barrier_completion); |
1615 | |
	rcu_seq_end(&ssp->srcu_sup->srcu_barrier_seq);
	mutex_unlock(&ssp->srcu_sup->srcu_barrier_mutex);
1618 | } |
1619 | EXPORT_SYMBOL_GPL(srcu_barrier); |
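
/*
 * Illustrative teardown sketch (not built here; my_srcu and my_exit() are
 * assumptions for the example): code that posted callbacks with call_srcu()
 * must wait for them with srcu_barrier() before cleaning up the srcu_struct
 * or unloading the module containing the callback functions.
 *
 *	static void my_exit(void)
 *	{
 *		// ... prevent further call_srcu() invocations ...
 *		srcu_barrier(&my_srcu);		// Wait for in-flight callbacks.
 *		cleanup_srcu_struct(&my_srcu);
 *	}
 */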
1620 | |
1621 | /** |
1622 | * srcu_batches_completed - return batches completed. |
1623 | * @ssp: srcu_struct on which to report batch completion. |
1624 | * |
1625 | * Report the number of batches, correlated with, but not necessarily |
1626 | * precisely the same as, the number of grace periods that have elapsed. |
1627 | */ |
1628 | unsigned long srcu_batches_completed(struct srcu_struct *ssp) |
1629 | { |
1630 | return READ_ONCE(ssp->srcu_idx); |
1631 | } |
1632 | EXPORT_SYMBOL_GPL(srcu_batches_completed); |
1633 | |
1634 | /* |
1635 | * Core SRCU state machine. Push state bits of ->srcu_gp_seq |
1636 | * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has |
1637 | * completed in that state. |
1638 | */ |
1639 | static void srcu_advance_state(struct srcu_struct *ssp) |
1640 | { |
1641 | int idx; |
1642 | |
1643 | mutex_lock(&ssp->srcu_sup->srcu_gp_mutex); |
1644 | |
1645 | /* |
1646 | * Because readers might be delayed for an extended period after |
1647 | * fetching ->srcu_idx for their index, at any point in time there |
1648 | * might well be readers using both idx=0 and idx=1. We therefore |
1649 | * need to wait for readers to clear from both index values before |
1650 | * invoking a callback. |
1651 | * |
1652 | * The load-acquire ensures that we see the accesses performed |
1653 | * by the prior grace period. |
1654 | */ |
1655 | idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */ |
1656 | if (idx == SRCU_STATE_IDLE) { |
1657 | spin_lock_irq_rcu_node(ssp->srcu_sup); |
1658 | if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) { |
1659 | WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)); |
1660 | spin_unlock_irq_rcu_node(ssp->srcu_sup); |
			mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
1662 | return; |
1663 | } |
1664 | idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)); |
1665 | if (idx == SRCU_STATE_IDLE) |
1666 | srcu_gp_start(ssp); |
1667 | spin_unlock_irq_rcu_node(ssp->srcu_sup); |
1668 | if (idx != SRCU_STATE_IDLE) { |
			mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
1670 | return; /* Someone else started the grace period. */ |
1671 | } |
1672 | } |
1673 | |
1674 | if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN1) { |
1675 | idx = 1 ^ (ssp->srcu_idx & 1); |
		if (!try_check_zero(ssp, idx, 1)) {
			mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
1678 | return; /* readers present, retry later. */ |
1679 | } |
1680 | srcu_flip(ssp); |
1681 | spin_lock_irq_rcu_node(ssp->srcu_sup); |
		rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2);
1683 | ssp->srcu_sup->srcu_n_exp_nodelay = 0; |
1684 | spin_unlock_irq_rcu_node(ssp->srcu_sup); |
1685 | } |
1686 | |
1687 | if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) { |
1688 | |
1689 | /* |
1690 | * SRCU read-side critical sections are normally short, |
1691 | * so check at least twice in quick succession after a flip. |
1692 | */ |
1693 | idx = 1 ^ (ssp->srcu_idx & 1); |
		if (!try_check_zero(ssp, idx, 2)) {
			mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
1696 | return; /* readers present, retry later. */ |
1697 | } |
1698 | ssp->srcu_sup->srcu_n_exp_nodelay = 0; |
1699 | srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. */ |
1700 | } |
1701 | } |
1702 | |
1703 | /* |
1704 | * Invoke a limited number of SRCU callbacks that have passed through |
1705 | * their grace period. If there are more to do, SRCU will reschedule |
1706 | * the workqueue. Note that needed memory barriers have been executed |
1707 | * in this task's context by srcu_readers_active_idx_check(). |
1708 | */ |
1709 | static void srcu_invoke_callbacks(struct work_struct *work) |
1710 | { |
1711 | long len; |
1712 | bool more; |
1713 | struct rcu_cblist ready_cbs; |
1714 | struct rcu_head *rhp; |
1715 | struct srcu_data *sdp; |
1716 | struct srcu_struct *ssp; |
1717 | |
1718 | sdp = container_of(work, struct srcu_data, work); |
1719 | |
1720 | ssp = sdp->ssp; |
	rcu_cblist_init(&ready_cbs);
1722 | spin_lock_irq_rcu_node(sdp); |
1723 | WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL)); |
	rcu_segcblist_advance(&sdp->srcu_cblist,
			      rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
	if (sdp->srcu_cblist_invoking ||
	    !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
1728 | spin_unlock_irq_rcu_node(sdp); |
1729 | return; /* Someone else on the job or nothing to do. */ |
1730 | } |
1731 | |
1732 | /* We are on the job! Extract and invoke ready callbacks. */ |
1733 | sdp->srcu_cblist_invoking = true; |
	rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
1735 | len = ready_cbs.len; |
1736 | spin_unlock_irq_rcu_node(sdp); |
	rhp = rcu_cblist_dequeue(&ready_cbs);
	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
		debug_rcu_head_unqueue(rhp);
1740 | debug_rcu_head_callback(rhp); |
1741 | local_bh_disable(); |
1742 | rhp->func(rhp); |
1743 | local_bh_enable(); |
1744 | } |
1745 | WARN_ON_ONCE(ready_cbs.len); |
1746 | |
1747 | /* |
1748 | * Update counts, accelerate new callbacks, and if needed, |
1749 | * schedule another round of callback invocation. |
1750 | */ |
1751 | spin_lock_irq_rcu_node(sdp); |
	rcu_segcblist_add_len(&sdp->srcu_cblist, -len);
	sdp->srcu_cblist_invoking = false;
	more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
1755 | spin_unlock_irq_rcu_node(sdp); |
1756 | if (more) |
		srcu_schedule_cbs_sdp(sdp, 0);
1758 | } |
1759 | |
1760 | /* |
1761 | * Finished one round of SRCU grace period. Start another if there are |
1762 | * more SRCU callbacks queued, otherwise put SRCU into not-running state. |
1763 | */ |
1764 | static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) |
1765 | { |
1766 | bool pushgp = true; |
1767 | |
1768 | spin_lock_irq_rcu_node(ssp->srcu_sup); |
1769 | if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) { |
1770 | if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) { |
1771 | /* All requests fulfilled, time to go idle. */ |
1772 | pushgp = false; |
1773 | } |
	} else if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)) {
1775 | /* Outstanding request and no GP. Start one. */ |
1776 | srcu_gp_start(ssp); |
1777 | } |
1778 | spin_unlock_irq_rcu_node(ssp->srcu_sup); |
1779 | |
1780 | if (pushgp) |
		queue_delayed_work(rcu_gp_wq, &ssp->srcu_sup->work, delay);
1782 | } |
1783 | |
1784 | /* |
1785 | * This is the work-queue function that handles SRCU grace periods. |
1786 | */ |
1787 | static void process_srcu(struct work_struct *work) |
1788 | { |
1789 | unsigned long curdelay; |
1790 | unsigned long j; |
1791 | struct srcu_struct *ssp; |
1792 | struct srcu_usage *sup; |
1793 | |
1794 | sup = container_of(work, struct srcu_usage, work.work); |
1795 | ssp = sup->srcu_ssp; |
1796 | |
1797 | srcu_advance_state(ssp); |
1798 | curdelay = srcu_get_delay(ssp); |
1799 | if (curdelay) { |
1800 | WRITE_ONCE(sup->reschedule_count, 0); |
1801 | } else { |
1802 | j = jiffies; |
1803 | if (READ_ONCE(sup->reschedule_jiffies) == j) { |
1804 | WRITE_ONCE(sup->reschedule_count, READ_ONCE(sup->reschedule_count) + 1); |
1805 | if (READ_ONCE(sup->reschedule_count) > srcu_max_nodelay) |
1806 | curdelay = 1; |
1807 | } else { |
1808 | WRITE_ONCE(sup->reschedule_count, 1); |
1809 | WRITE_ONCE(sup->reschedule_jiffies, j); |
1810 | } |
1811 | } |
	srcu_reschedule(ssp, curdelay);
1813 | } |
1814 | |
1815 | void srcutorture_get_gp_data(enum rcutorture_type test_type, |
1816 | struct srcu_struct *ssp, int *flags, |
1817 | unsigned long *gp_seq) |
1818 | { |
1819 | if (test_type != SRCU_FLAVOR) |
1820 | return; |
1821 | *flags = 0; |
	*gp_seq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
1823 | } |
1824 | EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); |
1825 | |
1826 | static const char * const srcu_size_state_name[] = { |
1827 | "SRCU_SIZE_SMALL" , |
1828 | "SRCU_SIZE_ALLOC" , |
1829 | "SRCU_SIZE_WAIT_BARRIER" , |
1830 | "SRCU_SIZE_WAIT_CALL" , |
1831 | "SRCU_SIZE_WAIT_CBS1" , |
1832 | "SRCU_SIZE_WAIT_CBS2" , |
1833 | "SRCU_SIZE_WAIT_CBS3" , |
1834 | "SRCU_SIZE_WAIT_CBS4" , |
1835 | "SRCU_SIZE_BIG" , |
1836 | "SRCU_SIZE_???" , |
1837 | }; |
1838 | |
1839 | void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) |
1840 | { |
1841 | int cpu; |
1842 | int idx; |
1843 | unsigned long s0 = 0, s1 = 0; |
1844 | int ss_state = READ_ONCE(ssp->srcu_sup->srcu_size_state); |
1845 | int ss_state_idx = ss_state; |
1846 | |
1847 | idx = ssp->srcu_idx & 0x1; |
1848 | if (ss_state < 0 || ss_state >= ARRAY_SIZE(srcu_size_state_name)) |
1849 | ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1; |
1850 | pr_alert("%s%s Tree SRCU g%ld state %d (%s)" , |
1851 | tt, tf, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq), ss_state, |
1852 | srcu_size_state_name[ss_state_idx]); |
1853 | if (!ssp->sda) { |
1854 | // Called after cleanup_srcu_struct(), perhaps. |
1855 | pr_cont(" No per-CPU srcu_data structures (->sda == NULL).\n" ); |
1856 | } else { |
1857 | pr_cont(" per-CPU(idx=%d):" , idx); |
1858 | for_each_possible_cpu(cpu) { |
1859 | unsigned long l0, l1; |
1860 | unsigned long u0, u1; |
1861 | long c0, c1; |
1862 | struct srcu_data *sdp; |
1863 | |
1864 | sdp = per_cpu_ptr(ssp->sda, cpu); |
1865 | u0 = data_race(atomic_long_read(&sdp->srcu_unlock_count[!idx])); |
1866 | u1 = data_race(atomic_long_read(&sdp->srcu_unlock_count[idx])); |
1867 | |
1868 | /* |
1869 | * Make sure that a lock is always counted if the corresponding |
1870 | * unlock is counted. |
1871 | */ |
1872 | smp_rmb(); |
1873 | |
1874 | l0 = data_race(atomic_long_read(&sdp->srcu_lock_count[!idx])); |
1875 | l1 = data_race(atomic_long_read(&sdp->srcu_lock_count[idx])); |
1876 | |
1877 | c0 = l0 - u0; |
1878 | c1 = l1 - u1; |
1879 | pr_cont(" %d(%ld,%ld %c)" , |
1880 | cpu, c0, c1, |
1881 | "C." [rcu_segcblist_empty(&sdp->srcu_cblist)]); |
1882 | s0 += c0; |
1883 | s1 += c1; |
1884 | } |
1885 | pr_cont(" T(%ld,%ld)\n" , s0, s1); |
1886 | } |
1887 | if (SRCU_SIZING_IS_TORTURE()) |
1888 | srcu_transition_to_big(ssp); |
1889 | } |
1890 | EXPORT_SYMBOL_GPL(srcu_torture_stats_print); |
1891 | |
1892 | static int __init srcu_bootup_announce(void) |
1893 | { |
1894 | pr_info("Hierarchical SRCU implementation.\n" ); |
1895 | if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF) |
1896 | pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n" , exp_holdoff); |
1897 | if (srcu_retry_check_delay != SRCU_DEFAULT_RETRY_CHECK_DELAY) |
1898 | pr_info("\tNon-default retry check delay of %lu us.\n" , srcu_retry_check_delay); |
1899 | if (srcu_max_nodelay != SRCU_DEFAULT_MAX_NODELAY) |
1900 | pr_info("\tNon-default max no-delay of %lu.\n" , srcu_max_nodelay); |
1901 | pr_info("\tMax phase no-delay instances is %lu.\n" , srcu_max_nodelay_phase); |
1902 | return 0; |
1903 | } |
1904 | early_initcall(srcu_bootup_announce); |
1905 | |
1906 | void __init srcu_init(void) |
1907 | { |
1908 | struct srcu_usage *sup; |
1909 | |
1910 | /* Decide on srcu_struct-size strategy. */ |
1911 | if (SRCU_SIZING_IS(SRCU_SIZING_AUTO)) { |
1912 | if (nr_cpu_ids >= big_cpu_lim) { |
1913 | convert_to_big = SRCU_SIZING_INIT; // Don't bother waiting for contention. |
1914 | pr_info("%s: Setting srcu_struct sizes to big.\n" , __func__); |
1915 | } else { |
1916 | convert_to_big = SRCU_SIZING_NONE | SRCU_SIZING_CONTEND; |
1917 | pr_info("%s: Setting srcu_struct sizes based on contention.\n" , __func__); |
1918 | } |
1919 | } |
1920 | |
1921 | /* |
1922 | * Once that is set, call_srcu() can follow the normal path and |
 * queue delayed work.  This must happen after the RCU workqueues have
 * been created and timers have been initialized.
1925 | */ |
1926 | srcu_init_done = true; |
	while (!list_empty(&srcu_boot_list)) {
1928 | sup = list_first_entry(&srcu_boot_list, struct srcu_usage, |
1929 | work.work.entry); |
		list_del_init(&sup->work.work.entry);
1931 | if (SRCU_SIZING_IS(SRCU_SIZING_INIT) && |
1932 | sup->srcu_size_state == SRCU_SIZE_SMALL) |
1933 | sup->srcu_size_state = SRCU_SIZE_ALLOC; |
		queue_work(rcu_gp_wq, &sup->work.work);
1935 | } |
1936 | } |
1937 | |
1938 | #ifdef CONFIG_MODULES |
1939 | |
1940 | /* Initialize any global-scope srcu_struct structures used by this module. */ |
1941 | static int srcu_module_coming(struct module *mod) |
1942 | { |
1943 | int i; |
1944 | struct srcu_struct *ssp; |
1945 | struct srcu_struct **sspp = mod->srcu_struct_ptrs; |
1946 | |
1947 | for (i = 0; i < mod->num_srcu_structs; i++) { |
1948 | ssp = *(sspp++); |
1949 | ssp->sda = alloc_percpu(struct srcu_data); |
1950 | if (WARN_ON_ONCE(!ssp->sda)) |
1951 | return -ENOMEM; |
1952 | } |
1953 | return 0; |
1954 | } |
1955 | |
1956 | /* Clean up any global-scope srcu_struct structures used by this module. */ |
1957 | static void srcu_module_going(struct module *mod) |
1958 | { |
1959 | int i; |
1960 | struct srcu_struct *ssp; |
1961 | struct srcu_struct **sspp = mod->srcu_struct_ptrs; |
1962 | |
1963 | for (i = 0; i < mod->num_srcu_structs; i++) { |
1964 | ssp = *(sspp++); |
1965 | if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed)) && |
1966 | !WARN_ON_ONCE(!ssp->srcu_sup->sda_is_static)) |
1967 | cleanup_srcu_struct(ssp); |
1968 | if (!WARN_ON(srcu_readers_active(ssp))) |
			free_percpu(ssp->sda);
1970 | } |
1971 | } |
1972 | |
1973 | /* Handle one module, either coming or going. */ |
1974 | static int srcu_module_notify(struct notifier_block *self, |
1975 | unsigned long val, void *data) |
1976 | { |
1977 | struct module *mod = data; |
1978 | int ret = 0; |
1979 | |
1980 | switch (val) { |
1981 | case MODULE_STATE_COMING: |
1982 | ret = srcu_module_coming(mod); |
1983 | break; |
1984 | case MODULE_STATE_GOING: |
1985 | srcu_module_going(mod); |
1986 | break; |
1987 | default: |
1988 | break; |
1989 | } |
1990 | return ret; |
1991 | } |
1992 | |
1993 | static struct notifier_block srcu_module_nb = { |
1994 | .notifier_call = srcu_module_notify, |
1995 | .priority = 0, |
1996 | }; |
1997 | |
1998 | static __init int init_srcu_module_notifier(void) |
1999 | { |
2000 | int ret; |
2001 | |
	ret = register_module_notifier(&srcu_module_nb);
2003 | if (ret) |
2004 | pr_warn("Failed to register srcu module notifier\n" ); |
2005 | return ret; |
2006 | } |
2007 | late_initcall(init_srcu_module_notifier); |
2008 | |
2009 | #endif /* #ifdef CONFIG_MODULES */ |
2010 | |