rhashtable.c source code [linux/lib/rhashtable.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* Resizable, Scalable, Concurrent Hash Table
4	*
5	* Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
6	* Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
7	* Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
8	*
9	* Code partially derived from nft_hash
10	* Rewritten with rehash code from br_multicast plus single list
11	* pointer as suggested by Josh Triplett
12	*/
13
14	#include <linux/atomic.h>
15	#include <linux/kernel.h>
16	#include <linux/init.h>
17	#include <linux/log2.h>
18	#include <linux/sched.h>
19	#include <linux/rculist.h>
20	#include <linux/slab.h>
21	#include <linux/vmalloc.h>
22	#include <linux/mm.h>
23	#include <linux/jhash.h>
24	#include <linux/random.h>
25	#include <linux/rhashtable.h>
26	#include <linux/err.h>
27	#include <linux/export.h>
28
29	#define HASH_DEFAULT_SIZE 64UL
30	#define HASH_MIN_SIZE 4U
31
32	union nested_table {
33	union nested_table __rcu *table;
34	struct rhash_lock_head __rcu *bucket;
35	};
36
37	static u32 head_hashfn(struct rhashtable *ht,
38	const struct bucket_table *tbl,
39	const struct rhash_head *he)
40	{
41	return rht_head_hashfn(ht, tbl, he, params: ht->p);
42	}
43
44	#ifdef CONFIG_PROVE_LOCKING
45	#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
46
47	int lockdep_rht_mutex_is_held(struct rhashtable *ht)
48	{
49	return (debug_locks) ? lockdep_is_held(&ht->mutex) : `1`;
50	}
51	EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
52
53	int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
54	{
55	if (!debug_locks)
56	return `1`;
57	if (unlikely(tbl->nest))
58	return `1`;
59	return bit_spin_is_locked(bitnum: `0`, addr: (unsigned long *)&tbl->buckets[hash]);
60	}
61	EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
62	#else
63	#define ASSERT_RHT_MUTEX(HT)
64	#endif
65
66	static inline union nested_table *nested_table_top(
67	const struct bucket_table *tbl)
68	{
69	/ The top-level bucket entry does not need RCU protection*
70	* because it's set at the same time as tbl->nest.
71	*/
72	return (void *)rcu_dereference_protected(tbl->buckets[`0`], `1`);
73	}
74
75	static void nested_table_free(union nested_table ntbl, unsigned* int size)
76	{
77	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
78	const unsigned int len = `1` << shift;
79	unsigned int i;
80
81	ntbl = rcu_dereference_protected(ntbl->table, `1`);
82	if (!ntbl)
83	return;
84
85	if (size > len) {
86	size >>= shift;
87	for (i = `0`; i < len; i++)
88	nested_table_free(ntbl: ntbl + i, size);
89	}
90
91	kfree(objp: ntbl);
92	}
93
94	static void nested_bucket_table_free(const struct bucket_table *tbl)
95	{
96	unsigned int size = tbl->size >> tbl->nest;
97	unsigned int len = `1` << tbl->nest;
98	union nested_table *ntbl;
99	unsigned int i;
100
101	ntbl = nested_table_top(tbl);
102
103	for (i = `0`; i < len; i++)
104	nested_table_free(ntbl: ntbl + i, size);
105
106	kfree(objp: ntbl);
107	}
108
109	static void bucket_table_free(const struct bucket_table *tbl)
110	{
111	if (tbl->nest)
112	nested_bucket_table_free(tbl);
113
114	kvfree(addr: tbl);
115	}
116
117	static void bucket_table_free_rcu(struct rcu_head *head)
118	{
119	bucket_table_free(container_of(head, struct bucket_table, rcu));
120	}
121
122	static union nested_table nested_table_alloc(struct* rhashtable *ht,
123	union nested_table __rcu **prev,
124	bool leaf)
125	{
126	union nested_table *ntbl;
127	int i;
128
129	ntbl = rcu_dereference(*prev);
130	if (ntbl)
131	return ntbl;
132
133	ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
134
135	if (ntbl && leaf) {
136	for (i = `0`; i < PAGE_SIZE / sizeof(ntbl[`0`]); i++)
137	INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
138	}
139
140	if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL)
141	return ntbl;
142	/ Raced with another thread. /
143	kfree(objp: ntbl);
144	return rcu_dereference(*prev);
145	}
146
147	static struct bucket_table nested_bucket_table_alloc(struct* rhashtable *ht,
148	size_t nbuckets,
149	gfp_t gfp)
150	{
151	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
152	struct bucket_table *tbl;
153	size_t size;
154
155	if (nbuckets < (`1` << (shift + `1`)))
156	return NULL;
157
158	size = sizeof(tbl) + sizeof*(tbl->buckets[`0`]);
159
160	tbl = kzalloc(size, flags: gfp);
161	if (!tbl)
162	return NULL;
163
164	if (!nested_table_alloc(ht, prev: (union nested_table __rcu **)tbl->buckets,
165	leaf: false)) {
166	kfree(objp: tbl);
167	return NULL;
168	}
169
170	tbl->nest = (ilog2(nbuckets) - `1`) % shift + `1`;
171
172	return tbl;
173	}
174
175	static struct bucket_table bucket_table_alloc(struct* rhashtable *ht,
176	size_t nbuckets,
177	gfp_t gfp)
178	{
179	struct bucket_table *tbl = NULL;
180	size_t size;
181	int i;
182	static struct lock_class_key __key;
183
184	tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), flags: gfp);
185
186	size = nbuckets;
187
188	if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) {
189	tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
190	nbuckets = `0`;
191	}
192
193	if (tbl == NULL)
194	return NULL;
195
196	lockdep_init_map(lock: &tbl->dep_map, name: "rhashtable_bucket", key: &__key, subclass: `0`);
197
198	tbl->size = size;
199
200	rcu_head_init(rhp: &tbl->rcu);
201	INIT_LIST_HEAD(list: &tbl->walkers);
202
203	tbl->hash_rnd = get_random_u32();
204
205	for (i = `0`; i < nbuckets; i++)
206	INIT_RHT_NULLS_HEAD(tbl->buckets[i]);
207
208	return tbl;
209	}
210
211	static struct bucket_table rhashtable_last_table(struct* rhashtable *ht,
212	struct bucket_table *tbl)
213	{
214	struct bucket_table *new_tbl;
215
216	do {
217	new_tbl = tbl;
218	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
219	} while (tbl);
220
221	return new_tbl;
222	}
223
224	static int rhashtable_rehash_one(struct rhashtable *ht,
225	struct rhash_lock_head __rcu **bkt,
226	unsigned int old_hash)
227	{
228	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
229	struct bucket_table *new_tbl = rhashtable_last_table(ht, tbl: old_tbl);
230	int err = -EAGAIN;
231	struct rhash_head head, next, *entry;
232	struct rhash_head __rcu **pprev = NULL;
233	unsigned int new_hash;
234	unsigned long flags;
235
236	if (new_tbl->nest)
237	goto out;
238
239	err = -ENOENT;
240
241	rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
242	old_tbl, old_hash) {
243	err = `0`;
244	next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
245
246	if (rht_is_a_nulls(ptr: next))
247	break;
248
249	pprev = &entry->next;
250	}
251
252	if (err)
253	goto out;
254
255	new_hash = head_hashfn(ht, tbl: new_tbl, he: entry);
256
257	flags = rht_lock_nested(tbl: new_tbl, bucket: &new_tbl->buckets[new_hash],
258	SINGLE_DEPTH_NESTING);
259
260	head = rht_ptr(bkt: new_tbl->buckets + new_hash, tbl: new_tbl, hash: new_hash);
261
262	RCU_INIT_POINTER(entry->next, head);
263
264	rht_assign_unlock(tbl: new_tbl, bkt: &new_tbl->buckets[new_hash], obj: entry, flags);
265
266	if (pprev)
267	rcu_assign_pointer(*pprev, next);
268	else
269	/ Need to preserved the bit lock. /
270	rht_assign_locked(bkt, obj: next);
271
272	out:
273	return err;
274	}
275
276	static int rhashtable_rehash_chain(struct rhashtable *ht,
277	unsigned int old_hash)
278	{
279	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
280	struct rhash_lock_head __rcu **bkt = rht_bucket_var(tbl: old_tbl, hash: old_hash);
281	unsigned long flags;
282	int err;
283
284	if (!bkt)
285	return `0`;
286	flags = rht_lock(tbl: old_tbl, bkt);
287
288	while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
289	;
290
291	if (err == -ENOENT)
292	err = `0`;
293	rht_unlock(tbl: old_tbl, bkt, flags);
294
295	return err;
296	}
297
298	static int rhashtable_rehash_attach(struct rhashtable *ht,
299	struct bucket_table *old_tbl,
300	struct bucket_table *new_tbl)
301	{
302	/ Make insertions go into the new, empty table right away. Deletions*
303	* and lookups will be attempted in both tables until we synchronize.
304	* As cmpxchg() provides strong barriers, we do not need
305	* rcu_assign_pointer().
306	*/
307
308	if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL,
309	new_tbl) != NULL)
310	return -EEXIST;
311
312	return `0`;
313	}
314
315	static int rhashtable_rehash_table(struct rhashtable *ht)
316	{
317	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
318	struct bucket_table *new_tbl;
319	struct rhashtable_walker *walker;
320	unsigned int old_hash;
321	int err;
322
323	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
324	if (!new_tbl)
325	return `0`;
326
327	for (old_hash = `0`; old_hash < old_tbl->size; old_hash++) {
328	err = rhashtable_rehash_chain(ht, old_hash);
329	if (err)
330	return err;
331	cond_resched();
332	}
333
334	/ Publish the new table pointer. /
335	rcu_assign_pointer(ht->tbl, new_tbl);
336
337	spin_lock(lock: &ht->lock);
338	list_for_each_entry(walker, &old_tbl->walkers, list)
339	walker->tbl = NULL;
340
341	/ Wait for readers. All new readers will see the new*
342	* table, and thus no references to the old table will
343	* remain.
344	* We do this inside the locked region so that
345	* rhashtable_walk_stop() can use rcu_head_after_call_rcu()
346	* to check if it should not re-link the table.
347	*/
348	call_rcu(head: &old_tbl->rcu, func: bucket_table_free_rcu);
349	spin_unlock(lock: &ht->lock);
350
351	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : `0`;
352	}
353
354	static int rhashtable_rehash_alloc(struct rhashtable *ht,
355	struct bucket_table *old_tbl,
356	unsigned int size)
357	{
358	struct bucket_table *new_tbl;
359	int err;
360
361	ASSERT_RHT_MUTEX(ht);
362
363	new_tbl = bucket_table_alloc(ht, nbuckets: size, GFP_KERNEL);
364	if (new_tbl == NULL)
365	return -ENOMEM;
366
367	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
368	if (err)
369	bucket_table_free(tbl: new_tbl);
370
371	return err;
372	}
373
374	/**
375	* rhashtable_shrink - Shrink hash table while allowing concurrent lookups
376	* @ht: the hash table to shrink
377	*
378	* This function shrinks the hash table to fit, i.e., the smallest
379	* size would not cause it to expand right away automatically.
380	*
381	* The caller must ensure that no concurrent resizing occurs by holding
382	* ht->mutex.
383	*
384	* The caller must ensure that no concurrent table mutations take place.
385	* It is however valid to have concurrent lookups if they are RCU protected.
386	*
387	* It is valid to have concurrent insertions and deletions protected by per
388	* bucket locks or concurrent RCU protected lookups and traversals.
389	*/
390	static int rhashtable_shrink(struct rhashtable *ht)
391	{
392	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
393	unsigned int nelems = atomic_read(v: &ht->nelems);
394	unsigned int size = `0`;
395
396	if (nelems)
397	size = roundup_pow_of_two(nelems * `3` / `2`);
398	if (size < ht->p.min_size)
399	size = ht->p.min_size;
400
401	if (old_tbl->size <= size)
402	return `0`;
403
404	if (rht_dereference(old_tbl->future_tbl, ht))
405	return -EEXIST;
406
407	return rhashtable_rehash_alloc(ht, old_tbl, size);
408	}
409
410	static void rht_deferred_worker(struct work_struct *work)
411	{
412	struct rhashtable *ht;
413	struct bucket_table *tbl;
414	int err = `0`;
415
416	ht = container_of(work, struct rhashtable, run_work);
417	mutex_lock(&ht->mutex);
418
419	tbl = rht_dereference(ht->tbl, ht);
420	tbl = rhashtable_last_table(ht, tbl);
421
422	if (rht_grow_above_75(ht, tbl))
423	err = rhashtable_rehash_alloc(ht, old_tbl: tbl, size: tbl->size * `2`);
424	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
425	err = rhashtable_shrink(ht);
426	else if (tbl->nest)
427	err = rhashtable_rehash_alloc(ht, old_tbl: tbl, size: tbl->size);
428
429	if (!err \|\| err == -EEXIST) {
430	int nerr;
431
432	nerr = rhashtable_rehash_table(ht);
433	err = err ?: nerr;
434	}
435
436	mutex_unlock(lock: &ht->mutex);
437
438	if (err)
439	schedule_work(work: &ht->run_work);
440	}
441
442	static int rhashtable_insert_rehash(struct rhashtable *ht,
443	struct bucket_table *tbl)
444	{
445	struct bucket_table *old_tbl;
446	struct bucket_table *new_tbl;
447	unsigned int size;
448	int err;
449
450	old_tbl = rht_dereference_rcu(ht->tbl, ht);
451
452	size = tbl->size;
453
454	err = -EBUSY;
455
456	if (rht_grow_above_75(ht, tbl))
457	size *= `2`;
458	/ Do not schedule more than one rehash /
459	else if (old_tbl != tbl)
460	goto fail;
461
462	err = -ENOMEM;
463
464	new_tbl = bucket_table_alloc(ht, nbuckets: size, GFP_ATOMIC \| __GFP_NOWARN);
465	if (new_tbl == NULL)
466	goto fail;
467
468	err = rhashtable_rehash_attach(ht, old_tbl: tbl, new_tbl);
469	if (err) {
470	bucket_table_free(tbl: new_tbl);
471	if (err == -EEXIST)
472	err = `0`;
473	} else
474	schedule_work(work: &ht->run_work);
475
476	return err;
477
478	fail:
479	/ Do not fail the insert if someone else did a rehash. /
480	if (likely(rcu_access_pointer(tbl->future_tbl)))
481	return `0`;
482
483	/ Schedule async rehash to retry allocation in process context. /
484	if (err == -ENOMEM)
485	schedule_work(work: &ht->run_work);
486
487	return err;
488	}
489
490	static void rhashtable_lookup_one(struct* rhashtable *ht,
491	struct rhash_lock_head __rcu **bkt,
492	struct bucket_table tbl, unsigned* int hash,
493	const void key, struct* rhash_head *obj)
494	{
495	struct rhashtable_compare_arg arg = {
496	.ht = ht,
497	.key = key,
498	};
499	struct rhash_head __rcu **pprev = NULL;
500	struct rhash_head *head;
501	int elasticity;
502
503	elasticity = RHT_ELASTICITY;
504	rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
505	struct rhlist_head *list;
506	struct rhlist_head *plist;
507
508	elasticity--;
509	if (!key \|\|
510	(ht->p.obj_cmpfn ?
511	ht->p.obj_cmpfn(&arg, rht_obj(ht, he: head)) :
512	rhashtable_compare(arg: &arg, obj: rht_obj(ht, he: head)))) {
513	pprev = &head->next;
514	continue;
515	}
516
517	if (!ht->rhlist)
518	return rht_obj(ht, he: head);
519
520	list = container_of(obj, struct rhlist_head, rhead);
521	plist = container_of(head, struct rhlist_head, rhead);
522
523	RCU_INIT_POINTER(list->next, plist);
524	head = rht_dereference_bucket(head->next, tbl, hash);
525	RCU_INIT_POINTER(list->rhead.next, head);
526	if (pprev)
527	rcu_assign_pointer(*pprev, obj);
528	else
529	/ Need to preserve the bit lock /
530	rht_assign_locked(bkt, obj);
531
532	return NULL;
533	}
534
535	if (elasticity <= `0`)
536	return ERR_PTR(error: -EAGAIN);
537
538	return ERR_PTR(error: -ENOENT);
539	}
540
541	static struct bucket_table *rhashtable_insert_one(
542	struct rhashtable ht, struct* rhash_lock_head __rcu **bkt,
543	struct bucket_table tbl, unsigned* int hash, struct rhash_head *obj,
544	void *data)
545	{
546	struct bucket_table *new_tbl;
547	struct rhash_head *head;
548
549	if (!IS_ERR_OR_NULL(ptr: data))
550	return ERR_PTR(error: -EEXIST);
551
552	if (PTR_ERR(ptr: data) != -EAGAIN && PTR_ERR(ptr: data) != -ENOENT)
553	return ERR_CAST(ptr: data);
554
555	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
556	if (new_tbl)
557	return new_tbl;
558
559	if (PTR_ERR(ptr: data) != -ENOENT)
560	return ERR_CAST(ptr: data);
561
562	if (unlikely(rht_grow_above_max(ht, tbl)))
563	return ERR_PTR(error: -E2BIG);
564
565	if (unlikely(rht_grow_above_100(ht, tbl)))
566	return ERR_PTR(error: -EAGAIN);
567
568	head = rht_ptr(bkt, tbl, hash);
569
570	RCU_INIT_POINTER(obj->next, head);
571	if (ht->rhlist) {
572	struct rhlist_head *list;
573
574	list = container_of(obj, struct rhlist_head, rhead);
575	RCU_INIT_POINTER(list->next, NULL);
576	}
577
578	/ bkt is always the head of the list, so it holds*
579	* the lock, which we need to preserve
580	*/
581	rht_assign_locked(bkt, obj);
582
583	atomic_inc(v: &ht->nelems);
584	if (rht_grow_above_75(ht, tbl))
585	schedule_work(work: &ht->run_work);
586
587	return NULL;
588	}
589
590	static void rhashtable_try_insert(struct* rhashtable ht, const* void *key,
591	struct rhash_head *obj)
592	{
593	struct bucket_table *new_tbl;
594	struct bucket_table *tbl;
595	struct rhash_lock_head __rcu **bkt;
596	unsigned long flags;
597	unsigned int hash;
598	void *data;
599
600	new_tbl = rcu_dereference(ht->tbl);
601
602	do {
603	tbl = new_tbl;
604	hash = rht_head_hashfn(ht, tbl, he: obj, params: ht->p);
605	if (rcu_access_pointer(tbl->future_tbl))
606	/ Failure is OK /
607	bkt = rht_bucket_var(tbl, hash);
608	else
609	bkt = rht_bucket_insert(ht, tbl, hash);
610	if (bkt == NULL) {
611	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
612	data = ERR_PTR(error: -EAGAIN);
613	} else {
614	flags = rht_lock(tbl, bkt);
615	data = rhashtable_lookup_one(ht, bkt, tbl,
616	hash, key, obj);
617	new_tbl = rhashtable_insert_one(ht, bkt, tbl,
618	hash, obj, data);
619	if (PTR_ERR(ptr: new_tbl) != -EEXIST)
620	data = ERR_CAST(ptr: new_tbl);
621
622	rht_unlock(tbl, bkt, flags);
623	}
624	} while (!IS_ERR_OR_NULL(ptr: new_tbl));
625
626	if (PTR_ERR(ptr: data) == -EAGAIN)
627	data = ERR_PTR(error: rhashtable_insert_rehash(ht, tbl) ?:
628	-EAGAIN);
629
630	return data;
631	}
632
633	void rhashtable_insert_slow(struct* rhashtable ht, const* void *key,
634	struct rhash_head *obj)
635	{
636	void *data;
637
638	do {
639	rcu_read_lock();
640	data = rhashtable_try_insert(ht, key, obj);
641	rcu_read_unlock();
642	} while (PTR_ERR(ptr: data) == -EAGAIN);
643
644	return data;
645	}
646	EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
647
648	/**
649	* rhashtable_walk_enter - Initialise an iterator
650	* @ht: Table to walk over
651	* @iter: Hash table Iterator
652	*
653	* This function prepares a hash table walk.
654	*
655	* Note that if you restart a walk after rhashtable_walk_stop you
656	* may see the same object twice. Also, you may miss objects if
657	* there are removals in between rhashtable_walk_stop and the next
658	* call to rhashtable_walk_start.
659	*
660	* For a completely stable walk you should construct your own data
661	* structure outside the hash table.
662	*
663	* This function may be called from any process context, including
664	* non-preemptable context, but cannot be called from softirq or
665	* hardirq context.
666	*
667	* You must call rhashtable_walk_exit after this function returns.
668	*/
669	void rhashtable_walk_enter(struct rhashtable ht, struct* rhashtable_iter *iter)
670	{
671	iter->ht = ht;
672	iter->p = NULL;
673	iter->slot = `0`;
674	iter->skip = `0`;
675	iter->end_of_table = `0`;
676
677	spin_lock(lock: &ht->lock);
678	iter->walker.tbl =
679	rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
680	list_add(new: &iter->walker.list, head: &iter->walker.tbl->walkers);
681	spin_unlock(lock: &ht->lock);
682	}
683	EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
684
685	/**
686	* rhashtable_walk_exit - Free an iterator
687	* @iter: Hash table Iterator
688	*
689	* This function frees resources allocated by rhashtable_walk_enter.
690	*/
691	void rhashtable_walk_exit(struct rhashtable_iter *iter)
692	{
693	spin_lock(lock: &iter->ht->lock);
694	if (iter->walker.tbl)
695	list_del(entry: &iter->walker.list);
696	spin_unlock(lock: &iter->ht->lock);
697	}
698	EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
699
700	/**
701	* rhashtable_walk_start_check - Start a hash table walk
702	* @iter: Hash table iterator
703	*
704	* Start a hash table walk at the current iterator position. Note that we take
705	* the RCU lock in all cases including when we return an error. So you must
706	* always call rhashtable_walk_stop to clean up.
707	*
708	* Returns zero if successful.
709	*
710	* Returns -EAGAIN if resize event occurred. Note that the iterator
711	* will rewind back to the beginning and you may use it immediately
712	* by calling rhashtable_walk_next.
713	*
714	* rhashtable_walk_start is defined as an inline variant that returns
715	* void. This is preferred in cases where the caller would ignore
716	* resize events and always continue.
717	*/
718	int rhashtable_walk_start_check(struct rhashtable_iter *iter)
719	__acquires(RCU)
720	{
721	struct rhashtable *ht = iter->ht;
722	bool rhlist = ht->rhlist;
723
724	rcu_read_lock();
725
726	spin_lock(lock: &ht->lock);
727	if (iter->walker.tbl)
728	list_del(entry: &iter->walker.list);
729	spin_unlock(lock: &ht->lock);
730
731	if (iter->end_of_table)
732	return `0`;
733	if (!iter->walker.tbl) {
734	iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
735	iter->slot = `0`;
736	iter->skip = `0`;
737	return -EAGAIN;
738	}
739
740	if (iter->p && !rhlist) {
741	/*
742	* We need to validate that 'p' is still in the table, and
743	* if so, update 'skip'
744	*/
745	struct rhash_head *p;
746	int skip = `0`;
747	rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
748	skip++;
749	if (p == iter->p) {
750	iter->skip = skip;
751	goto found;
752	}
753	}
754	iter->p = NULL;
755	} else if (iter->p && rhlist) {
756	/ Need to validate that 'list' is still in the table, and*
757	* if so, update 'skip' and 'p'.
758	*/
759	struct rhash_head *p;
760	struct rhlist_head *list;
761	int skip = `0`;
762	rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
763	for (list = container_of(p, struct rhlist_head, rhead);
764	list;
765	list = rcu_dereference(list->next)) {
766	skip++;
767	if (list == iter->list) {
768	iter->p = p;
769	iter->skip = skip;
770	goto found;
771	}
772	}
773	}
774	iter->p = NULL;
775	}
776	found:
777	return `0`;
778	}
779	EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
780
781	/**
782	* __rhashtable_walk_find_next - Find the next element in a table (or the first
783	* one in case of a new walk).
784	*
785	* @iter: Hash table iterator
786	*
787	* Returns the found object or NULL when the end of the table is reached.
788	*
789	* Returns -EAGAIN if resize event occurred.
790	*/
791	static void __rhashtable_walk_find_next(struct* rhashtable_iter *iter)
792	{
793	struct bucket_table *tbl = iter->walker.tbl;
794	struct rhlist_head *list = iter->list;
795	struct rhashtable *ht = iter->ht;
796	struct rhash_head *p = iter->p;
797	bool rhlist = ht->rhlist;
798
799	if (!tbl)
800	return NULL;
801
802	for (; iter->slot < tbl->size; iter->slot++) {
803	int skip = iter->skip;
804
805	rht_for_each_rcu(p, tbl, iter->slot) {
806	if (rhlist) {
807	list = container_of(p, struct rhlist_head,
808	rhead);
809	do {
810	if (!skip)
811	goto next;
812	skip--;
813	list = rcu_dereference(list->next);
814	} while (list);
815
816	continue;
817	}
818	if (!skip)
819	break;
820	skip--;
821	}
822
823	next:
824	if (!rht_is_a_nulls(ptr: p)) {
825	iter->skip++;
826	iter->p = p;
827	iter->list = list;
828	return rht_obj(ht, he: rhlist ? &list->rhead : p);
829	}
830
831	iter->skip = `0`;
832	}
833
834	iter->p = NULL;
835
836	/ Ensure we see any new tables. /
837	smp_rmb();
838
839	iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
840	if (iter->walker.tbl) {
841	iter->slot = `0`;
842	iter->skip = `0`;
843	return ERR_PTR(error: -EAGAIN);
844	} else {
845	iter->end_of_table = true;
846	}
847
848	return NULL;
849	}
850
851	/**
852	* rhashtable_walk_next - Return the next object and advance the iterator
853	* @iter: Hash table iterator
854	*
855	* Note that you must call rhashtable_walk_stop when you are finished
856	* with the walk.
857	*
858	* Returns the next object or NULL when the end of the table is reached.
859	*
860	* Returns -EAGAIN if resize event occurred. Note that the iterator
861	* will rewind back to the beginning and you may continue to use it.
862	*/
863	void rhashtable_walk_next(struct* rhashtable_iter *iter)
864	{
865	struct rhlist_head *list = iter->list;
866	struct rhashtable *ht = iter->ht;
867	struct rhash_head *p = iter->p;
868	bool rhlist = ht->rhlist;
869
870	if (p) {
871	if (!rhlist \|\| !(list = rcu_dereference(list->next))) {
872	p = rcu_dereference(p->next);
873	list = container_of(p, struct rhlist_head, rhead);
874	}
875	if (!rht_is_a_nulls(ptr: p)) {
876	iter->skip++;
877	iter->p = p;
878	iter->list = list;
879	return rht_obj(ht, he: rhlist ? &list->rhead : p);
880	}
881
882	/ At the end of this slot, switch to next one and then find*
883	* next entry from that point.
884	*/
885	iter->skip = `0`;
886	iter->slot++;
887	}
888
889	return __rhashtable_walk_find_next(iter);
890	}
891	EXPORT_SYMBOL_GPL(rhashtable_walk_next);
892
893	/**
894	* rhashtable_walk_peek - Return the next object but don't advance the iterator
895	* @iter: Hash table iterator
896	*
897	* Returns the next object or NULL when the end of the table is reached.
898	*
899	* Returns -EAGAIN if resize event occurred. Note that the iterator
900	* will rewind back to the beginning and you may continue to use it.
901	*/
902	void rhashtable_walk_peek(struct* rhashtable_iter *iter)
903	{
904	struct rhlist_head *list = iter->list;
905	struct rhashtable *ht = iter->ht;
906	struct rhash_head *p = iter->p;
907
908	if (p)
909	return rht_obj(ht, he: ht->rhlist ? &list->rhead : p);
910
911	/ No object found in current iter, find next one in the table. /
912
913	if (iter->skip) {
914	/ A nonzero skip value points to the next entry in the table*
915	* beyond that last one that was found. Decrement skip so
916	* we find the current value. __rhashtable_walk_find_next
917	* will restore the original value of skip assuming that
918	* the table hasn't changed.
919	*/
920	iter->skip--;
921	}
922
923	return __rhashtable_walk_find_next(iter);
924	}
925	EXPORT_SYMBOL_GPL(rhashtable_walk_peek);
926
927	/**
928	* rhashtable_walk_stop - Finish a hash table walk
929	* @iter: Hash table iterator
930	*
931	* Finish a hash table walk. Does not reset the iterator to the start of the
932	* hash table.
933	*/
934	void rhashtable_walk_stop(struct rhashtable_iter *iter)
935	__releases(RCU)
936	{
937	struct rhashtable *ht;
938	struct bucket_table *tbl = iter->walker.tbl;
939
940	if (!tbl)
941	goto out;
942
943	ht = iter->ht;
944
945	spin_lock(lock: &ht->lock);
946	if (rcu_head_after_call_rcu(rhp: &tbl->rcu, f: bucket_table_free_rcu))
947	/ This bucket table is being freed, don't re-link it. /
948	iter->walker.tbl = NULL;
949	else
950	list_add(new: &iter->walker.list, head: &tbl->walkers);
951	spin_unlock(lock: &ht->lock);
952
953	out:
954	rcu_read_unlock();
955	}
956	EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
957
958	static size_t rounded_hashtable_size(const struct rhashtable_params *params)
959	{
960	size_t retsize;
961
962	if (params->nelem_hint)
963	retsize = max(roundup_pow_of_two(params->nelem_hint * `4` / `3`),
964	(unsigned long)params->min_size);
965	else
966	retsize = max(HASH_DEFAULT_SIZE,
967	(unsigned long)params->min_size);
968
969	return retsize;
970	}
971
972	static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
973	{
974	return jhash2(k: key, length, initval: seed);
975	}
976
977	/**
978	* rhashtable_init - initialize a new hash table
979	* @ht: hash table to be initialized
980	* @params: configuration parameters
981	*
982	* Initializes a new hash table based on the provided configuration
983	* parameters. A table can be configured either with a variable or
984	* fixed length key:
985	*
986	* Configuration Example 1: Fixed length keys
987	* struct test_obj {
988	* int key;
989	* void * my_member;
990	* struct rhash_head node;
991	* };
992	*
993	* struct rhashtable_params params = {
994	* .head_offset = offsetof(struct test_obj, node),
995	* .key_offset = offsetof(struct test_obj, key),
996	* .key_len = sizeof(int),
997	* .hashfn = jhash,
998	* };
999	*
1000	* Configuration Example 2: Variable length keys
1001	* struct test_obj {
1002	* [...]
1003	* struct rhash_head node;
1004	* };
1005	*
1006	* u32 my_hash_fn(const void *data, u32 len, u32 seed)
1007	* {
1008	* struct test_obj *obj = data;
1009	*
1010	* return [... hash ...];
1011	* }
1012	*
1013	* struct rhashtable_params params = {
1014	* .head_offset = offsetof(struct test_obj, node),
1015	* .hashfn = jhash,
1016	* .obj_hashfn = my_hash_fn,
1017	* };
1018	*/
1019	int rhashtable_init(struct rhashtable *ht,
1020	const struct rhashtable_params *params)
1021	{
1022	struct bucket_table *tbl;
1023	size_t size;
1024
1025	if ((!params->key_len && !params->obj_hashfn) \|\|
1026	(params->obj_hashfn && !params->obj_cmpfn))
1027	return -EINVAL;
1028
1029	memset(ht, `0`, sizeof(*ht));
1030	mutex_init(&ht->mutex);
1031	spin_lock_init(&ht->lock);
1032	memcpy(&ht->p, params, sizeof(*params));
1033
1034	if (params->min_size)
1035	ht->p.min_size = roundup_pow_of_two(params->min_size);
1036
1037	/ Cap total entries at 2^31 to avoid nelems overflow. /
1038	ht->max_elems = `1u` << `31`;
1039
1040	if (params->max_size) {
1041	ht->p.max_size = rounddown_pow_of_two(params->max_size);
1042	if (ht->p.max_size < ht->max_elems / `2`)
1043	ht->max_elems = ht->p.max_size * `2`;
1044	}
1045
1046	ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
1047
1048	size = rounded_hashtable_size(params: &ht->p);
1049
1050	ht->key_len = ht->p.key_len;
1051	if (!params->hashfn) {
1052	ht->p.hashfn = jhash;
1053
1054	if (!(ht->key_len & (sizeof(u32) - `1`))) {
1055	ht->key_len /= sizeof(u32);
1056	ht->p.hashfn = rhashtable_jhash2;
1057	}
1058	}
1059
1060	/*
1061	* This is api initialization and thus we need to guarantee the
1062	* initial rhashtable allocation. Upon failure, retry with the
1063	* smallest possible size with __GFP_NOFAIL semantics.
1064	*/
1065	tbl = bucket_table_alloc(ht, nbuckets: size, GFP_KERNEL);
1066	if (unlikely(tbl == NULL)) {
1067	size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
1068	tbl = bucket_table_alloc(ht, nbuckets: size, GFP_KERNEL \| __GFP_NOFAIL);
1069	}
1070
1071	atomic_set(v: &ht->nelems, i: `0`);
1072
1073	RCU_INIT_POINTER(ht->tbl, tbl);
1074
1075	INIT_WORK(&ht->run_work, rht_deferred_worker);
1076
1077	return `0`;
1078	}
1079	EXPORT_SYMBOL_GPL(rhashtable_init);
1080
1081	/**
1082	* rhltable_init - initialize a new hash list table
1083	* @hlt: hash list table to be initialized
1084	* @params: configuration parameters
1085	*
1086	* Initializes a new hash list table.
1087	*
1088	* See documentation for rhashtable_init.
1089	*/
1090	int rhltable_init(struct rhltable hlt, const* struct rhashtable_params *params)
1091	{
1092	int err;
1093
1094	err = rhashtable_init(&hlt->ht, params);
1095	hlt->ht.rhlist = true;
1096	return err;
1097	}
1098	EXPORT_SYMBOL_GPL(rhltable_init);
1099
1100	static void rhashtable_free_one(struct rhashtable ht, struct* rhash_head *obj,
1101	void (free_fn)(void* ptr, void* *arg),
1102	void *arg)
1103	{
1104	struct rhlist_head *list;
1105
1106	if (!ht->rhlist) {
1107	free_fn(rht_obj(ht, he: obj), arg);
1108	return;
1109	}
1110
1111	list = container_of(obj, struct rhlist_head, rhead);
1112	do {
1113	obj = &list->rhead;
1114	list = rht_dereference(list->next, ht);
1115	free_fn(rht_obj(ht, he: obj), arg);
1116	} while (list);
1117	}
1118
1119	/**
1120	* rhashtable_free_and_destroy - free elements and destroy hash table
1121	* @ht: the hash table to destroy
1122	* @free_fn: callback to release resources of element
1123	* @arg: pointer passed to free_fn
1124	*
1125	* Stops an eventual async resize. If defined, invokes free_fn for each
1126	* element to releasal resources. Please note that RCU protected
1127	* readers may still be accessing the elements. Releasing of resources
1128	* must occur in a compatible manner. Then frees the bucket array.
1129	*
1130	* This function will eventually sleep to wait for an async resize
1131	* to complete. The caller is responsible that no further write operations
1132	* occurs in parallel.
1133	*/
1134	void rhashtable_free_and_destroy(struct rhashtable *ht,
1135	void (free_fn)(void* ptr, void* *arg),
1136	void *arg)
1137	{
1138	struct bucket_table tbl, next_tbl;
1139	unsigned int i;
1140
1141	cancel_work_sync(work: &ht->run_work);
1142
1143	mutex_lock(&ht->mutex);
1144	tbl = rht_dereference(ht->tbl, ht);
1145	restart:
1146	if (free_fn) {
1147	for (i = `0`; i < tbl->size; i++) {
1148	struct rhash_head pos, next;
1149
1150	cond_resched();
1151	for (pos = rht_ptr_exclusive(bkt: rht_bucket(tbl, hash: i)),
1152	next = !rht_is_a_nulls(ptr: pos) ?
1153	rht_dereference(pos->next, ht) : NULL;
1154	!rht_is_a_nulls(ptr: pos);
1155	pos = next,
1156	next = !rht_is_a_nulls(ptr: pos) ?
1157	rht_dereference(pos->next, ht) : NULL)
1158	rhashtable_free_one(ht, obj: pos, free_fn, arg);
1159	}
1160	}
1161
1162	next_tbl = rht_dereference(tbl->future_tbl, ht);
1163	bucket_table_free(tbl);
1164	if (next_tbl) {
1165	tbl = next_tbl;
1166	goto restart;
1167	}
1168	mutex_unlock(lock: &ht->mutex);
1169	}
1170	EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
1171
1172	void rhashtable_destroy(struct rhashtable *ht)
1173	{
1174	return rhashtable_free_and_destroy(ht, NULL, NULL);
1175	}
1176	EXPORT_SYMBOL_GPL(rhashtable_destroy);
1177
1178	struct rhash_lock_head __rcu **__rht_bucket_nested(
1179	const struct bucket_table tbl, unsigned* int hash)
1180	{
1181	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
1182	unsigned int index = hash & ((`1` << tbl->nest) - `1`);
1183	unsigned int size = tbl->size >> tbl->nest;
1184	unsigned int subhash = hash;
1185	union nested_table *ntbl;
1186
1187	ntbl = nested_table_top(tbl);
1188	ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
1189	subhash >>= tbl->nest;
1190
1191	while (ntbl && size > (`1` << shift)) {
1192	index = subhash & ((`1` << shift) - `1`);
1193	ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
1194	tbl, hash);
1195	size >>= shift;
1196	subhash >>= shift;
1197	}
1198
1199	if (!ntbl)
1200	return NULL;
1201
1202	return &ntbl[subhash].bucket;
1203
1204	}
1205	EXPORT_SYMBOL_GPL(__rht_bucket_nested);
1206
1207	struct rhash_lock_head __rcu **rht_bucket_nested(
1208	const struct bucket_table tbl, unsigned* int hash)
1209	{
1210	static struct rhash_lock_head __rcu *rhnull;
1211
1212	if (!rhnull)
1213	INIT_RHT_NULLS_HEAD(rhnull);
1214	return __rht_bucket_nested(tbl, hash) ?: &rhnull;
1215	}
1216	EXPORT_SYMBOL_GPL(rht_bucket_nested);
1217
1218	struct rhash_lock_head __rcu **rht_bucket_nested_insert(
1219	struct rhashtable ht, struct* bucket_table tbl, unsigned* int hash)
1220	{
1221	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
1222	unsigned int index = hash & ((`1` << tbl->nest) - `1`);
1223	unsigned int size = tbl->size >> tbl->nest;
1224	union nested_table *ntbl;
1225
1226	ntbl = nested_table_top(tbl);
1227	hash >>= tbl->nest;
1228	ntbl = nested_table_alloc(ht, prev: &ntbl[index].table,
1229	leaf: size <= (`1` << shift));
1230
1231	while (ntbl && size > (`1` << shift)) {
1232	index = hash & ((`1` << shift) - `1`);
1233	size >>= shift;
1234	hash >>= shift;
1235	ntbl = nested_table_alloc(ht, prev: &ntbl[index].table,
1236	leaf: size <= (`1` << shift));
1237	}
1238
1239	if (!ntbl)
1240	return NULL;
1241
1242	return &ntbl[hash].bucket;
1243
1244	}
1245	EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
1246

source code of linux/lib/rhashtable.c