1/*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#include <linux/types.h>
25#include <linux/hash.h>
26#include <linux/bug.h>
27#include <linux/slab.h>
28#include <linux/module.h>
29#include <linux/sched/clock.h>
30#include <asm/div64.h>
31#include <linux/chash.h>
32
33/**
34 * chash_table_alloc - Allocate closed hash table
35 * @table: Pointer to the table structure
36 * @bits: Table size will be 2^bits entries
37 * @key_size: Size of hash keys in bytes, 4 or 8
38 * @value_size: Size of data values in bytes, can be 0
39 */
40int chash_table_alloc(struct chash_table *table, u8 bits, u8 key_size,
41 unsigned int value_size, gfp_t gfp_mask)
42{
43 if (bits > 31)
44 return -EINVAL;
45
46 if (key_size != 4 && key_size != 8)
47 return -EINVAL;
48
49 table->data = kcalloc(__CHASH_DATA_SIZE(bits, key_size, value_size),
50 sizeof(long), gfp_mask);
51 if (!table->data)
52 return -ENOMEM;
53
54 __CHASH_TABLE_INIT(table->table, table->data,
55 bits, key_size, value_size);
56
57 return 0;
58}
59EXPORT_SYMBOL(chash_table_alloc);
60
61/**
62 * chash_table_free - Free closed hash table
63 * @table: Pointer to the table structure
64 */
65void chash_table_free(struct chash_table *table)
66{
67 kfree(table->data);
68}
69EXPORT_SYMBOL(chash_table_free);
70
71#ifdef CONFIG_CHASH_STATS
72
73#define DIV_FRAC(nom, denom, quot, frac, frac_digits) do { \
74 u64 __nom = (nom); \
75 u64 __denom = (denom); \
76 u64 __quot, __frac; \
77 u32 __rem; \
78 \
79 while (__denom >> 32) { \
80 __nom >>= 1; \
81 __denom >>= 1; \
82 } \
83 __quot = __nom; \
84 __rem = do_div(__quot, __denom); \
85 __frac = __rem * (frac_digits) + (__denom >> 1); \
86 do_div(__frac, __denom); \
87 (quot) = __quot; \
88 (frac) = __frac; \
89 } while (0)
90
91void __chash_table_dump_stats(struct __chash_table *table)
92{
93 struct chash_iter iter = CHASH_ITER_INIT(table, 0);
94 u32 filled = 0, empty = 0, tombstones = 0;
95 u64 quot1, quot2;
96 u32 frac1, frac2;
97
98 do {
99 if (chash_iter_is_valid(iter))
100 filled++;
101 else if (chash_iter_is_empty(iter))
102 empty++;
103 else
104 tombstones++;
105 CHASH_ITER_INC(iter);
106 } while (iter.slot);
107
108 pr_debug("chash: key size %u, value size %u\n",
109 table->key_size, table->value_size);
110 pr_debug(" Slots total/filled/empty/tombstones: %u / %u / %u / %u\n",
111 1 << table->bits, filled, empty, tombstones);
112 if (table->hits > 0) {
113 DIV_FRAC(table->hits_steps, table->hits, quot1, frac1, 1000);
114 DIV_FRAC(table->hits * 1000, table->hits_time_ns,
115 quot2, frac2, 1000);
116 } else {
117 quot1 = quot2 = 0;
118 frac1 = frac2 = 0;
119 }
120 pr_debug(" Hits (avg.cost, rate): %llu (%llu.%03u, %llu.%03u M/s)\n",
121 table->hits, quot1, frac1, quot2, frac2);
122 if (table->miss > 0) {
123 DIV_FRAC(table->miss_steps, table->miss, quot1, frac1, 1000);
124 DIV_FRAC(table->miss * 1000, table->miss_time_ns,
125 quot2, frac2, 1000);
126 } else {
127 quot1 = quot2 = 0;
128 frac1 = frac2 = 0;
129 }
130 pr_debug(" Misses (avg.cost, rate): %llu (%llu.%03u, %llu.%03u M/s)\n",
131 table->miss, quot1, frac1, quot2, frac2);
132 if (table->hits + table->miss > 0) {
133 DIV_FRAC(table->hits_steps + table->miss_steps,
134 table->hits + table->miss, quot1, frac1, 1000);
135 DIV_FRAC((table->hits + table->miss) * 1000,
136 (table->hits_time_ns + table->miss_time_ns),
137 quot2, frac2, 1000);
138 } else {
139 quot1 = quot2 = 0;
140 frac1 = frac2 = 0;
141 }
142 pr_debug(" Total (avg.cost, rate): %llu (%llu.%03u, %llu.%03u M/s)\n",
143 table->hits + table->miss, quot1, frac1, quot2, frac2);
144 if (table->relocs > 0) {
145 DIV_FRAC(table->hits + table->miss, table->relocs,
146 quot1, frac1, 1000);
147 DIV_FRAC(table->reloc_dist, table->relocs, quot2, frac2, 1000);
148 pr_debug(" Relocations (freq, avg.dist): %llu (1:%llu.%03u, %llu.%03u)\n",
149 table->relocs, quot1, frac1, quot2, frac2);
150 } else {
151 pr_debug(" No relocations\n");
152 }
153}
154EXPORT_SYMBOL(__chash_table_dump_stats);
155
156#undef DIV_FRAC
157#endif
158
159#define CHASH_INC(table, a) ((a) = ((a) + 1) & (table)->size_mask)
160#define CHASH_ADD(table, a, b) (((a) + (b)) & (table)->size_mask)
161#define CHASH_SUB(table, a, b) (((a) - (b)) & (table)->size_mask)
162#define CHASH_IN_RANGE(table, slot, first, last) \
163 (CHASH_SUB(table, slot, first) <= CHASH_SUB(table, last, first))
164
165/*#define CHASH_DEBUG Uncomment this to enable verbose debug output*/
166#ifdef CHASH_DEBUG
167static void chash_table_dump(struct __chash_table *table)
168{
169 struct chash_iter iter = CHASH_ITER_INIT(table, 0);
170
171 do {
172 if ((iter.slot & 3) == 0)
173 pr_debug("%04x: ", iter.slot);
174
175 if (chash_iter_is_valid(iter))
176 pr_debug("[%016llx] ", chash_iter_key(iter));
177 else if (chash_iter_is_empty(iter))
178 pr_debug("[ <empty> ] ");
179 else
180 pr_debug("[ <tombstone> ] ");
181
182 if ((iter.slot & 3) == 3)
183 pr_debug("\n");
184
185 CHASH_ITER_INC(iter);
186 } while (iter.slot);
187
188 if ((iter.slot & 3) != 0)
189 pr_debug("\n");
190}
191
192static int chash_table_check(struct __chash_table *table)
193{
194 u32 hash;
195 struct chash_iter iter = CHASH_ITER_INIT(table, 0);
196 struct chash_iter cur = CHASH_ITER_INIT(table, 0);
197
198 do {
199 if (!chash_iter_is_valid(iter)) {
200 CHASH_ITER_INC(iter);
201 continue;
202 }
203
204 hash = chash_iter_hash(iter);
205 CHASH_ITER_SET(cur, hash);
206 while (cur.slot != iter.slot) {
207 if (chash_iter_is_empty(cur)) {
208 pr_err("Path to element at %x with hash %x broken at slot %x\n",
209 iter.slot, hash, cur.slot);
210 chash_table_dump(table);
211 return -EINVAL;
212 }
213 CHASH_ITER_INC(cur);
214 }
215
216 CHASH_ITER_INC(iter);
217 } while (iter.slot);
218
219 return 0;
220}
221#endif
222
223static void chash_iter_relocate(struct chash_iter dst, struct chash_iter src)
224{
225 BUG_ON(src.table == dst.table && src.slot == dst.slot);
226 BUG_ON(src.table->key_size != dst.table->key_size);
227 BUG_ON(src.table->value_size != dst.table->value_size);
228
229 if (dst.table->key_size == 4)
230 dst.table->keys32[dst.slot] = src.table->keys32[src.slot];
231 else
232 dst.table->keys64[dst.slot] = src.table->keys64[src.slot];
233
234 if (dst.table->value_size)
235 memcpy(chash_iter_value(dst), chash_iter_value(src),
236 dst.table->value_size);
237
238 chash_iter_set_valid(dst);
239 chash_iter_set_invalid(src);
240
241#ifdef CONFIG_CHASH_STATS
242 if (src.table == dst.table) {
243 dst.table->relocs++;
244 dst.table->reloc_dist +=
245 CHASH_SUB(dst.table, src.slot, dst.slot);
246 }
247#endif
248}
249
250/**
251 * __chash_table_find - Helper for looking up a hash table entry
252 * @iter: Pointer to hash table iterator
253 * @key: Key of the entry to find
254 * @for_removal: set to true if the element will be removed soon
255 *
256 * Searches for an entry in the hash table with a given key. iter must
257 * be initialized by the caller to point to the home position of the
258 * hypothetical entry, i.e. it must be initialized with the hash table
259 * and the key's hash as the initial slot for the search.
260 *
261 * This function also does some local clean-up to speed up future
262 * look-ups by relocating entries to better slots and removing
263 * tombstones that are no longer needed.
264 *
265 * If @for_removal is true, the function avoids relocating the entry
266 * that is being returned.
267 *
268 * Returns 0 if the search is successful. In this case iter is updated
269 * to point to the found entry. Otherwise %-EINVAL is returned and the
270 * iter is updated to point to the first available slot for the given
271 * key. If the table is full, the slot is set to -1.
272 */
273static int chash_table_find(struct chash_iter *iter, u64 key,
274 bool for_removal)
275{
276#ifdef CONFIG_CHASH_STATS
277 u64 ts1 = local_clock();
278#endif
279 u32 hash = iter->slot;
280 struct chash_iter first_redundant = CHASH_ITER_INIT(iter->table, -1);
281 int first_avail = (for_removal ? -2 : -1);
282
283 while (!chash_iter_is_valid(*iter) || chash_iter_key(*iter) != key) {
284 if (chash_iter_is_empty(*iter)) {
285 /* Found an empty slot, which ends the
286 * search. Clean up any preceding tombstones
287 * that are no longer needed because they lead
288 * to no-where
289 */
290 if ((int)first_redundant.slot < 0)
291 goto not_found;
292 while (first_redundant.slot != iter->slot) {
293 if (!chash_iter_is_valid(first_redundant))
294 chash_iter_set_empty(first_redundant);
295 CHASH_ITER_INC(first_redundant);
296 }
297#ifdef CHASH_DEBUG
298 chash_table_check(iter->table);
299#endif
300 goto not_found;
301 } else if (!chash_iter_is_valid(*iter)) {
302 /* Found a tombstone. Remember it as candidate
303 * for relocating the entry we're looking for
304 * or for adding a new entry with the given key
305 */
306 if (first_avail == -1)
307 first_avail = iter->slot;
308 /* Or mark it as the start of a series of
309 * potentially redundant tombstones
310 */
311 else if (first_redundant.slot == -1)
312 CHASH_ITER_SET(first_redundant, iter->slot);
313 } else if (first_redundant.slot >= 0) {
314 /* Found a valid, occupied slot with a
315 * preceding series of tombstones. Relocate it
316 * to a better position that no longer depends
317 * on those tombstones
318 */
319 u32 cur_hash = chash_iter_hash(*iter);
320
321 if (!CHASH_IN_RANGE(iter->table, cur_hash,
322 first_redundant.slot + 1,
323 iter->slot)) {
324 /* This entry has a hash at or before
325 * the first tombstone we found. We
326 * can relocate it to that tombstone
327 * and advance to the next tombstone
328 */
329 chash_iter_relocate(first_redundant, *iter);
330 do {
331 CHASH_ITER_INC(first_redundant);
332 } while (chash_iter_is_valid(first_redundant));
333 } else if (cur_hash != iter->slot) {
334 /* Relocate entry to its home position
335 * or as close as possible so it no
336 * longer depends on any preceding
337 * tombstones
338 */
339 struct chash_iter new_iter =
340 CHASH_ITER_INIT(iter->table, cur_hash);
341
342 while (new_iter.slot != iter->slot &&
343 chash_iter_is_valid(new_iter))
344 CHASH_ITER_INC(new_iter);
345
346 if (new_iter.slot != iter->slot)
347 chash_iter_relocate(new_iter, *iter);
348 }
349 }
350
351 CHASH_ITER_INC(*iter);
352 if (iter->slot == hash) {
353 iter->slot = -1;
354 goto not_found;
355 }
356 }
357
358#ifdef CONFIG_CHASH_STATS
359 iter->table->hits++;
360 iter->table->hits_steps += CHASH_SUB(iter->table, iter->slot, hash) + 1;
361#endif
362
363 if (first_avail >= 0) {
364 CHASH_ITER_SET(first_redundant, first_avail);
365 chash_iter_relocate(first_redundant, *iter);
366 iter->slot = first_redundant.slot;
367 iter->mask = first_redundant.mask;
368 }
369
370#ifdef CONFIG_CHASH_STATS
371 iter->table->hits_time_ns += local_clock() - ts1;
372#endif
373
374 return 0;
375
376not_found:
377#ifdef CONFIG_CHASH_STATS
378 iter->table->miss++;
379 iter->table->miss_steps += (iter->slot < 0) ?
380 (1 << iter->table->bits) :
381 CHASH_SUB(iter->table, iter->slot, hash) + 1;
382#endif
383
384 if (first_avail >= 0)
385 CHASH_ITER_SET(*iter, first_avail);
386
387#ifdef CONFIG_CHASH_STATS
388 iter->table->miss_time_ns += local_clock() - ts1;
389#endif
390
391 return -EINVAL;
392}
393
394int __chash_table_copy_in(struct __chash_table *table, u64 key,
395 const void *value)
396{
397 u32 hash = (table->key_size == 4) ?
398 hash_32(key, table->bits) : hash_64(key, table->bits);
399 struct chash_iter iter = CHASH_ITER_INIT(table, hash);
400 int r = chash_table_find(&iter, key, false);
401
402 /* Found an existing entry */
403 if (!r) {
404 if (value && table->value_size)
405 memcpy(chash_iter_value(iter), value,
406 table->value_size);
407 return 1;
408 }
409
410 /* Is there a place to add a new entry? */
411 if (iter.slot < 0) {
412 pr_err("Hash table overflow\n");
413 return -ENOMEM;
414 }
415
416 chash_iter_set_valid(iter);
417
418 if (table->key_size == 4)
419 table->keys32[iter.slot] = key;
420 else
421 table->keys64[iter.slot] = key;
422 if (value && table->value_size)
423 memcpy(chash_iter_value(iter), value, table->value_size);
424
425 return 0;
426}
427EXPORT_SYMBOL(__chash_table_copy_in);
428
429int __chash_table_copy_out(struct __chash_table *table, u64 key,
430 void *value, bool remove)
431{
432 u32 hash = (table->key_size == 4) ?
433 hash_32(key, table->bits) : hash_64(key, table->bits);
434 struct chash_iter iter = CHASH_ITER_INIT(table, hash);
435 int r = chash_table_find(&iter, key, remove);
436
437 if (r < 0)
438 return r;
439
440 if (value && table->value_size)
441 memcpy(value, chash_iter_value(iter), table->value_size);
442
443 if (remove)
444 chash_iter_set_invalid(iter);
445
446 return iter.slot;
447}
448EXPORT_SYMBOL(__chash_table_copy_out);
449
450#ifdef CONFIG_CHASH_SELFTEST
451/**
452 * chash_self_test - Run a self-test of the hash table implementation
453 * @bits: Table size will be 2^bits entries
454 * @key_size: Size of hash keys in bytes, 4 or 8
455 * @min_fill: Minimum fill level during the test
456 * @max_fill: Maximum fill level during the test
457 * @iterations: Number of test iterations
458 *
459 * The test adds and removes entries from a hash table, cycling the
460 * fill level between min_fill and max_fill entries. Also tests lookup
461 * and value retrieval.
462 */
463static int __init chash_self_test(u8 bits, u8 key_size,
464 int min_fill, int max_fill,
465 u64 iterations)
466{
467 struct chash_table table;
468 int ret;
469 u64 add_count, rmv_count;
470 u64 value;
471
472 if (key_size == 4 && iterations > 0xffffffff)
473 return -EINVAL;
474 if (min_fill >= max_fill)
475 return -EINVAL;
476
477 ret = chash_table_alloc(&table, bits, key_size, sizeof(u64),
478 GFP_KERNEL);
479 if (ret) {
480 pr_err("chash_table_alloc failed: %d\n", ret);
481 return ret;
482 }
483
484 for (add_count = 0, rmv_count = 0; add_count < iterations;
485 add_count++) {
486 /* When we hit the max_fill level, remove entries down
487 * to min_fill
488 */
489 if (add_count - rmv_count == max_fill) {
490 u64 find_count = rmv_count;
491
492 /* First try to find all entries that we're
493 * about to remove, confirm their value, test
494 * writing them back a second time.
495 */
496 for (; add_count - find_count > min_fill;
497 find_count++) {
498 ret = chash_table_copy_out(&table, find_count,
499 &value);
500 if (ret < 0) {
501 pr_err("chash_table_copy_out failed: %d\n",
502 ret);
503 goto out;
504 }
505 if (value != ~find_count) {
506 pr_err("Wrong value retrieved for key 0x%llx, expected 0x%llx got 0x%llx\n",
507 find_count, ~find_count, value);
508#ifdef CHASH_DEBUG
509 chash_table_dump(&table.table);
510#endif
511 ret = -EFAULT;
512 goto out;
513 }
514 ret = chash_table_copy_in(&table, find_count,
515 &value);
516 if (ret != 1) {
517 pr_err("copy_in second time returned %d, expected 1\n",
518 ret);
519 ret = -EFAULT;
520 goto out;
521 }
522 }
523 /* Remove them until we hit min_fill level */
524 for (; add_count - rmv_count > min_fill; rmv_count++) {
525 ret = chash_table_remove(&table, rmv_count,
526 NULL);
527 if (ret < 0) {
528 pr_err("chash_table_remove failed: %d\n",
529 ret);
530 goto out;
531 }
532 }
533 }
534
535 /* Add a new value */
536 value = ~add_count;
537 ret = chash_table_copy_in(&table, add_count, &value);
538 if (ret != 0) {
539 pr_err("copy_in first time returned %d, expected 0\n",
540 ret);
541 ret = -EFAULT;
542 goto out;
543 }
544 }
545
546 chash_table_dump_stats(&table);
547 chash_table_reset_stats(&table);
548
549out:
550 chash_table_free(&table);
551 return ret;
552}
553
554static unsigned int chash_test_bits = 10;
555MODULE_PARM_DESC(test_bits,
556 "Selftest number of hash bits ([4..20], default=10)");
557module_param_named(test_bits, chash_test_bits, uint, 0444);
558
559static unsigned int chash_test_keysize = 8;
560MODULE_PARM_DESC(test_keysize, "Selftest keysize (4 or 8, default=8)");
561module_param_named(test_keysize, chash_test_keysize, uint, 0444);
562
563static unsigned int chash_test_minfill;
564MODULE_PARM_DESC(test_minfill, "Selftest minimum #entries (default=50%)");
565module_param_named(test_minfill, chash_test_minfill, uint, 0444);
566
567static unsigned int chash_test_maxfill;
568MODULE_PARM_DESC(test_maxfill, "Selftest maximum #entries (default=80%)");
569module_param_named(test_maxfill, chash_test_maxfill, uint, 0444);
570
571static unsigned long chash_test_iters;
572MODULE_PARM_DESC(test_iters, "Selftest iterations (default=1000 x #entries)");
573module_param_named(test_iters, chash_test_iters, ulong, 0444);
574
575static int __init chash_init(void)
576{
577 int ret;
578 u64 ts1_ns;
579
580 /* Skip self test on user errors */
581 if (chash_test_bits < 4 || chash_test_bits > 20) {
582 pr_err("chash: test_bits out of range [4..20].\n");
583 return 0;
584 }
585 if (chash_test_keysize != 4 && chash_test_keysize != 8) {
586 pr_err("chash: test_keysize invalid. Must be 4 or 8.\n");
587 return 0;
588 }
589
590 if (!chash_test_minfill)
591 chash_test_minfill = (1 << chash_test_bits) / 2;
592 if (!chash_test_maxfill)
593 chash_test_maxfill = (1 << chash_test_bits) * 4 / 5;
594 if (!chash_test_iters)
595 chash_test_iters = (1 << chash_test_bits) * 1000;
596
597 if (chash_test_minfill >= (1 << chash_test_bits)) {
598 pr_err("chash: test_minfill too big. Must be < table size.\n");
599 return 0;
600 }
601 if (chash_test_maxfill >= (1 << chash_test_bits)) {
602 pr_err("chash: test_maxfill too big. Must be < table size.\n");
603 return 0;
604 }
605 if (chash_test_minfill >= chash_test_maxfill) {
606 pr_err("chash: test_minfill must be < test_maxfill.\n");
607 return 0;
608 }
609 if (chash_test_keysize == 4 && chash_test_iters > 0xffffffff) {
610 pr_err("chash: test_iters must be < 4G for 4 byte keys.\n");
611 return 0;
612 }
613
614 ts1_ns = local_clock();
615 ret = chash_self_test(chash_test_bits, chash_test_keysize,
616 chash_test_minfill, chash_test_maxfill,
617 chash_test_iters);
618 if (!ret) {
619 u64 ts_delta_us = local_clock() - ts1_ns;
620 u64 iters_per_second = (u64)chash_test_iters * 1000000;
621
622 do_div(ts_delta_us, 1000);
623 do_div(iters_per_second, ts_delta_us);
624 pr_info("chash: self test took %llu us, %llu iterations/s\n",
625 ts_delta_us, iters_per_second);
626 } else {
627 pr_err("chash: self test failed: %d\n", ret);
628 }
629
630 return ret;
631}
632
633module_init(chash_init);
634
635#endif /* CONFIG_CHASH_SELFTEST */
636
637MODULE_DESCRIPTION("Closed hash table");
638MODULE_LICENSE("GPL and additional rights");
639