1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * tracing_map - lock-free map for tracing |
4 | * |
5 | * Copyright (C) 2015 Tom Zanussi <tom.zanussi@linux.intel.com> |
6 | * |
7 | * tracing_map implementation inspired by lock-free map algorithms |
8 | * originated by Dr. Cliff Click: |
9 | * |
10 | * http://www.azulsystems.com/blog/cliff/2007-03-26-non-blocking-hashtable |
11 | * http://www.azulsystems.com/events/javaone_2007/2007_LockFreeHash.pdf |
12 | */ |
13 | |
14 | #include <linux/vmalloc.h> |
15 | #include <linux/jhash.h> |
16 | #include <linux/slab.h> |
17 | #include <linux/sort.h> |
18 | #include <linux/kmemleak.h> |
19 | |
20 | #include "tracing_map.h" |
21 | #include "trace.h" |
22 | |
23 | /* |
24 | * NOTE: For a detailed description of the data structures used by |
25 | * these functions (such as tracing_map_elt) please see the overview |
26 | * of tracing_map data structures at the beginning of tracing_map.h. |
27 | */ |
28 | |
29 | /** |
30 | * tracing_map_update_sum - Add a value to a tracing_map_elt's sum field |
31 | * @elt: The tracing_map_elt |
32 | * @i: The index of the given sum associated with the tracing_map_elt |
33 | * @n: The value to add to the sum |
34 | * |
35 | * Add n to sum i associated with the specified tracing_map_elt |
36 | * instance. The index i is the index returned by the call to |
37 | * tracing_map_add_sum_field() when the tracing map was set up. |
38 | */ |
39 | void tracing_map_update_sum(struct tracing_map_elt *elt, unsigned int i, u64 n) |
40 | { |
41 | atomic64_add(i: n, v: &elt->fields[i].sum); |
42 | } |
43 | |
44 | /** |
45 | * tracing_map_read_sum - Return the value of a tracing_map_elt's sum field |
46 | * @elt: The tracing_map_elt |
47 | * @i: The index of the given sum associated with the tracing_map_elt |
48 | * |
49 | * Retrieve the value of the sum i associated with the specified |
50 | * tracing_map_elt instance. The index i is the index returned by the |
51 | * call to tracing_map_add_sum_field() when the tracing map was set |
52 | * up. |
53 | * |
54 | * Return: The sum associated with field i for elt. |
55 | */ |
56 | u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i) |
57 | { |
58 | return (u64)atomic64_read(v: &elt->fields[i].sum); |
59 | } |
60 | |
61 | /** |
62 | * tracing_map_set_var - Assign a tracing_map_elt's variable field |
63 | * @elt: The tracing_map_elt |
64 | * @i: The index of the given variable associated with the tracing_map_elt |
65 | * @n: The value to assign |
66 | * |
67 | * Assign n to variable i associated with the specified tracing_map_elt |
68 | * instance. The index i is the index returned by the call to |
69 | * tracing_map_add_var() when the tracing map was set up. |
70 | */ |
71 | void tracing_map_set_var(struct tracing_map_elt *elt, unsigned int i, u64 n) |
72 | { |
73 | atomic64_set(v: &elt->vars[i], i: n); |
74 | elt->var_set[i] = true; |
75 | } |
76 | |
77 | /** |
78 | * tracing_map_var_set - Return whether or not a variable has been set |
79 | * @elt: The tracing_map_elt |
80 | * @i: The index of the given variable associated with the tracing_map_elt |
81 | * |
82 | * Return true if the variable has been set, false otherwise. The |
83 | * index i is the index returned by the call to tracing_map_add_var() |
84 | * when the tracing map was set up. |
85 | */ |
86 | bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i) |
87 | { |
88 | return elt->var_set[i]; |
89 | } |
90 | |
91 | /** |
92 | * tracing_map_read_var - Return the value of a tracing_map_elt's variable field |
93 | * @elt: The tracing_map_elt |
94 | * @i: The index of the given variable associated with the tracing_map_elt |
95 | * |
96 | * Retrieve the value of the variable i associated with the specified |
97 | * tracing_map_elt instance. The index i is the index returned by the |
98 | * call to tracing_map_add_var() when the tracing map was set |
99 | * up. |
100 | * |
101 | * Return: The variable value associated with field i for elt. |
102 | */ |
103 | u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i) |
104 | { |
105 | return (u64)atomic64_read(v: &elt->vars[i]); |
106 | } |
107 | |
108 | /** |
109 | * tracing_map_read_var_once - Return and reset a tracing_map_elt's variable field |
110 | * @elt: The tracing_map_elt |
111 | * @i: The index of the given variable associated with the tracing_map_elt |
112 | * |
113 | * Retrieve the value of the variable i associated with the specified |
114 | * tracing_map_elt instance, and reset the variable to the 'not set' |
115 | * state. The index i is the index returned by the call to |
116 | * tracing_map_add_var() when the tracing map was set up. The reset |
117 | * essentially makes the variable a read-once variable if it's only |
118 | * accessed using this function. |
119 | * |
120 | * Return: The variable value associated with field i for elt. |
121 | */ |
122 | u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i) |
123 | { |
124 | elt->var_set[i] = false; |
125 | return (u64)atomic64_read(v: &elt->vars[i]); |
126 | } |
127 | |
/*
 * Comparison callback for string-valued fields: both arguments are
 * treated as NUL-terminated strings and ordered with strcmp().
 */
int tracing_map_cmp_string(void *val_a, void *val_b)
{
	const char *lhs = val_a;
	const char *rhs = val_b;

	return strcmp(lhs, rhs);
}
135 | |
/*
 * Comparison callback that imposes no order: every pair of values
 * compares equal.  The arguments are ignored.
 */
int tracing_map_cmp_none(void *val_a, void *val_b)
{
	(void)val_a;
	(void)val_b;

	return 0;
}
140 | |
141 | static int tracing_map_cmp_atomic64(void *val_a, void *val_b) |
142 | { |
143 | u64 a = atomic64_read(v: (atomic64_t *)val_a); |
144 | u64 b = atomic64_read(v: (atomic64_t *)val_b); |
145 | |
146 | return (a > b) ? 1 : ((a < b) ? -1 : 0); |
147 | } |
148 | |
149 | #define DEFINE_TRACING_MAP_CMP_FN(type) \ |
150 | static int tracing_map_cmp_##type(void *val_a, void *val_b) \ |
151 | { \ |
152 | type a = (type)(*(u64 *)val_a); \ |
153 | type b = (type)(*(u64 *)val_b); \ |
154 | \ |
155 | return (a > b) ? 1 : ((a < b) ? -1 : 0); \ |
156 | } |
157 | |
158 | DEFINE_TRACING_MAP_CMP_FN(s64); |
159 | DEFINE_TRACING_MAP_CMP_FN(u64); |
160 | DEFINE_TRACING_MAP_CMP_FN(s32); |
161 | DEFINE_TRACING_MAP_CMP_FN(u32); |
162 | DEFINE_TRACING_MAP_CMP_FN(s16); |
163 | DEFINE_TRACING_MAP_CMP_FN(u16); |
164 | DEFINE_TRACING_MAP_CMP_FN(s8); |
165 | DEFINE_TRACING_MAP_CMP_FN(u8); |
166 | |
167 | tracing_map_cmp_fn_t tracing_map_cmp_num(int field_size, |
168 | int field_is_signed) |
169 | { |
170 | tracing_map_cmp_fn_t fn = tracing_map_cmp_none; |
171 | |
172 | switch (field_size) { |
173 | case 8: |
174 | if (field_is_signed) |
175 | fn = tracing_map_cmp_s64; |
176 | else |
177 | fn = tracing_map_cmp_u64; |
178 | break; |
179 | case 4: |
180 | if (field_is_signed) |
181 | fn = tracing_map_cmp_s32; |
182 | else |
183 | fn = tracing_map_cmp_u32; |
184 | break; |
185 | case 2: |
186 | if (field_is_signed) |
187 | fn = tracing_map_cmp_s16; |
188 | else |
189 | fn = tracing_map_cmp_u16; |
190 | break; |
191 | case 1: |
192 | if (field_is_signed) |
193 | fn = tracing_map_cmp_s8; |
194 | else |
195 | fn = tracing_map_cmp_u8; |
196 | break; |
197 | } |
198 | |
199 | return fn; |
200 | } |
201 | |
202 | static int tracing_map_add_field(struct tracing_map *map, |
203 | tracing_map_cmp_fn_t cmp_fn) |
204 | { |
205 | int ret = -EINVAL; |
206 | |
207 | if (map->n_fields < TRACING_MAP_FIELDS_MAX) { |
208 | ret = map->n_fields; |
209 | map->fields[map->n_fields++].cmp_fn = cmp_fn; |
210 | } |
211 | |
212 | return ret; |
213 | } |
214 | |
215 | /** |
216 | * tracing_map_add_sum_field - Add a field describing a tracing_map sum |
217 | * @map: The tracing_map |
218 | * |
219 | * Add a sum field to the key and return the index identifying it in |
220 | * the map and associated tracing_map_elts. This is the index used |
221 | * for instance to update a sum for a particular tracing_map_elt using |
222 | * tracing_map_update_sum() or reading it via tracing_map_read_sum(). |
223 | * |
224 | * Return: The index identifying the field in the map and associated |
225 | * tracing_map_elts, or -EINVAL on error. |
226 | */ |
227 | int tracing_map_add_sum_field(struct tracing_map *map) |
228 | { |
229 | return tracing_map_add_field(map, cmp_fn: tracing_map_cmp_atomic64); |
230 | } |
231 | |
232 | /** |
233 | * tracing_map_add_var - Add a field describing a tracing_map var |
234 | * @map: The tracing_map |
235 | * |
236 | * Add a var to the map and return the index identifying it in the map |
237 | * and associated tracing_map_elts. This is the index used for |
238 | * instance to update a var for a particular tracing_map_elt using |
239 | * tracing_map_update_var() or reading it via tracing_map_read_var(). |
240 | * |
241 | * Return: The index identifying the var in the map and associated |
242 | * tracing_map_elts, or -EINVAL on error. |
243 | */ |
244 | int tracing_map_add_var(struct tracing_map *map) |
245 | { |
246 | int ret = -EINVAL; |
247 | |
248 | if (map->n_vars < TRACING_MAP_VARS_MAX) |
249 | ret = map->n_vars++; |
250 | |
251 | return ret; |
252 | } |
253 | |
254 | /** |
255 | * tracing_map_add_key_field - Add a field describing a tracing_map key |
256 | * @map: The tracing_map |
257 | * @offset: The offset within the key |
258 | * @cmp_fn: The comparison function that will be used to sort on the key |
259 | * |
260 | * Let the map know there is a key and that if it's used as a sort key |
261 | * to use cmp_fn. |
262 | * |
263 | * A key can be a subset of a compound key; for that purpose, the |
264 | * offset param is used to describe where within the compound key |
265 | * the key referenced by this key field resides. |
266 | * |
267 | * Return: The index identifying the field in the map and associated |
268 | * tracing_map_elts, or -EINVAL on error. |
269 | */ |
270 | int tracing_map_add_key_field(struct tracing_map *map, |
271 | unsigned int offset, |
272 | tracing_map_cmp_fn_t cmp_fn) |
273 | |
274 | { |
275 | int idx = tracing_map_add_field(map, cmp_fn); |
276 | |
277 | if (idx < 0) |
278 | return idx; |
279 | |
280 | map->fields[idx].offset = offset; |
281 | |
282 | map->key_idx[map->n_keys++] = idx; |
283 | |
284 | return idx; |
285 | } |
286 | |
287 | static void tracing_map_array_clear(struct tracing_map_array *a) |
288 | { |
289 | unsigned int i; |
290 | |
291 | if (!a->pages) |
292 | return; |
293 | |
294 | for (i = 0; i < a->n_pages; i++) |
295 | memset(a->pages[i], 0, PAGE_SIZE); |
296 | } |
297 | |
298 | static void tracing_map_array_free(struct tracing_map_array *a) |
299 | { |
300 | unsigned int i; |
301 | |
302 | if (!a) |
303 | return; |
304 | |
305 | if (!a->pages) |
306 | goto free; |
307 | |
308 | for (i = 0; i < a->n_pages; i++) { |
309 | if (!a->pages[i]) |
310 | break; |
311 | kmemleak_free(ptr: a->pages[i]); |
312 | free_page((unsigned long)a->pages[i]); |
313 | } |
314 | |
315 | kfree(objp: a->pages); |
316 | |
317 | free: |
318 | kfree(objp: a); |
319 | } |
320 | |
321 | static struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, |
322 | unsigned int entry_size) |
323 | { |
324 | struct tracing_map_array *a; |
325 | unsigned int i; |
326 | |
327 | a = kzalloc(size: sizeof(*a), GFP_KERNEL); |
328 | if (!a) |
329 | return NULL; |
330 | |
331 | a->entry_size_shift = fls(roundup_pow_of_two(entry_size) - 1); |
332 | a->entries_per_page = PAGE_SIZE / (1 << a->entry_size_shift); |
333 | a->n_pages = n_elts / a->entries_per_page; |
334 | if (!a->n_pages) |
335 | a->n_pages = 1; |
336 | a->entry_shift = fls(x: a->entries_per_page) - 1; |
337 | a->entry_mask = (1 << a->entry_shift) - 1; |
338 | |
339 | a->pages = kcalloc(n: a->n_pages, size: sizeof(void *), GFP_KERNEL); |
340 | if (!a->pages) |
341 | goto free; |
342 | |
343 | for (i = 0; i < a->n_pages; i++) { |
344 | a->pages[i] = (void *)get_zeroed_page(GFP_KERNEL); |
345 | if (!a->pages[i]) |
346 | goto free; |
347 | kmemleak_alloc(ptr: a->pages[i], PAGE_SIZE, min_count: 1, GFP_KERNEL); |
348 | } |
349 | out: |
350 | return a; |
351 | free: |
352 | tracing_map_array_free(a); |
353 | a = NULL; |
354 | |
355 | goto out; |
356 | } |
357 | |
358 | static void tracing_map_elt_clear(struct tracing_map_elt *elt) |
359 | { |
360 | unsigned i; |
361 | |
362 | for (i = 0; i < elt->map->n_fields; i++) |
363 | if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64) |
364 | atomic64_set(v: &elt->fields[i].sum, i: 0); |
365 | |
366 | for (i = 0; i < elt->map->n_vars; i++) { |
367 | atomic64_set(v: &elt->vars[i], i: 0); |
368 | elt->var_set[i] = false; |
369 | } |
370 | |
371 | if (elt->map->ops && elt->map->ops->elt_clear) |
372 | elt->map->ops->elt_clear(elt); |
373 | } |
374 | |
375 | static void tracing_map_elt_init_fields(struct tracing_map_elt *elt) |
376 | { |
377 | unsigned int i; |
378 | |
379 | tracing_map_elt_clear(elt); |
380 | |
381 | for (i = 0; i < elt->map->n_fields; i++) { |
382 | elt->fields[i].cmp_fn = elt->map->fields[i].cmp_fn; |
383 | |
384 | if (elt->fields[i].cmp_fn != tracing_map_cmp_atomic64) |
385 | elt->fields[i].offset = elt->map->fields[i].offset; |
386 | } |
387 | } |
388 | |
389 | static void tracing_map_elt_free(struct tracing_map_elt *elt) |
390 | { |
391 | if (!elt) |
392 | return; |
393 | |
394 | if (elt->map->ops && elt->map->ops->elt_free) |
395 | elt->map->ops->elt_free(elt); |
396 | kfree(objp: elt->fields); |
397 | kfree(objp: elt->vars); |
398 | kfree(objp: elt->var_set); |
399 | kfree(objp: elt->key); |
400 | kfree(objp: elt); |
401 | } |
402 | |
403 | static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map) |
404 | { |
405 | struct tracing_map_elt *elt; |
406 | int err = 0; |
407 | |
408 | elt = kzalloc(size: sizeof(*elt), GFP_KERNEL); |
409 | if (!elt) |
410 | return ERR_PTR(error: -ENOMEM); |
411 | |
412 | elt->map = map; |
413 | |
414 | elt->key = kzalloc(size: map->key_size, GFP_KERNEL); |
415 | if (!elt->key) { |
416 | err = -ENOMEM; |
417 | goto free; |
418 | } |
419 | |
420 | elt->fields = kcalloc(n: map->n_fields, size: sizeof(*elt->fields), GFP_KERNEL); |
421 | if (!elt->fields) { |
422 | err = -ENOMEM; |
423 | goto free; |
424 | } |
425 | |
426 | elt->vars = kcalloc(n: map->n_vars, size: sizeof(*elt->vars), GFP_KERNEL); |
427 | if (!elt->vars) { |
428 | err = -ENOMEM; |
429 | goto free; |
430 | } |
431 | |
432 | elt->var_set = kcalloc(n: map->n_vars, size: sizeof(*elt->var_set), GFP_KERNEL); |
433 | if (!elt->var_set) { |
434 | err = -ENOMEM; |
435 | goto free; |
436 | } |
437 | |
438 | tracing_map_elt_init_fields(elt); |
439 | |
440 | if (map->ops && map->ops->elt_alloc) { |
441 | err = map->ops->elt_alloc(elt); |
442 | if (err) |
443 | goto free; |
444 | } |
445 | return elt; |
446 | free: |
447 | tracing_map_elt_free(elt); |
448 | |
449 | return ERR_PTR(error: err); |
450 | } |
451 | |
452 | static struct tracing_map_elt *get_free_elt(struct tracing_map *map) |
453 | { |
454 | struct tracing_map_elt *elt = NULL; |
455 | int idx; |
456 | |
457 | idx = atomic_inc_return(v: &map->next_elt); |
458 | if (idx < map->max_elts) { |
459 | elt = *(TRACING_MAP_ELT(map->elts, idx)); |
460 | if (map->ops && map->ops->elt_init) |
461 | map->ops->elt_init(elt); |
462 | } |
463 | |
464 | return elt; |
465 | } |
466 | |
467 | static void tracing_map_free_elts(struct tracing_map *map) |
468 | { |
469 | unsigned int i; |
470 | |
471 | if (!map->elts) |
472 | return; |
473 | |
474 | for (i = 0; i < map->max_elts; i++) { |
475 | tracing_map_elt_free(elt: *(TRACING_MAP_ELT(map->elts, i))); |
476 | *(TRACING_MAP_ELT(map->elts, i)) = NULL; |
477 | } |
478 | |
479 | tracing_map_array_free(a: map->elts); |
480 | map->elts = NULL; |
481 | } |
482 | |
483 | static int tracing_map_alloc_elts(struct tracing_map *map) |
484 | { |
485 | unsigned int i; |
486 | |
487 | map->elts = tracing_map_array_alloc(n_elts: map->max_elts, |
488 | entry_size: sizeof(struct tracing_map_elt *)); |
489 | if (!map->elts) |
490 | return -ENOMEM; |
491 | |
492 | for (i = 0; i < map->max_elts; i++) { |
493 | *(TRACING_MAP_ELT(map->elts, i)) = tracing_map_elt_alloc(map); |
494 | if (IS_ERR(ptr: *(TRACING_MAP_ELT(map->elts, i)))) { |
495 | *(TRACING_MAP_ELT(map->elts, i)) = NULL; |
496 | tracing_map_free_elts(map); |
497 | |
498 | return -ENOMEM; |
499 | } |
500 | } |
501 | |
502 | return 0; |
503 | } |
504 | |
/* Return true when the first @key_size bytes of both keys are equal. */
static inline bool keys_match(void *key, void *test_key, unsigned key_size)
{
	return memcmp(key, test_key, key_size) == 0;
}
514 | |
515 | static inline struct tracing_map_elt * |
516 | __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) |
517 | { |
518 | u32 idx, key_hash, test_key; |
519 | int dup_try = 0; |
520 | struct tracing_map_entry *entry; |
521 | struct tracing_map_elt *val; |
522 | |
523 | key_hash = jhash(key, length: map->key_size, initval: 0); |
524 | if (key_hash == 0) |
525 | key_hash = 1; |
526 | idx = key_hash >> (32 - (map->map_bits + 1)); |
527 | |
528 | while (1) { |
529 | idx &= (map->map_size - 1); |
530 | entry = TRACING_MAP_ENTRY(map->map, idx); |
531 | test_key = entry->key; |
532 | |
533 | if (test_key && test_key == key_hash) { |
534 | val = READ_ONCE(entry->val); |
535 | if (val && |
536 | keys_match(key, test_key: val->key, key_size: map->key_size)) { |
537 | if (!lookup_only) |
538 | atomic64_inc(v: &map->hits); |
539 | return val; |
540 | } else if (unlikely(!val)) { |
541 | /* |
542 | * The key is present. But, val (pointer to elt |
543 | * struct) is still NULL. which means some other |
544 | * thread is in the process of inserting an |
545 | * element. |
546 | * |
547 | * On top of that, it's key_hash is same as the |
548 | * one being inserted right now. So, it's |
549 | * possible that the element has the same |
550 | * key as well. |
551 | */ |
552 | |
553 | dup_try++; |
554 | if (dup_try > map->map_size) { |
555 | atomic64_inc(v: &map->drops); |
556 | break; |
557 | } |
558 | continue; |
559 | } |
560 | } |
561 | |
562 | if (!test_key) { |
563 | if (lookup_only) |
564 | break; |
565 | |
566 | if (!cmpxchg(&entry->key, 0, key_hash)) { |
567 | struct tracing_map_elt *elt; |
568 | |
569 | elt = get_free_elt(map); |
570 | if (!elt) { |
571 | atomic64_inc(v: &map->drops); |
572 | entry->key = 0; |
573 | break; |
574 | } |
575 | |
576 | memcpy(elt->key, key, map->key_size); |
577 | entry->val = elt; |
578 | atomic64_inc(v: &map->hits); |
579 | |
580 | return entry->val; |
581 | } else { |
582 | /* |
583 | * cmpxchg() failed. Loop around once |
584 | * more to check what key was inserted. |
585 | */ |
586 | dup_try++; |
587 | continue; |
588 | } |
589 | } |
590 | |
591 | idx++; |
592 | } |
593 | |
594 | return NULL; |
595 | } |
596 | |
597 | /** |
598 | * tracing_map_insert - Insert key and/or retrieve val from a tracing_map |
599 | * @map: The tracing_map to insert into |
600 | * @key: The key to insert |
601 | * |
602 | * Inserts a key into a tracing_map and creates and returns a new |
603 | * tracing_map_elt for it, or if the key has already been inserted by |
604 | * a previous call, returns the tracing_map_elt already associated |
605 | * with it. When the map was created, the number of elements to be |
606 | * allocated for the map was specified (internally maintained as |
607 | * 'max_elts' in struct tracing_map), and that number of |
608 | * tracing_map_elts was created by tracing_map_init(). This is the |
609 | * pre-allocated pool of tracing_map_elts that tracing_map_insert() |
610 | * will allocate from when adding new keys. Once that pool is |
611 | * exhausted, tracing_map_insert() is useless and will return NULL to |
612 | * signal that state. There are two user-visible tracing_map |
613 | * variables, 'hits' and 'drops', which are updated by this function. |
614 | * Every time an element is either successfully inserted or retrieved, |
615 | * the 'hits' value is incremented. Every time an element insertion |
616 | * fails, the 'drops' value is incremented. |
617 | * |
618 | * This is a lock-free tracing map insertion function implementing a |
619 | * modified form of Cliff Click's basic insertion algorithm. It |
620 | * requires the table size be a power of two. To prevent any |
621 | * possibility of an infinite loop we always make the internal table |
622 | * size double the size of the requested table size (max_elts * 2). |
623 | * Likewise, we never reuse a slot or resize or delete elements - when |
624 | * we've reached max_elts entries, we simply return NULL once we've |
625 | * run out of entries. Readers can at any point in time traverse the |
626 | * tracing map and safely access the key/val pairs. |
627 | * |
628 | * Return: the tracing_map_elt pointer val associated with the key. |
629 | * If this was a newly inserted key, the val will be a newly allocated |
630 | * and associated tracing_map_elt pointer val. If the key wasn't |
631 | * found and the pool of tracing_map_elts has been exhausted, NULL is |
632 | * returned and no further insertions will succeed. |
633 | */ |
634 | struct tracing_map_elt *tracing_map_insert(struct tracing_map *map, void *key) |
635 | { |
636 | return __tracing_map_insert(map, key, lookup_only: false); |
637 | } |
638 | |
639 | /** |
640 | * tracing_map_lookup - Retrieve val from a tracing_map |
641 | * @map: The tracing_map to perform the lookup on |
642 | * @key: The key to look up |
643 | * |
644 | * Looks up key in tracing_map and if found returns the matching |
645 | * tracing_map_elt. This is a lock-free lookup; see |
646 | * tracing_map_insert() for details on tracing_map and how it works. |
647 | * Every time an element is retrieved, the 'hits' value is |
648 | * incremented. There is one user-visible tracing_map variable, |
649 | * 'hits', which is updated by this function. Every time an element |
650 | * is successfully retrieved, the 'hits' value is incremented. The |
651 | * 'drops' value is never updated by this function. |
652 | * |
653 | * Return: the tracing_map_elt pointer val associated with the key. |
654 | * If the key wasn't found, NULL is returned. |
655 | */ |
656 | struct tracing_map_elt *tracing_map_lookup(struct tracing_map *map, void *key) |
657 | { |
658 | return __tracing_map_insert(map, key, lookup_only: true); |
659 | } |
660 | |
661 | /** |
662 | * tracing_map_destroy - Destroy a tracing_map |
663 | * @map: The tracing_map to destroy |
664 | * |
665 | * Frees a tracing_map along with its associated array of |
666 | * tracing_map_elts. |
667 | * |
668 | * Callers should make sure there are no readers or writers actively |
669 | * reading or inserting into the map before calling this. |
670 | */ |
671 | void tracing_map_destroy(struct tracing_map *map) |
672 | { |
673 | if (!map) |
674 | return; |
675 | |
676 | tracing_map_free_elts(map); |
677 | |
678 | tracing_map_array_free(a: map->map); |
679 | kfree(objp: map); |
680 | } |
681 | |
682 | /** |
683 | * tracing_map_clear - Clear a tracing_map |
684 | * @map: The tracing_map to clear |
685 | * |
686 | * Resets the tracing map to a cleared or initial state. The |
687 | * tracing_map_elts are all cleared, and the array of struct |
688 | * tracing_map_entry is reset to an initialized state. |
689 | * |
690 | * Callers should make sure there are no writers actively inserting |
691 | * into the map before calling this. |
692 | */ |
693 | void tracing_map_clear(struct tracing_map *map) |
694 | { |
695 | unsigned int i; |
696 | |
697 | atomic_set(v: &map->next_elt, i: -1); |
698 | atomic64_set(v: &map->hits, i: 0); |
699 | atomic64_set(v: &map->drops, i: 0); |
700 | |
701 | tracing_map_array_clear(a: map->map); |
702 | |
703 | for (i = 0; i < map->max_elts; i++) |
704 | tracing_map_elt_clear(elt: *(TRACING_MAP_ELT(map->elts, i))); |
705 | } |
706 | |
707 | static void set_sort_key(struct tracing_map *map, |
708 | struct tracing_map_sort_key *sort_key) |
709 | { |
710 | map->sort_key = *sort_key; |
711 | } |
712 | |
713 | /** |
714 | * tracing_map_create - Create a lock-free map and element pool |
715 | * @map_bits: The size of the map (2 ** map_bits) |
716 | * @key_size: The size of the key for the map in bytes |
717 | * @ops: Optional client-defined tracing_map_ops instance |
718 | * @private_data: Client data associated with the map |
719 | * |
720 | * Creates and sets up a map to contain 2 ** map_bits number of |
721 | * elements (internally maintained as 'max_elts' in struct |
722 | * tracing_map). Before using, map fields should be added to the map |
723 | * with tracing_map_add_sum_field() and tracing_map_add_key_field(). |
724 | * tracing_map_init() should then be called to allocate the array of |
725 | * tracing_map_elts, in order to avoid allocating anything in the map |
726 | * insertion path. The user-specified map size reflects the maximum |
727 | * number of elements that can be contained in the table requested by |
728 | * the user - internally we double that in order to keep the table |
729 | * sparse and keep collisions manageable. |
730 | * |
731 | * A tracing_map is a special-purpose map designed to aggregate or |
732 | * 'sum' one or more values associated with a specific object of type |
733 | * tracing_map_elt, which is attached by the map to a given key. |
734 | * |
735 | * tracing_map_create() sets up the map itself, and provides |
736 | * operations for inserting tracing_map_elts, but doesn't allocate the |
737 | * tracing_map_elts themselves, or provide a means for describing the |
738 | * keys or sums associated with the tracing_map_elts. All |
739 | * tracing_map_elts for a given map have the same set of sums and |
740 | * keys, which are defined by the client using the functions |
741 | * tracing_map_add_key_field() and tracing_map_add_sum_field(). Once |
742 | * the fields are defined, the pool of elements allocated for the map |
743 | * can be created, which occurs when the client code calls |
744 | * tracing_map_init(). |
745 | * |
746 | * When tracing_map_init() returns, tracing_map_elt elements can be |
747 | * inserted into the map using tracing_map_insert(). When called, |
748 | * tracing_map_insert() grabs a free tracing_map_elt from the pool, or |
749 | * finds an existing match in the map and in either case returns it. |
750 | * The client can then use tracing_map_update_sum() and |
751 | * tracing_map_read_sum() to update or read a given sum field for the |
752 | * tracing_map_elt. |
753 | * |
754 | * The client can at any point retrieve and traverse the current set |
755 | * of inserted tracing_map_elts in a tracing_map, via |
756 | * tracing_map_sort_entries(). Sorting can be done on any field, |
757 | * including keys. |
758 | * |
759 | * See tracing_map.h for a description of tracing_map_ops. |
760 | * |
761 | * Return: the tracing_map pointer if successful, ERR_PTR if not. |
762 | */ |
763 | struct tracing_map *tracing_map_create(unsigned int map_bits, |
764 | unsigned int key_size, |
765 | const struct tracing_map_ops *ops, |
766 | void *private_data) |
767 | { |
768 | struct tracing_map *map; |
769 | unsigned int i; |
770 | |
771 | if (map_bits < TRACING_MAP_BITS_MIN || |
772 | map_bits > TRACING_MAP_BITS_MAX) |
773 | return ERR_PTR(error: -EINVAL); |
774 | |
775 | map = kzalloc(size: sizeof(*map), GFP_KERNEL); |
776 | if (!map) |
777 | return ERR_PTR(error: -ENOMEM); |
778 | |
779 | map->map_bits = map_bits; |
780 | map->max_elts = (1 << map_bits); |
781 | atomic_set(v: &map->next_elt, i: -1); |
782 | |
783 | map->map_size = (1 << (map_bits + 1)); |
784 | map->ops = ops; |
785 | |
786 | map->private_data = private_data; |
787 | |
788 | map->map = tracing_map_array_alloc(n_elts: map->map_size, |
789 | entry_size: sizeof(struct tracing_map_entry)); |
790 | if (!map->map) |
791 | goto free; |
792 | |
793 | map->key_size = key_size; |
794 | for (i = 0; i < TRACING_MAP_KEYS_MAX; i++) |
795 | map->key_idx[i] = -1; |
796 | out: |
797 | return map; |
798 | free: |
799 | tracing_map_destroy(map); |
800 | map = ERR_PTR(error: -ENOMEM); |
801 | |
802 | goto out; |
803 | } |
804 | |
805 | /** |
806 | * tracing_map_init - Allocate and clear a map's tracing_map_elts |
807 | * @map: The tracing_map to initialize |
808 | * |
809 | * Allocates a clears a pool of tracing_map_elts equal to the |
810 | * user-specified size of 2 ** map_bits (internally maintained as |
811 | * 'max_elts' in struct tracing_map). Before using, the map fields |
812 | * should be added to the map with tracing_map_add_sum_field() and |
813 | * tracing_map_add_key_field(). tracing_map_init() should then be |
814 | * called to allocate the array of tracing_map_elts, in order to avoid |
815 | * allocating anything in the map insertion path. The user-specified |
816 | * map size reflects the max number of elements requested by the user |
817 | * - internally we double that in order to keep the table sparse and |
818 | * keep collisions manageable. |
819 | * |
820 | * See tracing_map.h for a description of tracing_map_ops. |
821 | * |
822 | * Return: the tracing_map pointer if successful, ERR_PTR if not. |
823 | */ |
824 | int tracing_map_init(struct tracing_map *map) |
825 | { |
826 | int err; |
827 | |
828 | if (map->n_fields < 2) |
829 | return -EINVAL; /* need at least 1 key and 1 val */ |
830 | |
831 | err = tracing_map_alloc_elts(map); |
832 | if (err) |
833 | return err; |
834 | |
835 | tracing_map_clear(map); |
836 | |
837 | return err; |
838 | } |
839 | |
840 | static int cmp_entries_dup(const void *A, const void *B) |
841 | { |
842 | const struct tracing_map_sort_entry *a, *b; |
843 | int ret = 0; |
844 | |
845 | a = *(const struct tracing_map_sort_entry **)A; |
846 | b = *(const struct tracing_map_sort_entry **)B; |
847 | |
848 | if (memcmp(p: a->key, q: b->key, size: a->elt->map->key_size)) |
849 | ret = 1; |
850 | |
851 | return ret; |
852 | } |
853 | |
854 | static int cmp_entries_sum(const void *A, const void *B) |
855 | { |
856 | const struct tracing_map_elt *elt_a, *elt_b; |
857 | const struct tracing_map_sort_entry *a, *b; |
858 | struct tracing_map_sort_key *sort_key; |
859 | struct tracing_map_field *field; |
860 | tracing_map_cmp_fn_t cmp_fn; |
861 | void *val_a, *val_b; |
862 | int ret = 0; |
863 | |
864 | a = *(const struct tracing_map_sort_entry **)A; |
865 | b = *(const struct tracing_map_sort_entry **)B; |
866 | |
867 | elt_a = a->elt; |
868 | elt_b = b->elt; |
869 | |
870 | sort_key = &elt_a->map->sort_key; |
871 | |
872 | field = &elt_a->fields[sort_key->field_idx]; |
873 | cmp_fn = field->cmp_fn; |
874 | |
875 | val_a = &elt_a->fields[sort_key->field_idx].sum; |
876 | val_b = &elt_b->fields[sort_key->field_idx].sum; |
877 | |
878 | ret = cmp_fn(val_a, val_b); |
879 | if (sort_key->descending) |
880 | ret = -ret; |
881 | |
882 | return ret; |
883 | } |
884 | |
885 | static int cmp_entries_key(const void *A, const void *B) |
886 | { |
887 | const struct tracing_map_elt *elt_a, *elt_b; |
888 | const struct tracing_map_sort_entry *a, *b; |
889 | struct tracing_map_sort_key *sort_key; |
890 | struct tracing_map_field *field; |
891 | tracing_map_cmp_fn_t cmp_fn; |
892 | void *val_a, *val_b; |
893 | int ret = 0; |
894 | |
895 | a = *(const struct tracing_map_sort_entry **)A; |
896 | b = *(const struct tracing_map_sort_entry **)B; |
897 | |
898 | elt_a = a->elt; |
899 | elt_b = b->elt; |
900 | |
901 | sort_key = &elt_a->map->sort_key; |
902 | |
903 | field = &elt_a->fields[sort_key->field_idx]; |
904 | |
905 | cmp_fn = field->cmp_fn; |
906 | |
907 | val_a = elt_a->key + field->offset; |
908 | val_b = elt_b->key + field->offset; |
909 | |
910 | ret = cmp_fn(val_a, val_b); |
911 | if (sort_key->descending) |
912 | ret = -ret; |
913 | |
914 | return ret; |
915 | } |
916 | |
917 | static void destroy_sort_entry(struct tracing_map_sort_entry *entry) |
918 | { |
919 | if (!entry) |
920 | return; |
921 | |
922 | if (entry->elt_copied) |
923 | tracing_map_elt_free(elt: entry->elt); |
924 | |
925 | kfree(objp: entry); |
926 | } |
927 | |
/**
 * tracing_map_destroy_sort_entries - Destroy an array of sort entries
 * @entries: The entries to destroy
 * @n_entries: The number of entries in the array
 *
 * Destroys each of the @n_entries elements returned by a
 * tracing_map_sort_entries() call and then releases the array itself.
 */
void tracing_map_destroy_sort_entries(struct tracing_map_sort_entry **entries,
				      unsigned int n_entries)
{
	unsigned int i = 0;

	while (i < n_entries)
		destroy_sort_entry(entries[i++]);

	vfree(entries);
}
945 | |
946 | static struct tracing_map_sort_entry * |
947 | create_sort_entry(void *key, struct tracing_map_elt *elt) |
948 | { |
949 | struct tracing_map_sort_entry *sort_entry; |
950 | |
951 | sort_entry = kzalloc(size: sizeof(*sort_entry), GFP_KERNEL); |
952 | if (!sort_entry) |
953 | return NULL; |
954 | |
955 | sort_entry->key = key; |
956 | sort_entry->elt = elt; |
957 | |
958 | return sort_entry; |
959 | } |
960 | |
961 | static void detect_dups(struct tracing_map_sort_entry **sort_entries, |
962 | int n_entries, unsigned int key_size) |
963 | { |
964 | unsigned int total_dups = 0; |
965 | int i; |
966 | void *key; |
967 | |
968 | if (n_entries < 2) |
969 | return; |
970 | |
971 | sort(base: sort_entries, num: n_entries, size: sizeof(struct tracing_map_sort_entry *), |
972 | cmp_func: (int (*)(const void *, const void *))cmp_entries_dup, NULL); |
973 | |
974 | key = sort_entries[0]->key; |
975 | for (i = 1; i < n_entries; i++) { |
976 | if (!memcmp(p: sort_entries[i]->key, q: key, size: key_size)) { |
977 | total_dups++; |
978 | continue; |
979 | } |
980 | key = sort_entries[i]->key; |
981 | } |
982 | |
983 | WARN_ONCE(total_dups > 0, |
984 | "Duplicates detected: %d\n" , total_dups); |
985 | } |
986 | |
987 | static bool is_key(struct tracing_map *map, unsigned int field_idx) |
988 | { |
989 | unsigned int i; |
990 | |
991 | for (i = 0; i < map->n_keys; i++) |
992 | if (map->key_idx[i] == field_idx) |
993 | return true; |
994 | return false; |
995 | } |
996 | |
997 | static void sort_secondary(struct tracing_map *map, |
998 | const struct tracing_map_sort_entry **entries, |
999 | unsigned int n_entries, |
1000 | struct tracing_map_sort_key *primary_key, |
1001 | struct tracing_map_sort_key *secondary_key) |
1002 | { |
1003 | int (*primary_fn)(const void *, const void *); |
1004 | int (*secondary_fn)(const void *, const void *); |
1005 | unsigned i, start = 0, n_sub = 1; |
1006 | |
1007 | if (is_key(map, field_idx: primary_key->field_idx)) |
1008 | primary_fn = cmp_entries_key; |
1009 | else |
1010 | primary_fn = cmp_entries_sum; |
1011 | |
1012 | if (is_key(map, field_idx: secondary_key->field_idx)) |
1013 | secondary_fn = cmp_entries_key; |
1014 | else |
1015 | secondary_fn = cmp_entries_sum; |
1016 | |
1017 | for (i = 0; i < n_entries - 1; i++) { |
1018 | const struct tracing_map_sort_entry **a = &entries[i]; |
1019 | const struct tracing_map_sort_entry **b = &entries[i + 1]; |
1020 | |
1021 | if (primary_fn(a, b) == 0) { |
1022 | n_sub++; |
1023 | if (i < n_entries - 2) |
1024 | continue; |
1025 | } |
1026 | |
1027 | if (n_sub < 2) { |
1028 | start = i + 1; |
1029 | n_sub = 1; |
1030 | continue; |
1031 | } |
1032 | |
1033 | set_sort_key(map, sort_key: secondary_key); |
1034 | sort(base: &entries[start], num: n_sub, |
1035 | size: sizeof(struct tracing_map_sort_entry *), |
1036 | cmp_func: (int (*)(const void *, const void *))secondary_fn, NULL); |
1037 | set_sort_key(map, sort_key: primary_key); |
1038 | |
1039 | start = i + 1; |
1040 | n_sub = 1; |
1041 | } |
1042 | } |
1043 | |
1044 | /** |
1045 | * tracing_map_sort_entries - Sort the current set of tracing_map_elts in a map |
1046 | * @map: The tracing_map |
1047 | * @sort_keys: The sort key to use for sorting |
1048 | * @n_sort_keys: hitcount, always have at least one |
1049 | * @sort_entries: outval: pointer to allocated and sorted array of entries |
1050 | * |
1051 | * tracing_map_sort_entries() sorts the current set of entries in the |
1052 | * map and returns the list of tracing_map_sort_entries containing |
1053 | * them to the client in the sort_entries param. The client can |
1054 | * access the struct tracing_map_elt element of interest directly as |
1055 | * the 'elt' field of a returned struct tracing_map_sort_entry object. |
1056 | * |
1057 | * The sort_key has only two fields: idx and descending. 'idx' refers |
1058 | * to the index of the field added via tracing_map_add_sum_field() or |
1059 | * tracing_map_add_key_field() when the tracing_map was initialized. |
1060 | * 'descending' is a flag that if set reverses the sort order, which |
1061 | * by default is ascending. |
1062 | * |
1063 | * The client should not hold on to the returned array but should use |
1064 | * it and call tracing_map_destroy_sort_entries() when done. |
1065 | * |
1066 | * Return: the number of sort_entries in the struct tracing_map_sort_entry |
1067 | * array, negative on error |
1068 | */ |
1069 | int tracing_map_sort_entries(struct tracing_map *map, |
1070 | struct tracing_map_sort_key *sort_keys, |
1071 | unsigned int n_sort_keys, |
1072 | struct tracing_map_sort_entry ***sort_entries) |
1073 | { |
1074 | int (*cmp_entries_fn)(const void *, const void *); |
1075 | struct tracing_map_sort_entry *sort_entry, **entries; |
1076 | int i, n_entries, ret; |
1077 | |
1078 | entries = vmalloc(array_size(sizeof(sort_entry), map->max_elts)); |
1079 | if (!entries) |
1080 | return -ENOMEM; |
1081 | |
1082 | for (i = 0, n_entries = 0; i < map->map_size; i++) { |
1083 | struct tracing_map_entry *entry; |
1084 | |
1085 | entry = TRACING_MAP_ENTRY(map->map, i); |
1086 | |
1087 | if (!entry->key || !entry->val) |
1088 | continue; |
1089 | |
1090 | entries[n_entries] = create_sort_entry(key: entry->val->key, |
1091 | elt: entry->val); |
1092 | if (!entries[n_entries++]) { |
1093 | ret = -ENOMEM; |
1094 | goto free; |
1095 | } |
1096 | } |
1097 | |
1098 | if (n_entries == 0) { |
1099 | ret = 0; |
1100 | goto free; |
1101 | } |
1102 | |
1103 | if (n_entries == 1) { |
1104 | *sort_entries = entries; |
1105 | return 1; |
1106 | } |
1107 | |
1108 | detect_dups(sort_entries: entries, n_entries, key_size: map->key_size); |
1109 | |
1110 | if (is_key(map, field_idx: sort_keys[0].field_idx)) |
1111 | cmp_entries_fn = cmp_entries_key; |
1112 | else |
1113 | cmp_entries_fn = cmp_entries_sum; |
1114 | |
1115 | set_sort_key(map, sort_key: &sort_keys[0]); |
1116 | |
1117 | sort(base: entries, num: n_entries, size: sizeof(struct tracing_map_sort_entry *), |
1118 | cmp_func: (int (*)(const void *, const void *))cmp_entries_fn, NULL); |
1119 | |
1120 | if (n_sort_keys > 1) |
1121 | sort_secondary(map, |
1122 | entries: (const struct tracing_map_sort_entry **)entries, |
1123 | n_entries, |
1124 | primary_key: &sort_keys[0], |
1125 | secondary_key: &sort_keys[1]); |
1126 | |
1127 | *sort_entries = entries; |
1128 | |
1129 | return n_entries; |
1130 | free: |
1131 | tracing_map_destroy_sort_entries(entries, n_entries); |
1132 | |
1133 | return ret; |
1134 | } |
1135 | |