1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * lib/btree.c - Simple In-memory B+Tree |
4 | * |
5 | * Copyright (c) 2007-2008 Joern Engel <joern@purestorage.com> |
6 | * Bits and pieces stolen from Peter Zijlstra's code, which is |
7 | * Copyright 2007, Red Hat Inc. Peter Zijlstra |
8 | * |
9 | * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch |
10 | * |
11 | * A relatively simple B+Tree implementation. I have written it as a learning |
12 | * exercise to understand how B+Trees work. Turned out to be useful as well. |
13 | * |
14 | * B+Trees can be used similar to Linux radix trees (which don't have anything |
15 | * in common with textbook radix trees, beware). Prerequisite for them working |
16 | * well is that access to a random tree node is much faster than a large number |
17 | * of operations within each node. |
18 | * |
19 | * Disks have fulfilled the prerequisite for a long time. More recently DRAM |
20 | * has gained similar properties, as memory access times, when measured in cpu |
21 | * cycles, have increased. Cacheline sizes have increased as well, which also |
22 | * helps B+Trees. |
23 | * |
24 | * Compared to radix trees, B+Trees are more efficient when dealing with a |
25 | * sparsely populated address space. Between 25% and 50% of the memory is |
26 | * occupied with valid pointers. When densely populated, radix trees contain |
27 | * ~98% pointers - hard to beat. Very sparse radix trees contain only ~2% |
28 | * pointers. |
29 | * |
30 | * This particular implementation stores pointers identified by a long value. |
31 | * Storing NULL pointers is illegal, lookup will return NULL when no entry |
32 | * was found. |
33 | * |
34 | * A trick was used that is not commonly found in textbooks. The lowest |
35 | * values are to the right, not to the left. All used slots within a node |
36 | * are on the left, all unused slots contain NUL values. Most operations |
37 | * simply loop once over all slots and terminate on the first NUL. |
38 | */ |
39 | |
40 | #include <linux/btree.h> |
41 | #include <linux/cache.h> |
42 | #include <linux/kernel.h> |
43 | #include <linux/slab.h> |
44 | #include <linux/module.h> |
45 | |
/*
 * NODESIZE is the size of one tree node in bytes: L1_CACHE_BYTES, but never
 * less than 128.
 *
 * MAX() is only defined here if no included header has provided one already,
 * which avoids a macro redefinition clash.  This fallback evaluates its
 * arguments more than once, so only pass side-effect-free expressions.
 */
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
#define NODESIZE MAX(L1_CACHE_BYTES, 128)
48 | |
/*
 * Layout of one node for a given key width.
 *
 * @keylen:   number of unsigned longs that make up one key
 * @no_pairs: number of key/value pairs a node can hold
 * @no_longs: number of unsigned longs taken up by the key area of a node;
 *            the value slots start at this index
 */
struct btree_geo {
	int keylen;
	int no_pairs;
	int no_longs;
};
54 | |
55 | struct btree_geo btree_geo32 = { |
56 | .keylen = 1, |
57 | .no_pairs = NODESIZE / sizeof(long) / 2, |
58 | .no_longs = NODESIZE / sizeof(long) / 2, |
59 | }; |
60 | EXPORT_SYMBOL_GPL(btree_geo32); |
61 | |
62 | #define LONG_PER_U64 (64 / BITS_PER_LONG) |
63 | struct btree_geo btree_geo64 = { |
64 | .keylen = LONG_PER_U64, |
65 | .no_pairs = NODESIZE / sizeof(long) / (1 + LONG_PER_U64), |
66 | .no_longs = LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + LONG_PER_U64)), |
67 | }; |
68 | EXPORT_SYMBOL_GPL(btree_geo64); |
69 | |
70 | struct btree_geo btree_geo128 = { |
71 | .keylen = 2 * LONG_PER_U64, |
72 | .no_pairs = NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64), |
73 | .no_longs = 2 * LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64)), |
74 | }; |
75 | EXPORT_SYMBOL_GPL(btree_geo128); |
76 | |
77 | #define MAX_KEYLEN (2 * LONG_PER_U64) |
78 | |
79 | static struct kmem_cache *btree_cachep; |
80 | |
81 | void *btree_alloc(gfp_t gfp_mask, void *pool_data) |
82 | { |
83 | return kmem_cache_alloc(cachep: btree_cachep, flags: gfp_mask); |
84 | } |
85 | EXPORT_SYMBOL_GPL(btree_alloc); |
86 | |
87 | void btree_free(void *element, void *pool_data) |
88 | { |
89 | kmem_cache_free(s: btree_cachep, objp: element); |
90 | } |
91 | EXPORT_SYMBOL_GPL(btree_free); |
92 | |
93 | static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp) |
94 | { |
95 | unsigned long *node; |
96 | |
97 | node = mempool_alloc(pool: head->mempool, gfp_mask: gfp); |
98 | if (likely(node)) |
99 | memset(node, 0, NODESIZE); |
100 | return node; |
101 | } |
102 | |
/*
 * Compare two arrays of @n unsigned longs, element by element from index 0.
 * Returns -1 or 1 according to the first differing element (l1 smaller or
 * larger respectively), 0 when all @n elements are equal.
 */
static int longcmp(const unsigned long *l1, const unsigned long *l2, size_t n)
{
	const unsigned long *end = l1 + n;

	for (; l1 != end; l1++, l2++) {
		if (*l1 != *l2)
			return *l1 < *l2 ? -1 : 1;
	}
	return 0;
}
115 | |
/*
 * Copy @n unsigned longs from @src to @dest, lowest index first.
 * Returns @dest.
 */
static unsigned long *longcpy(unsigned long *dest, const unsigned long *src,
		size_t n)
{
	unsigned long *out = dest;

	while (n--)
		*out++ = *src++;
	return dest;
}
125 | |
/*
 * Store the value @c into the first @n unsigned longs of @s.
 * Returns @s.
 */
static unsigned long *longset(unsigned long *s, unsigned long c, size_t n)
{
	unsigned long *cur = s;

	while (n--)
		*cur++ = c;
	return s;
}
134 | |
135 | static void dec_key(struct btree_geo *geo, unsigned long *key) |
136 | { |
137 | unsigned long val; |
138 | int i; |
139 | |
140 | for (i = geo->keylen - 1; i >= 0; i--) { |
141 | val = key[i]; |
142 | key[i] = val - 1; |
143 | if (val) |
144 | break; |
145 | } |
146 | } |
147 | |
148 | static unsigned long *bkey(struct btree_geo *geo, unsigned long *node, int n) |
149 | { |
150 | return &node[n * geo->keylen]; |
151 | } |
152 | |
153 | static void *bval(struct btree_geo *geo, unsigned long *node, int n) |
154 | { |
155 | return (void *)node[geo->no_longs + n]; |
156 | } |
157 | |
158 | static void setkey(struct btree_geo *geo, unsigned long *node, int n, |
159 | unsigned long *key) |
160 | { |
161 | longcpy(dest: bkey(geo, node, n), src: key, n: geo->keylen); |
162 | } |
163 | |
164 | static void setval(struct btree_geo *geo, unsigned long *node, int n, |
165 | void *val) |
166 | { |
167 | node[geo->no_longs + n] = (unsigned long) val; |
168 | } |
169 | |
170 | static void clearpair(struct btree_geo *geo, unsigned long *node, int n) |
171 | { |
172 | longset(s: bkey(geo, node, n), c: 0, n: geo->keylen); |
173 | node[geo->no_longs + n] = 0; |
174 | } |
175 | |
176 | static inline void __btree_init(struct btree_head *head) |
177 | { |
178 | head->node = NULL; |
179 | head->height = 0; |
180 | } |
181 | |
182 | void btree_init_mempool(struct btree_head *head, mempool_t *mempool) |
183 | { |
184 | __btree_init(head); |
185 | head->mempool = mempool; |
186 | } |
187 | EXPORT_SYMBOL_GPL(btree_init_mempool); |
188 | |
189 | int btree_init(struct btree_head *head) |
190 | { |
191 | __btree_init(head); |
192 | head->mempool = mempool_create(min_nr: 0, alloc_fn: btree_alloc, free_fn: btree_free, NULL); |
193 | if (!head->mempool) |
194 | return -ENOMEM; |
195 | return 0; |
196 | } |
197 | EXPORT_SYMBOL_GPL(btree_init); |
198 | |
199 | void btree_destroy(struct btree_head *head) |
200 | { |
201 | mempool_free(element: head->node, pool: head->mempool); |
202 | mempool_destroy(pool: head->mempool); |
203 | head->mempool = NULL; |
204 | } |
205 | EXPORT_SYMBOL_GPL(btree_destroy); |
206 | |
207 | void *btree_last(struct btree_head *head, struct btree_geo *geo, |
208 | unsigned long *key) |
209 | { |
210 | int height = head->height; |
211 | unsigned long *node = head->node; |
212 | |
213 | if (height == 0) |
214 | return NULL; |
215 | |
216 | for ( ; height > 1; height--) |
217 | node = bval(geo, node, n: 0); |
218 | |
219 | longcpy(dest: key, src: bkey(geo, node, n: 0), n: geo->keylen); |
220 | return bval(geo, node, n: 0); |
221 | } |
222 | EXPORT_SYMBOL_GPL(btree_last); |
223 | |
224 | static int keycmp(struct btree_geo *geo, unsigned long *node, int pos, |
225 | unsigned long *key) |
226 | { |
227 | return longcmp(l1: bkey(geo, node, n: pos), l2: key, n: geo->keylen); |
228 | } |
229 | |
230 | static int keyzero(struct btree_geo *geo, unsigned long *key) |
231 | { |
232 | int i; |
233 | |
234 | for (i = 0; i < geo->keylen; i++) |
235 | if (key[i]) |
236 | return 0; |
237 | |
238 | return 1; |
239 | } |
240 | |
241 | static void *btree_lookup_node(struct btree_head *head, struct btree_geo *geo, |
242 | unsigned long *key) |
243 | { |
244 | int i, height = head->height; |
245 | unsigned long *node = head->node; |
246 | |
247 | if (height == 0) |
248 | return NULL; |
249 | |
250 | for ( ; height > 1; height--) { |
251 | for (i = 0; i < geo->no_pairs; i++) |
252 | if (keycmp(geo, node, pos: i, key) <= 0) |
253 | break; |
254 | if (i == geo->no_pairs) |
255 | return NULL; |
256 | node = bval(geo, node, n: i); |
257 | if (!node) |
258 | return NULL; |
259 | } |
260 | return node; |
261 | } |
262 | |
263 | void *btree_lookup(struct btree_head *head, struct btree_geo *geo, |
264 | unsigned long *key) |
265 | { |
266 | int i; |
267 | unsigned long *node; |
268 | |
269 | node = btree_lookup_node(head, geo, key); |
270 | if (!node) |
271 | return NULL; |
272 | |
273 | for (i = 0; i < geo->no_pairs; i++) |
274 | if (keycmp(geo, node, pos: i, key) == 0) |
275 | return bval(geo, node, n: i); |
276 | return NULL; |
277 | } |
278 | EXPORT_SYMBOL_GPL(btree_lookup); |
279 | |
280 | int btree_update(struct btree_head *head, struct btree_geo *geo, |
281 | unsigned long *key, void *val) |
282 | { |
283 | int i; |
284 | unsigned long *node; |
285 | |
286 | node = btree_lookup_node(head, geo, key); |
287 | if (!node) |
288 | return -ENOENT; |
289 | |
290 | for (i = 0; i < geo->no_pairs; i++) |
291 | if (keycmp(geo, node, pos: i, key) == 0) { |
292 | setval(geo, node, n: i, val); |
293 | return 0; |
294 | } |
295 | return -ENOENT; |
296 | } |
297 | EXPORT_SYMBOL_GPL(btree_update); |
298 | |
299 | /* |
300 | * Usually this function is quite similar to normal lookup. But the key of |
301 | * a parent node may be smaller than the smallest key of all its siblings. |
302 | * In such a case we cannot just return NULL, as we have only proven that no |
303 | * key smaller than __key, but larger than this parent key exists. |
304 | * So we set __key to the parent key and retry. We have to use the smallest |
305 | * such parent key, which is the last parent key we encountered. |
306 | */ |
307 | void *btree_get_prev(struct btree_head *head, struct btree_geo *geo, |
308 | unsigned long *__key) |
309 | { |
310 | int i, height; |
311 | unsigned long *node, *oldnode; |
312 | unsigned long *retry_key = NULL, key[MAX_KEYLEN]; |
313 | |
314 | if (keyzero(geo, key: __key)) |
315 | return NULL; |
316 | |
317 | if (head->height == 0) |
318 | return NULL; |
319 | longcpy(dest: key, src: __key, n: geo->keylen); |
320 | retry: |
321 | dec_key(geo, key); |
322 | |
323 | node = head->node; |
324 | for (height = head->height ; height > 1; height--) { |
325 | for (i = 0; i < geo->no_pairs; i++) |
326 | if (keycmp(geo, node, pos: i, key) <= 0) |
327 | break; |
328 | if (i == geo->no_pairs) |
329 | goto miss; |
330 | oldnode = node; |
331 | node = bval(geo, node, n: i); |
332 | if (!node) |
333 | goto miss; |
334 | retry_key = bkey(geo, node: oldnode, n: i); |
335 | } |
336 | |
337 | if (!node) |
338 | goto miss; |
339 | |
340 | for (i = 0; i < geo->no_pairs; i++) { |
341 | if (keycmp(geo, node, pos: i, key) <= 0) { |
342 | if (bval(geo, node, n: i)) { |
343 | longcpy(dest: __key, src: bkey(geo, node, n: i), n: geo->keylen); |
344 | return bval(geo, node, n: i); |
345 | } else |
346 | goto miss; |
347 | } |
348 | } |
349 | miss: |
350 | if (retry_key) { |
351 | longcpy(dest: key, src: retry_key, n: geo->keylen); |
352 | retry_key = NULL; |
353 | goto retry; |
354 | } |
355 | return NULL; |
356 | } |
357 | EXPORT_SYMBOL_GPL(btree_get_prev); |
358 | |
359 | static int getpos(struct btree_geo *geo, unsigned long *node, |
360 | unsigned long *key) |
361 | { |
362 | int i; |
363 | |
364 | for (i = 0; i < geo->no_pairs; i++) { |
365 | if (keycmp(geo, node, pos: i, key) <= 0) |
366 | break; |
367 | } |
368 | return i; |
369 | } |
370 | |
371 | static int getfill(struct btree_geo *geo, unsigned long *node, int start) |
372 | { |
373 | int i; |
374 | |
375 | for (i = start; i < geo->no_pairs; i++) |
376 | if (!bval(geo, node, n: i)) |
377 | break; |
378 | return i; |
379 | } |
380 | |
381 | /* |
382 | * locate the correct leaf node in the btree |
383 | */ |
384 | static unsigned long *find_level(struct btree_head *head, struct btree_geo *geo, |
385 | unsigned long *key, int level) |
386 | { |
387 | unsigned long *node = head->node; |
388 | int i, height; |
389 | |
390 | for (height = head->height; height > level; height--) { |
391 | for (i = 0; i < geo->no_pairs; i++) |
392 | if (keycmp(geo, node, pos: i, key) <= 0) |
393 | break; |
394 | |
395 | if ((i == geo->no_pairs) || !bval(geo, node, n: i)) { |
396 | /* right-most key is too large, update it */ |
397 | /* FIXME: If the right-most key on higher levels is |
398 | * always zero, this wouldn't be necessary. */ |
399 | i--; |
400 | setkey(geo, node, n: i, key); |
401 | } |
402 | BUG_ON(i < 0); |
403 | node = bval(geo, node, n: i); |
404 | } |
405 | BUG_ON(!node); |
406 | return node; |
407 | } |
408 | |
409 | static int btree_grow(struct btree_head *head, struct btree_geo *geo, |
410 | gfp_t gfp) |
411 | { |
412 | unsigned long *node; |
413 | int fill; |
414 | |
415 | node = btree_node_alloc(head, gfp); |
416 | if (!node) |
417 | return -ENOMEM; |
418 | if (head->node) { |
419 | fill = getfill(geo, node: head->node, start: 0); |
420 | setkey(geo, node, n: 0, key: bkey(geo, node: head->node, n: fill - 1)); |
421 | setval(geo, node, n: 0, val: head->node); |
422 | } |
423 | head->node = node; |
424 | head->height++; |
425 | return 0; |
426 | } |
427 | |
428 | static void btree_shrink(struct btree_head *head, struct btree_geo *geo) |
429 | { |
430 | unsigned long *node; |
431 | int fill; |
432 | |
433 | if (head->height <= 1) |
434 | return; |
435 | |
436 | node = head->node; |
437 | fill = getfill(geo, node, start: 0); |
438 | BUG_ON(fill > 1); |
439 | head->node = bval(geo, node, n: 0); |
440 | head->height--; |
441 | mempool_free(element: node, pool: head->mempool); |
442 | } |
443 | |
444 | static int btree_insert_level(struct btree_head *head, struct btree_geo *geo, |
445 | unsigned long *key, void *val, int level, |
446 | gfp_t gfp) |
447 | { |
448 | unsigned long *node; |
449 | int i, pos, fill, err; |
450 | |
451 | BUG_ON(!val); |
452 | if (head->height < level) { |
453 | err = btree_grow(head, geo, gfp); |
454 | if (err) |
455 | return err; |
456 | } |
457 | |
458 | retry: |
459 | node = find_level(head, geo, key, level); |
460 | pos = getpos(geo, node, key); |
461 | fill = getfill(geo, node, start: pos); |
462 | /* two identical keys are not allowed */ |
463 | BUG_ON(pos < fill && keycmp(geo, node, pos, key) == 0); |
464 | |
465 | if (fill == geo->no_pairs) { |
466 | /* need to split node */ |
467 | unsigned long *new; |
468 | |
469 | new = btree_node_alloc(head, gfp); |
470 | if (!new) |
471 | return -ENOMEM; |
472 | err = btree_insert_level(head, geo, |
473 | key: bkey(geo, node, n: fill / 2 - 1), |
474 | val: new, level: level + 1, gfp); |
475 | if (err) { |
476 | mempool_free(element: new, pool: head->mempool); |
477 | return err; |
478 | } |
479 | for (i = 0; i < fill / 2; i++) { |
480 | setkey(geo, node: new, n: i, key: bkey(geo, node, n: i)); |
481 | setval(geo, node: new, n: i, val: bval(geo, node, n: i)); |
482 | setkey(geo, node, n: i, key: bkey(geo, node, n: i + fill / 2)); |
483 | setval(geo, node, n: i, val: bval(geo, node, n: i + fill / 2)); |
484 | clearpair(geo, node, n: i + fill / 2); |
485 | } |
486 | if (fill & 1) { |
487 | setkey(geo, node, n: i, key: bkey(geo, node, n: fill - 1)); |
488 | setval(geo, node, n: i, val: bval(geo, node, n: fill - 1)); |
489 | clearpair(geo, node, n: fill - 1); |
490 | } |
491 | goto retry; |
492 | } |
493 | BUG_ON(fill >= geo->no_pairs); |
494 | |
495 | /* shift and insert */ |
496 | for (i = fill; i > pos; i--) { |
497 | setkey(geo, node, n: i, key: bkey(geo, node, n: i - 1)); |
498 | setval(geo, node, n: i, val: bval(geo, node, n: i - 1)); |
499 | } |
500 | setkey(geo, node, n: pos, key); |
501 | setval(geo, node, n: pos, val); |
502 | |
503 | return 0; |
504 | } |
505 | |
506 | int btree_insert(struct btree_head *head, struct btree_geo *geo, |
507 | unsigned long *key, void *val, gfp_t gfp) |
508 | { |
509 | BUG_ON(!val); |
510 | return btree_insert_level(head, geo, key, val, level: 1, gfp); |
511 | } |
512 | EXPORT_SYMBOL_GPL(btree_insert); |
513 | |
514 | static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo, |
515 | unsigned long *key, int level); |
516 | static void merge(struct btree_head *head, struct btree_geo *geo, int level, |
517 | unsigned long *left, int lfill, |
518 | unsigned long *right, int rfill, |
519 | unsigned long *parent, int lpos) |
520 | { |
521 | int i; |
522 | |
523 | for (i = 0; i < rfill; i++) { |
524 | /* Move all keys to the left */ |
525 | setkey(geo, node: left, n: lfill + i, key: bkey(geo, node: right, n: i)); |
526 | setval(geo, node: left, n: lfill + i, val: bval(geo, node: right, n: i)); |
527 | } |
528 | /* Exchange left and right child in parent */ |
529 | setval(geo, node: parent, n: lpos, val: right); |
530 | setval(geo, node: parent, n: lpos + 1, val: left); |
531 | /* Remove left (formerly right) child from parent */ |
532 | btree_remove_level(head, geo, key: bkey(geo, node: parent, n: lpos), level: level + 1); |
533 | mempool_free(element: right, pool: head->mempool); |
534 | } |
535 | |
536 | static void rebalance(struct btree_head *head, struct btree_geo *geo, |
537 | unsigned long *key, int level, unsigned long *child, int fill) |
538 | { |
539 | unsigned long *parent, *left = NULL, *right = NULL; |
540 | int i, no_left, no_right; |
541 | |
542 | if (fill == 0) { |
543 | /* Because we don't steal entries from a neighbour, this case |
544 | * can happen. Parent node contains a single child, this |
545 | * node, so merging with a sibling never happens. |
546 | */ |
547 | btree_remove_level(head, geo, key, level: level + 1); |
548 | mempool_free(element: child, pool: head->mempool); |
549 | return; |
550 | } |
551 | |
552 | parent = find_level(head, geo, key, level: level + 1); |
553 | i = getpos(geo, node: parent, key); |
554 | BUG_ON(bval(geo, parent, i) != child); |
555 | |
556 | if (i > 0) { |
557 | left = bval(geo, node: parent, n: i - 1); |
558 | no_left = getfill(geo, node: left, start: 0); |
559 | if (fill + no_left <= geo->no_pairs) { |
560 | merge(head, geo, level, |
561 | left, lfill: no_left, |
562 | right: child, rfill: fill, |
563 | parent, lpos: i - 1); |
564 | return; |
565 | } |
566 | } |
567 | if (i + 1 < getfill(geo, node: parent, start: i)) { |
568 | right = bval(geo, node: parent, n: i + 1); |
569 | no_right = getfill(geo, node: right, start: 0); |
570 | if (fill + no_right <= geo->no_pairs) { |
571 | merge(head, geo, level, |
572 | left: child, lfill: fill, |
573 | right, rfill: no_right, |
574 | parent, lpos: i); |
575 | return; |
576 | } |
577 | } |
578 | /* |
579 | * We could also try to steal one entry from the left or right |
580 | * neighbor. By not doing so we changed the invariant from |
581 | * "all nodes are at least half full" to "no two neighboring |
582 | * nodes can be merged". Which means that the average fill of |
583 | * all nodes is still half or better. |
584 | */ |
585 | } |
586 | |
587 | static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo, |
588 | unsigned long *key, int level) |
589 | { |
590 | unsigned long *node; |
591 | int i, pos, fill; |
592 | void *ret; |
593 | |
594 | if (level > head->height) { |
595 | /* we recursed all the way up */ |
596 | head->height = 0; |
597 | head->node = NULL; |
598 | return NULL; |
599 | } |
600 | |
601 | node = find_level(head, geo, key, level); |
602 | pos = getpos(geo, node, key); |
603 | fill = getfill(geo, node, start: pos); |
604 | if ((level == 1) && (keycmp(geo, node, pos, key) != 0)) |
605 | return NULL; |
606 | ret = bval(geo, node, n: pos); |
607 | |
608 | /* remove and shift */ |
609 | for (i = pos; i < fill - 1; i++) { |
610 | setkey(geo, node, n: i, key: bkey(geo, node, n: i + 1)); |
611 | setval(geo, node, n: i, val: bval(geo, node, n: i + 1)); |
612 | } |
613 | clearpair(geo, node, n: fill - 1); |
614 | |
615 | if (fill - 1 < geo->no_pairs / 2) { |
616 | if (level < head->height) |
617 | rebalance(head, geo, key, level, child: node, fill: fill - 1); |
618 | else if (fill - 1 == 1) |
619 | btree_shrink(head, geo); |
620 | } |
621 | |
622 | return ret; |
623 | } |
624 | |
625 | void *btree_remove(struct btree_head *head, struct btree_geo *geo, |
626 | unsigned long *key) |
627 | { |
628 | if (head->height == 0) |
629 | return NULL; |
630 | |
631 | return btree_remove_level(head, geo, key, level: 1); |
632 | } |
633 | EXPORT_SYMBOL_GPL(btree_remove); |
634 | |
635 | int btree_merge(struct btree_head *target, struct btree_head *victim, |
636 | struct btree_geo *geo, gfp_t gfp) |
637 | { |
638 | unsigned long key[MAX_KEYLEN]; |
639 | unsigned long dup[MAX_KEYLEN]; |
640 | void *val; |
641 | int err; |
642 | |
643 | BUG_ON(target == victim); |
644 | |
645 | if (!(target->node)) { |
646 | /* target is empty, just copy fields over */ |
647 | target->node = victim->node; |
648 | target->height = victim->height; |
649 | __btree_init(head: victim); |
650 | return 0; |
651 | } |
652 | |
653 | /* TODO: This needs some optimizations. Currently we do three tree |
654 | * walks to remove a single object from the victim. |
655 | */ |
656 | for (;;) { |
657 | if (!btree_last(victim, geo, key)) |
658 | break; |
659 | val = btree_lookup(victim, geo, key); |
660 | err = btree_insert(target, geo, key, val, gfp); |
661 | if (err) |
662 | return err; |
663 | /* We must make a copy of the key, as the original will get |
664 | * mangled inside btree_remove. */ |
665 | longcpy(dest: dup, src: key, n: geo->keylen); |
666 | btree_remove(victim, geo, dup); |
667 | } |
668 | return 0; |
669 | } |
670 | EXPORT_SYMBOL_GPL(btree_merge); |
671 | |
672 | static size_t __btree_for_each(struct btree_head *head, struct btree_geo *geo, |
673 | unsigned long *node, unsigned long opaque, |
674 | void (*func)(void *elem, unsigned long opaque, |
675 | unsigned long *key, size_t index, |
676 | void *func2), |
677 | void *func2, int reap, int height, size_t count) |
678 | { |
679 | int i; |
680 | unsigned long *child; |
681 | |
682 | for (i = 0; i < geo->no_pairs; i++) { |
683 | child = bval(geo, node, n: i); |
684 | if (!child) |
685 | break; |
686 | if (height > 1) |
687 | count = __btree_for_each(head, geo, node: child, opaque, |
688 | func, func2, reap, height: height - 1, count); |
689 | else |
690 | func(child, opaque, bkey(geo, node, n: i), count++, |
691 | func2); |
692 | } |
693 | if (reap) |
694 | mempool_free(element: node, pool: head->mempool); |
695 | return count; |
696 | } |
697 | |
698 | static void empty(void *elem, unsigned long opaque, unsigned long *key, |
699 | size_t index, void *func2) |
700 | { |
701 | } |
702 | |
703 | void visitorl(void *elem, unsigned long opaque, unsigned long *key, |
704 | size_t index, void *__func) |
705 | { |
706 | visitorl_t func = __func; |
707 | |
708 | func(elem, opaque, *key, index); |
709 | } |
710 | EXPORT_SYMBOL_GPL(visitorl); |
711 | |
712 | void visitor32(void *elem, unsigned long opaque, unsigned long *__key, |
713 | size_t index, void *__func) |
714 | { |
715 | visitor32_t func = __func; |
716 | u32 *key = (void *)__key; |
717 | |
718 | func(elem, opaque, *key, index); |
719 | } |
720 | EXPORT_SYMBOL_GPL(visitor32); |
721 | |
722 | void visitor64(void *elem, unsigned long opaque, unsigned long *__key, |
723 | size_t index, void *__func) |
724 | { |
725 | visitor64_t func = __func; |
726 | u64 *key = (void *)__key; |
727 | |
728 | func(elem, opaque, *key, index); |
729 | } |
730 | EXPORT_SYMBOL_GPL(visitor64); |
731 | |
732 | void visitor128(void *elem, unsigned long opaque, unsigned long *__key, |
733 | size_t index, void *__func) |
734 | { |
735 | visitor128_t func = __func; |
736 | u64 *key = (void *)__key; |
737 | |
738 | func(elem, opaque, key[0], key[1], index); |
739 | } |
740 | EXPORT_SYMBOL_GPL(visitor128); |
741 | |
742 | size_t btree_visitor(struct btree_head *head, struct btree_geo *geo, |
743 | unsigned long opaque, |
744 | void (*func)(void *elem, unsigned long opaque, |
745 | unsigned long *key, |
746 | size_t index, void *func2), |
747 | void *func2) |
748 | { |
749 | size_t count = 0; |
750 | |
751 | if (!func2) |
752 | func = empty; |
753 | if (head->node) |
754 | count = __btree_for_each(head, geo, node: head->node, opaque, func, |
755 | func2, reap: 0, height: head->height, count: 0); |
756 | return count; |
757 | } |
758 | EXPORT_SYMBOL_GPL(btree_visitor); |
759 | |
760 | size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo, |
761 | unsigned long opaque, |
762 | void (*func)(void *elem, unsigned long opaque, |
763 | unsigned long *key, |
764 | size_t index, void *func2), |
765 | void *func2) |
766 | { |
767 | size_t count = 0; |
768 | |
769 | if (!func2) |
770 | func = empty; |
771 | if (head->node) |
772 | count = __btree_for_each(head, geo, node: head->node, opaque, func, |
773 | func2, reap: 1, height: head->height, count: 0); |
774 | __btree_init(head); |
775 | return count; |
776 | } |
777 | EXPORT_SYMBOL_GPL(btree_grim_visitor); |
778 | |
779 | static int __init btree_module_init(void) |
780 | { |
781 | btree_cachep = kmem_cache_create(name: "btree_node" , NODESIZE, align: 0, |
782 | SLAB_HWCACHE_ALIGN, NULL); |
783 | return 0; |
784 | } |
785 | |
786 | static void __exit btree_module_exit(void) |
787 | { |
788 | kmem_cache_destroy(s: btree_cachep); |
789 | } |
790 | |
791 | /* If core code starts using btree, initialization should happen even earlier */ |
792 | module_init(btree_module_init); |
793 | module_exit(btree_module_exit); |
794 | |
795 | MODULE_AUTHOR("Joern Engel <joern@logfs.org>" ); |
796 | MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>" ); |
797 | |