1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
2 | /* |
3 | * Copyright (C) 2011 Red Hat, Inc. |
4 | * |
5 | * This file is released under the GPL. |
6 | */ |
7 | #ifndef _LINUX_DM_BTREE_H |
8 | #define _LINUX_DM_BTREE_H |
9 | |
10 | #include "dm-block-manager.h" |
11 | |
12 | struct dm_transaction_manager; |
13 | |
14 | /*----------------------------------------------------------------*/ |
15 | |
16 | /* |
17 | * Annotations used to check on-disk metadata is handled as little-endian. |
18 | */ |
19 | #ifdef __CHECKER__ |
20 | # define __dm_written_to_disk(x) __releases(x) |
21 | # define __dm_reads_from_disk(x) __acquires(x) |
22 | # define __dm_bless_for_disk(x) __acquire(x) |
23 | # define __dm_unbless_for_disk(x) __release(x) |
24 | #else |
25 | # define __dm_written_to_disk(x) |
26 | # define __dm_reads_from_disk(x) |
27 | # define __dm_bless_for_disk(x) |
28 | # define __dm_unbless_for_disk(x) |
29 | #endif |
30 | |
31 | /*----------------------------------------------------------------*/ |
32 | |
33 | /* |
34 | * Manipulates hierarchical B+ trees with 64-bit keys and arbitrary-sized |
35 | * values. |
36 | */ |
37 | |
38 | /* |
39 | * Information about the values stored within the btree. |
40 | */ |
41 | struct dm_btree_value_type { |
42 | void *context; |
43 | |
44 | /* |
45 | * The size in bytes of each value. |
46 | */ |
47 | uint32_t size; |
48 | |
49 | /* |
50 | * Any of these methods can be safely set to NULL if you do not |
51 | * need the corresponding feature. |
52 | */ |
53 | |
54 | /* |
55 | * The btree is making a duplicate of a run of values, for instance |
56 | * because previously-shared btree nodes have now diverged. |
57 | * @value argument is the new copy that the copy function may modify. |
58 | * (Probably it just wants to increment a reference count |
59 | * somewhere.) This method is _not_ called for insertion of a new |
60 | * value: It is assumed the ref count is already 1. |
61 | */ |
62 | void (*inc)(void *context, const void *value, unsigned int count); |
63 | |
64 | /* |
65 | * These values are being deleted. The btree takes care of freeing |
66 | * the memory pointed to by @value. Often the del function just |
67 | * needs to decrement a reference counts somewhere. |
68 | */ |
69 | void (*dec)(void *context, const void *value, unsigned int count); |
70 | |
71 | /* |
72 | * A test for equality between two values. When a value is |
73 | * overwritten with a new one, the old one has the dec method |
74 | * called _unless_ the new and old value are deemed equal. |
75 | */ |
76 | int (*equal)(void *context, const void *value1, const void *value2); |
77 | }; |
78 | |
79 | /* |
80 | * The shape and contents of a btree. |
81 | */ |
82 | struct dm_btree_info { |
83 | struct dm_transaction_manager *tm; |
84 | |
85 | /* |
86 | * Number of nested btrees. (Not the depth of a single tree.) |
87 | */ |
88 | unsigned int levels; |
89 | struct dm_btree_value_type value_type; |
90 | }; |
91 | |
92 | /* |
93 | * Set up an empty tree. O(1). |
94 | */ |
95 | int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root); |
96 | |
97 | /* |
98 | * Delete a tree. O(n) - this is the slow one! It can also block, so |
99 | * please don't call it on an IO path. |
100 | */ |
101 | int dm_btree_del(struct dm_btree_info *info, dm_block_t root); |
102 | |
103 | /* |
104 | * All the lookup functions return -ENODATA if the key cannot be found. |
105 | */ |
106 | |
107 | /* |
108 | * Tries to find a key that matches exactly. O(ln(n)) |
109 | */ |
110 | int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root, |
111 | uint64_t *keys, void *value_le); |
112 | |
113 | /* |
114 | * Tries to find the first key where the bottom level key is >= to that |
115 | * given. Useful for skipping empty sections of the btree. |
116 | */ |
117 | int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root, |
118 | uint64_t *keys, uint64_t *rkey, void *value_le); |
119 | |
120 | /* |
121 | * Insertion (or overwrite an existing value). O(ln(n)) |
122 | */ |
123 | int dm_btree_insert(struct dm_btree_info *info, dm_block_t root, |
124 | uint64_t *keys, void *value, dm_block_t *new_root) |
125 | __dm_written_to_disk(value); |
126 | |
127 | /* |
128 | * A variant of insert that indicates whether it actually inserted or just |
129 | * overwrote. Useful if you're keeping track of the number of entries in a |
130 | * tree. |
131 | */ |
132 | int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root, |
133 | uint64_t *keys, void *value, dm_block_t *new_root, |
134 | int *inserted) |
135 | __dm_written_to_disk(value); |
136 | |
137 | /* |
138 | * Remove a key if present. This doesn't remove empty sub trees. Normally |
139 | * subtrees represent a separate entity, like a snapshot map, so this is |
140 | * correct behaviour. O(ln(n)). |
141 | */ |
142 | int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, |
143 | uint64_t *keys, dm_block_t *new_root); |
144 | |
145 | /* |
146 | * Removes a _contiguous_ run of values starting from 'keys' and not |
147 | * reaching keys2 (where keys2 is keys with the final key replaced with |
148 | * 'end_key'). 'end_key' is the one-past-the-end value. 'keys' may be |
149 | * altered. |
150 | */ |
151 | int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root, |
152 | uint64_t *keys, uint64_t end_key, |
153 | dm_block_t *new_root, unsigned int *nr_removed); |
154 | |
155 | /* |
156 | * Returns < 0 on failure. Otherwise the number of key entries that have |
157 | * been filled out. Remember trees can have zero entries, and as such have |
158 | * no lowest key. |
159 | */ |
160 | int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root, |
161 | uint64_t *result_keys); |
162 | |
163 | /* |
164 | * Returns < 0 on failure. Otherwise the number of key entries that have |
165 | * been filled out. Remember trees can have zero entries, and as such have |
166 | * no highest key. |
167 | */ |
168 | int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, |
169 | uint64_t *result_keys); |
170 | |
171 | /* |
172 | * Iterate through the a btree, calling fn() on each entry. |
173 | * It only works for single level trees and is internally recursive, so |
174 | * monitor stack usage carefully. |
175 | */ |
176 | int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, |
177 | int (*fn)(void *context, uint64_t *keys, void *leaf), |
178 | void *context); |
179 | |
180 | |
181 | /*----------------------------------------------------------------*/ |
182 | |
183 | /* |
184 | * Cursor API. This does not follow the rolling lock convention. Since we |
185 | * know the order that values are required we can issue prefetches to speed |
186 | * up iteration. Use on a single level btree only. |
187 | */ |
188 | #define DM_BTREE_CURSOR_MAX_DEPTH 16 |
189 | |
190 | struct cursor_node { |
191 | struct dm_block *b; |
192 | unsigned int index; |
193 | }; |
194 | |
195 | struct dm_btree_cursor { |
196 | struct dm_btree_info *info; |
197 | dm_block_t root; |
198 | |
199 | bool prefetch_leaves; |
200 | unsigned int depth; |
201 | struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH]; |
202 | }; |
203 | |
204 | /* |
205 | * Creates a fresh cursor. If prefetch_leaves is set then it is assumed |
206 | * the btree contains block indexes that will be prefetched. The cursor is |
207 | * quite large, so you probably don't want to put it on the stack. |
208 | */ |
209 | int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, |
210 | bool prefetch_leaves, struct dm_btree_cursor *c); |
211 | void dm_btree_cursor_end(struct dm_btree_cursor *c); |
212 | int dm_btree_cursor_next(struct dm_btree_cursor *c); |
213 | int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count); |
214 | int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le); |
215 | |
216 | #endif /* _LINUX_DM_BTREE_H */ |
217 | |