1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef _EYTZINGER_H |
3 | #define _EYTZINGER_H |
4 | |
5 | #include <linux/bitops.h> |
6 | #include <linux/log2.h> |
7 | |
8 | #ifdef EYTZINGER_DEBUG |
9 | #define EYTZINGER_BUG_ON(cond) BUG_ON(cond) |
10 | #else |
11 | #define EYTZINGER_BUG_ON(cond) |
12 | #endif |
13 | |
14 | /* |
15 | * Traversal for trees in eytzinger layout - a full binary tree layed out in an |
16 | * array. |
17 | * |
18 | * Consider using an eytzinger tree any time you would otherwise be doing binary |
19 | * search over an array. Binary search is a worst case scenario for branch |
20 | * prediction and prefetching, but in an eytzinger tree every node's children |
21 | * are adjacent in memory, thus we can prefetch children before knowing the |
22 | * result of the comparison, assuming multiple nodes fit on a cacheline. |
23 | * |
24 | * Two variants are provided, for one based indexing and zero based indexing. |
25 | * |
26 | * Zero based indexing is more convenient, but one based indexing has better |
27 | * alignment and thus better performance because each new level of the tree |
28 | * starts at a power of two, and thus if element 0 was cacheline aligned, each |
29 | * new level will be as well. |
30 | */ |
31 | |
32 | static inline unsigned eytzinger1_child(unsigned i, unsigned child) |
33 | { |
34 | EYTZINGER_BUG_ON(child > 1); |
35 | |
36 | return (i << 1) + child; |
37 | } |
38 | |
39 | static inline unsigned eytzinger1_left_child(unsigned i) |
40 | { |
41 | return eytzinger1_child(i, child: 0); |
42 | } |
43 | |
44 | static inline unsigned eytzinger1_right_child(unsigned i) |
45 | { |
46 | return eytzinger1_child(i, child: 1); |
47 | } |
48 | |
49 | static inline unsigned eytzinger1_first(unsigned size) |
50 | { |
51 | return rounddown_pow_of_two(size); |
52 | } |
53 | |
54 | static inline unsigned eytzinger1_last(unsigned size) |
55 | { |
56 | return rounddown_pow_of_two(size + 1) - 1; |
57 | } |
58 | |
59 | /* |
60 | * eytzinger1_next() and eytzinger1_prev() have the nice properties that |
61 | * |
62 | * eytzinger1_next(0) == eytzinger1_first()) |
63 | * eytzinger1_prev(0) == eytzinger1_last()) |
64 | * |
65 | * eytzinger1_prev(eytzinger1_first()) == 0 |
66 | * eytzinger1_next(eytzinger1_last()) == 0 |
67 | */ |
68 | |
69 | static inline unsigned eytzinger1_next(unsigned i, unsigned size) |
70 | { |
71 | EYTZINGER_BUG_ON(i > size); |
72 | |
73 | if (eytzinger1_right_child(i) <= size) { |
74 | i = eytzinger1_right_child(i); |
75 | |
76 | i <<= __fls(word: size + 1) - __fls(word: i); |
77 | i >>= i > size; |
78 | } else { |
79 | i >>= ffz(i) + 1; |
80 | } |
81 | |
82 | return i; |
83 | } |
84 | |
85 | static inline unsigned eytzinger1_prev(unsigned i, unsigned size) |
86 | { |
87 | EYTZINGER_BUG_ON(i > size); |
88 | |
89 | if (eytzinger1_left_child(i) <= size) { |
90 | i = eytzinger1_left_child(i) + 1; |
91 | |
92 | i <<= __fls(word: size + 1) - __fls(word: i); |
93 | i -= 1; |
94 | i >>= i > size; |
95 | } else { |
96 | i >>= __ffs(i) + 1; |
97 | } |
98 | |
99 | return i; |
100 | } |
101 | |
102 | static inline unsigned (unsigned size) |
103 | { |
104 | return (size + 1 - rounddown_pow_of_two(size)) << 1; |
105 | } |
106 | |
107 | static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, |
108 | unsigned ) |
109 | { |
110 | unsigned b = __fls(word: i); |
111 | unsigned shift = __fls(word: size) - b; |
112 | int s; |
113 | |
114 | EYTZINGER_BUG_ON(!i || i > size); |
115 | |
116 | i ^= 1U << b; |
117 | i <<= 1; |
118 | i |= 1; |
119 | i <<= shift; |
120 | |
121 | /* |
122 | * sign bit trick: |
123 | * |
124 | * if (i > extra) |
125 | * i -= (i - extra) >> 1; |
126 | */ |
127 | s = extra - i; |
128 | i += (s >> 1) & (s >> 31); |
129 | |
130 | return i; |
131 | } |
132 | |
133 | static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, |
134 | unsigned ) |
135 | { |
136 | unsigned shift; |
137 | int s; |
138 | |
139 | EYTZINGER_BUG_ON(!i || i > size); |
140 | |
141 | /* |
142 | * sign bit trick: |
143 | * |
144 | * if (i > extra) |
145 | * i += i - extra; |
146 | */ |
147 | s = extra - i; |
148 | i -= s & (s >> 31); |
149 | |
150 | shift = __ffs(i); |
151 | |
152 | i >>= shift + 1; |
153 | i |= 1U << (__fls(word: size) - shift); |
154 | |
155 | return i; |
156 | } |
157 | |
158 | static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size) |
159 | { |
160 | return __eytzinger1_to_inorder(i, size, extra: eytzinger1_extra(size)); |
161 | } |
162 | |
163 | static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) |
164 | { |
165 | return __inorder_to_eytzinger1(i, size, extra: eytzinger1_extra(size)); |
166 | } |
167 | |
168 | #define eytzinger1_for_each(_i, _size) \ |
169 | for (unsigned (_i) = eytzinger1_first((_size)); \ |
170 | (_i) != 0; \ |
171 | (_i) = eytzinger1_next((_i), (_size))) |
172 | |
173 | /* Zero based indexing version: */ |
174 | |
175 | static inline unsigned eytzinger0_child(unsigned i, unsigned child) |
176 | { |
177 | EYTZINGER_BUG_ON(child > 1); |
178 | |
179 | return (i << 1) + 1 + child; |
180 | } |
181 | |
182 | static inline unsigned eytzinger0_left_child(unsigned i) |
183 | { |
184 | return eytzinger0_child(i, child: 0); |
185 | } |
186 | |
187 | static inline unsigned eytzinger0_right_child(unsigned i) |
188 | { |
189 | return eytzinger0_child(i, child: 1); |
190 | } |
191 | |
192 | static inline unsigned eytzinger0_first(unsigned size) |
193 | { |
194 | return eytzinger1_first(size) - 1; |
195 | } |
196 | |
197 | static inline unsigned eytzinger0_last(unsigned size) |
198 | { |
199 | return eytzinger1_last(size) - 1; |
200 | } |
201 | |
202 | static inline unsigned eytzinger0_next(unsigned i, unsigned size) |
203 | { |
204 | return eytzinger1_next(i: i + 1, size) - 1; |
205 | } |
206 | |
207 | static inline unsigned eytzinger0_prev(unsigned i, unsigned size) |
208 | { |
209 | return eytzinger1_prev(i: i + 1, size) - 1; |
210 | } |
211 | |
212 | static inline unsigned (unsigned size) |
213 | { |
214 | return eytzinger1_extra(size); |
215 | } |
216 | |
217 | static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size, |
218 | unsigned ) |
219 | { |
220 | return __eytzinger1_to_inorder(i: i + 1, size, extra) - 1; |
221 | } |
222 | |
223 | static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size, |
224 | unsigned ) |
225 | { |
226 | return __inorder_to_eytzinger1(i: i + 1, size, extra) - 1; |
227 | } |
228 | |
229 | static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size) |
230 | { |
231 | return __eytzinger0_to_inorder(i, size, extra: eytzinger0_extra(size)); |
232 | } |
233 | |
234 | static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) |
235 | { |
236 | return __inorder_to_eytzinger0(i, size, extra: eytzinger0_extra(size)); |
237 | } |
238 | |
239 | #define eytzinger0_for_each(_i, _size) \ |
240 | for (unsigned (_i) = eytzinger0_first((_size)); \ |
241 | (_i) != -1; \ |
242 | (_i) = eytzinger0_next((_i), (_size))) |
243 | |
244 | /* return greatest node <= @search, or -1 if not found */ |
245 | static inline int eytzinger0_find_le(void *base, size_t nr, size_t size, |
246 | cmp_func_t cmp, const void *search) |
247 | { |
248 | unsigned i, n = 0; |
249 | |
250 | if (!nr) |
251 | return -1; |
252 | |
253 | do { |
254 | i = n; |
255 | n = eytzinger0_child(i, child: cmp(base + i * size, search) <= 0); |
256 | } while (n < nr); |
257 | |
258 | if (n & 1) { |
259 | /* |
260 | * @i was greater than @search, return previous node: |
261 | * |
262 | * if @i was leftmost/smallest element, |
263 | * eytzinger0_prev(eytzinger0_first())) returns -1, as expected |
264 | */ |
265 | return eytzinger0_prev(i, size: nr); |
266 | } else { |
267 | return i; |
268 | } |
269 | } |
270 | |
271 | static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size, |
272 | cmp_func_t cmp, const void *search) |
273 | { |
274 | ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search); |
275 | |
276 | /* |
277 | * if eytitzinger0_find_le() returned -1 - no element was <= search - we |
278 | * want to return the first element; next/prev identities mean this work |
279 | * as expected |
280 | * |
281 | * similarly if find_le() returns last element, we should return -1; |
282 | * identities mean this all works out: |
283 | */ |
284 | return eytzinger0_next(i: idx, size: nr); |
285 | } |
286 | |
287 | #define eytzinger0_find(base, nr, size, _cmp, search) \ |
288 | ({ \ |
289 | void *_base = (base); \ |
290 | const void *_search = (search); \ |
291 | size_t _nr = (nr); \ |
292 | size_t _size = (size); \ |
293 | size_t _i = 0; \ |
294 | int _res; \ |
295 | \ |
296 | while (_i < _nr && \ |
297 | (_res = _cmp(_search, _base + _i * _size))) \ |
298 | _i = eytzinger0_child(_i, _res > 0); \ |
299 | _i; \ |
300 | }) |
301 | |
302 | void eytzinger0_sort_r(void *, size_t, size_t, |
303 | cmp_r_func_t, swap_r_func_t, const void *); |
304 | void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t); |
305 | |
306 | #endif /* _EYTZINGER_H */ |
307 | |