// SPDX-License-Identifier: GPL-2.0

#include "eytzinger.h"

/**
 * is_aligned - is this pointer & size okay for word-wide copying?
 * @base: pointer to data
 * @size: size of each element
 * @align: required alignment (typically 4 or 8)
 *
 * Returns true if elements can be copied using word loads and stores.
 * The size must be a multiple of the alignment, and the base address must
 * be aligned as well if we do not have
 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
 *
 * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
 * to "if ((a | b) & mask)", so we do that by hand.
 */
__attribute_const__ __always_inline
static bool is_aligned(const void *base, size_t size, unsigned char align)
{
	unsigned char lsbits = (unsigned char)size;

	(void)base;
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	lsbits |= (unsigned char)(uintptr_t)base;
#endif
	return (lsbits & (align - 1)) == 0;
}
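
/*
 * Illustration (hypothetical calls, assuming p is 4-byte aligned):
 *
 *	is_aligned(p, 8, 4)	- true: size 8 has the low two bits clear
 *	is_aligned(p, 6, 4)	- false: size 6 trips the mask
 *	is_aligned(p + 1, 8, 4)	- false unless
 *				  CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS,
 *				  since the base address is then misaligned
 */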

/**
 * swap_words_32 - swap two elements in 32-bit chunks
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size (must be a multiple of 4)
 *
 * Exchange the two objects in memory. This exploits base+index addressing,
 * which basically all CPUs have, to minimize loop overhead computations.
 *
 * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
 * bottom of the loop, even though the zero flag is still valid from the
 * subtract (since the intervening mov instructions don't alter the flags).
 * Gcc 8.1.0 doesn't have that problem.
 */
static void swap_words_32(void *a, void *b, size_t n)
{
	do {
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
	} while (n);
}

/**
 * swap_words_64 - swap two elements in 64-bit chunks
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size (must be a multiple of 8)
 *
 * Exchange the two objects in memory. This exploits base+index
 * addressing, which basically all CPUs have, to minimize loop overhead
 * computations.
 *
 * We'd like to use 64-bit loads if possible. If they're not, emulating
 * one requires base+index+4 addressing which x86 has but most other
 * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads,
 * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
 * x32 ABI). Are there any cases the kernel needs to worry about?
 */
static void swap_words_64(void *a, void *b, size_t n)
{
	do {
#ifdef CONFIG_64BIT
		u64 t = *(u64 *)(a + (n -= 8));
		*(u64 *)(a + n) = *(u64 *)(b + n);
		*(u64 *)(b + n) = t;
#else
		/* Use two 32-bit transfers to avoid base+index+4 addressing */
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;

		t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
#endif
	} while (n);
}

/**
 * swap_bytes - swap two elements a byte at a time
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size
 *
 * This is the fallback if alignment doesn't allow using larger chunks.
 */
static void swap_bytes(void *a, void *b, size_t n)
{
	do {
		char t = ((char *)a)[--n];
		((char *)a)[n] = ((char *)b)[n];
		((char *)b)[n] = t;
	} while (n);
}

/*
 * The values are arbitrary as long as they can't be confused with
 * a pointer, but small integers make for the smallest compare
 * instructions.
 */
#define SWAP_WORDS_64 (swap_r_func_t)0
#define SWAP_WORDS_32 (swap_r_func_t)1
#define SWAP_BYTES (swap_r_func_t)2
#define SWAP_WRAPPER (swap_r_func_t)3

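/*
 * eytzinger0_sort() takes context-free cmp_func_t/swap_func_t callbacks,
 * while eytzinger0_sort_r() expects the _r variants, so the plain
 * pointers are smuggled through @priv in this wrapper and tagged with
 * the _CMP_WRAPPER/SWAP_WRAPPER sentinels for do_cmp()/do_swap() to
 * unwrap.
 */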
struct wrapper {
	cmp_func_t cmp;
	swap_func_t swap_func;
};

/*
 * The function pointer is last to make tail calls most efficient if the
 * compiler decides not to inline this function.
 */
static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
{
	if (swap_func == SWAP_WRAPPER) {
		((const struct wrapper *)priv)->swap_func(a, b, (int)size);
		return;
	}

	if (swap_func == SWAP_WORDS_64)
		swap_words_64(a, b, size);
	else if (swap_func == SWAP_WORDS_32)
		swap_words_32(a, b, size);
	else if (swap_func == SWAP_BYTES)
		swap_bytes(a, b, size);
	else
		swap_func(a, b, (int)size, priv);
}
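
/*
 * Sketch of a caller-supplied swap for eytzinger0_sort_r() (struct node
 * is hypothetical; a real function pointer can never collide with the
 * small-integer sentinels above):
 *
 *	static void swap_nodes(void *a, void *b, int size, const void *priv)
 *	{
 *		struct node t = *(struct node *)a;
 *
 *		*(struct node *)a = *(struct node *)b;
 *		*(struct node *)b = t;
 *		... fix up anything pointing at a or b here ...
 *	}
 */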

#define _CMP_WRAPPER ((cmp_r_func_t)0L)

static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv)
{
	if (cmp == _CMP_WRAPPER)
		return ((const struct wrapper *)priv)->cmp(a, b);
	return cmp(a, b, priv);
}

static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
				    cmp_r_func_t cmp_func, const void *priv,
				    size_t l, size_t r)
{
	return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
		      base + inorder_to_eytzinger0(r, n) * size,
		      cmp_func, priv);
}

static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
				      swap_r_func_t swap_func, const void *priv,
				      size_t l, size_t r)
{
	do_swap(base + inorder_to_eytzinger0(l, n) * size,
		base + inorder_to_eytzinger0(r, n) * size,
		size, swap_func, priv);
}
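
/*
 * Both helpers compare and swap by in-order rank, while the elements
 * themselves sit at eytzinger positions.  For example, for n = 7,
 * inorder_to_eytzinger0() maps in-order ranks 0..6 to array indices
 *
 *	3 1 4 0 5 2 6
 *
 * so once the in-order sequence is sorted, the array holds the
 * level-order (BFS) layout of a balanced binary search tree.
 */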

void eytzinger0_sort_r(void *base, size_t n, size_t size,
		       cmp_r_func_t cmp_func,
		       swap_r_func_t swap_func,
		       const void *priv)
{
	int i, c, r;

	/* called from 'sort' without swap function, let's pick the default */
	if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
		swap_func = NULL;

	if (!swap_func) {
		if (is_aligned(base, size, 8))
			swap_func = SWAP_WORDS_64;
		else if (is_aligned(base, size, 4))
			swap_func = SWAP_WORDS_32;
		else
			swap_func = SWAP_BYTES;
	}

	/* heapify: build a max-heap over the in-order index space */
	for (i = n / 2 - 1; i >= 0; --i) {
		for (r = i; r * 2 + 1 < n; r = c) {
			c = r * 2 + 1;

			if (c + 1 < n &&
			    eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
				c++;

			if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
				break;

			eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
		}
	}

	/* sort: pull the max to the end of the shrinking heap, then sift down */
	for (i = n - 1; i > 0; --i) {
		eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);

		for (r = 0; r * 2 + 1 < i; r = c) {
			c = r * 2 + 1;

			if (c + 1 < i &&
			    eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
				c++;

			if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
				break;

			eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
		}
	}
}
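
/*
 * Note: the above is plain heapsort, O(n log n) comparisons and swaps,
 * run over the in-order index space; each access pays an extra
 * inorder_to_eytzinger0() translation, but no scratch memory is needed.
 */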

void eytzinger0_sort(void *base, size_t n, size_t size,
		     cmp_func_t cmp_func,
		     swap_func_t swap_func)
{
	struct wrapper w = {
		.cmp = cmp_func,
		.swap_func = swap_func,
	};

	return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
}
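
/*
 * Usage sketch (cmp_u32() is a hypothetical caller-supplied comparison):
 *
 *	static int cmp_u32(const void *a, const void *b)
 *	{
 *		u32 l = *(const u32 *)a, r = *(const u32 *)b;
 *
 *		return l < r ? -1 : l > r;
 *	}
 *
 *	eytzinger0_sort(vals, nr, sizeof(vals[0]), cmp_u32, NULL);
 *
 * vals[] is then in eytzinger order, ready for the cache-friendly
 * lookup helpers in eytzinger.h.
 */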