1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ |
2 | #ifndef _LINUX_BCACHE_H |
3 | #define _LINUX_BCACHE_H |
4 | |
5 | /* |
6 | * Bcache on disk data structures |
7 | */ |
8 | |
9 | #include <linux/types.h> |
10 | |
/*
 * BITMASK() - generate a getter/setter pair for a bit field that is packed
 * into an integer member of a structure.
 *
 * @name:   name of the generated getter; the setter is called SET_<name>()
 * @type:   type of the object holding the member
 * @field:  member of @type that stores the bits
 * @offset: position of the field's least significant bit inside @field
 * @size:   width of the field in bits; must be smaller than 64 because the
 *          mask is produced by shifting a 64-bit value left by @size
 *
 * name(k) returns the field value shifted down to bit 0.  SET_name(k, v)
 * clears the field in place and then stores the low @size bits of v; higher
 * bits of v are discarded.
 *
 * @offset and @size are parenthesised at every use so that an expression
 * argument (for example a conditional) cannot regroup with the neighbouring
 * shift and mask operators.
 */
#define BITMASK(name, type, field, offset, size)			\
static inline __u64 name(const type *k)					\
{ return (k->field >> (offset)) & ~(~0ULL << (size)); }			\
									\
static inline void SET_##name(type *k, __u64 v)				\
{									\
	k->field &= ~(~(~0ULL << (size)) << (offset));			\
	k->field |= (v & ~(~0ULL << (size))) << (offset);		\
}
20 | |
21 | /* Btree keys - all units are in sectors */ |
22 | |
/*
 * struct bkey - two fixed 64-bit words followed by a variable number of
 * 64-bit pointer words.
 */
struct bkey {
	__u64	high;	/* bit-packed fields, see the KEY_FIELD() accessors */
	__u64	low;	/* returned unmodified by KEY_OFFSET() */
	__u64	ptr[];	/* pointer words, decoded by the PTR_FIELD() accessors */
};
28 | |
/*
 * KEY_FIELD() - BITMASK() specialised for a member of struct bkey.
 * Generates name(k) and SET_name(k, v).
 */
#define KEY_FIELD(name, field, offset, size)				\
	BITMASK(name, struct bkey, field, offset, size)

/*
 * PTR_FIELD() - generate a getter/setter pair for a bit field inside one of
 * the pointer words of a struct bkey.
 *
 * @name:   name of the generated getter; the setter is called SET_<name>()
 * @offset: position of the field's least significant bit in the word
 * @size:   width of the field in bits; must be smaller than 64
 *
 * name(k, i) extracts the field from k->ptr[i].  SET_name(k, i, v) clears
 * the field in k->ptr[i] and stores the low @size bits of v.  The index i
 * is not range-checked.
 *
 * @offset and @size are parenthesised at every use so that expression
 * arguments cannot regroup with the surrounding shift and mask operators.
 */
#define PTR_FIELD(name, offset, size)					\
static inline __u64 name(const struct bkey *k, unsigned int i)		\
{ return (k->ptr[i] >> (offset)) & ~(~0ULL << (size)); }		\
									\
static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v)	\
{									\
	k->ptr[i] &= ~(~(~0ULL << (size)) << (offset));			\
	k->ptr[i] |= (v & ~(~0ULL << (size))) << (offset);		\
}
41 | |
42 | #define KEY_SIZE_BITS 16 |
43 | #define KEY_MAX_U64S 8 |
44 | |
45 | KEY_FIELD(KEY_PTRS, high, 60, 3) |
46 | KEY_FIELD(__PAD0, high, 58, 2) |
47 | KEY_FIELD(KEY_CSUM, high, 56, 2) |
48 | KEY_FIELD(__PAD1, high, 55, 1) |
49 | KEY_FIELD(KEY_DIRTY, high, 36, 1) |
50 | |
51 | KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS) |
52 | KEY_FIELD(KEY_INODE, high, 0, 20) |
53 | |
54 | /* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */ |
55 | |
56 | static inline __u64 KEY_OFFSET(const struct bkey *k) |
57 | { |
58 | return k->low; |
59 | } |
60 | |
61 | static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v) |
62 | { |
63 | k->low = v; |
64 | } |
65 | |
/*
 * KEY() - build a struct bkey value (a compound literal) from an inode
 * number, an offset and a size.
 *
 * @size is converted to __u64 and shifted to bit 20; @inode is ORed in as
 * given and @offset becomes the low word.  Neither @inode nor @size is
 * masked to its field width.
 *
 * Bit 63 of the high word is always set.  That is a relic from when it was
 * used for binary searches - it told you where a key started.  It's not
 * used anymore, and can probably be safely dropped.
 */
#define KEY(inode, offset, size)					\
((struct bkey) {							\
	.high	= (inode) | ((__u64) (size) << 20) | (1ULL << 63),	\
	.low	= (offset)						\
})

/* Key with inode, offset and size all zero (bit 63 is still set by KEY()) */
#define ZERO_KEY			KEY(0, 0, 0)
78 | |
/*
 * Largest value of the 20-bit inode field (0xfffff).
 *
 * Written as (1 << 20) - 1 instead of ~(~0 << 20): ~0 is a negative int and
 * left-shifting a negative value is undefined behaviour.  The value and the
 * type (int) are the same as before, and the form now matches
 * PTR_CHECK_DEV.
 */
#define MAX_KEY_INODE		((1 << 20) - 1)
/* All 64 bits set except the most significant one */
#define MAX_KEY_OFFSET		(~0ULL >> 1)
/* Zero-size key carrying the largest inode and offset */
#define MAX_KEY			KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0)

/*
 * KEY_START() - offset of @k minus its size.  @k is evaluated twice.
 * START_KEY() - zero-size key with the inode of @k, positioned at
 *               KEY_START(k).
 */
#define KEY_START(k)		(KEY_OFFSET(k) - KEY_SIZE(k))
#define START_KEY(k)		KEY(KEY_INODE(k), KEY_START(k), 0)
85 | |
86 | #define PTR_DEV_BITS 12 |
87 | |
88 | PTR_FIELD(PTR_DEV, 51, PTR_DEV_BITS) |
89 | PTR_FIELD(PTR_OFFSET, 8, 43) |
90 | PTR_FIELD(PTR_GEN, 0, 8) |
91 | |
92 | #define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) |
93 | |
/*
 * MAKE_PTR() - assemble a pointer word from its three fields.
 *
 * @gen:    placed at bit 0
 * @offset: converted to __u64 and placed at bit 8
 * @dev:    converted to __u64 and placed at bit 51
 *
 * The values are not masked to their field widths.  Every argument is
 * parenthesised so that an expression argument is evaluated as a unit;
 * previously a conditional passed as @gen absorbed the rest of the
 * expansion as its condition.
 */
#define MAKE_PTR(gen, offset, dev)					\
	((((__u64) (dev)) << 51) | (((__u64) (offset)) << 8) | (gen))
96 | |
97 | /* Bkey utility code */ |
98 | |
99 | static inline unsigned long bkey_u64s(const struct bkey *k) |
100 | { |
101 | return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k); |
102 | } |
103 | |
/* bkey_bytes() - size of @k in bytes, i.e. bkey_u64s() times eight. */
static inline unsigned long bkey_bytes(const struct bkey *k)
{
	const unsigned long nr_u64s = bkey_u64s(k);

	return nr_u64s * sizeof(__u64);
}
108 | |
/*
 * bkey_copy() - copy the key at @_src, including its pointer words, to
 * @_dest.  The length is bkey_bytes(_src), so @_src is evaluated twice.
 * unsafe_memcpy() is not defined in this header.
 */
#define bkey_copy(_dest, _src)						\
	unsafe_memcpy(_dest, _src, bkey_bytes(_src),			\
		      /* bkey is always padded */)
111 | |
/*
 * bkey_copy_key() - copy only the inode and offset of @src into @dest.
 *
 * The remaining bits of dest->high (size, flags, pointer count) and the
 * pointer words of @dest are left as they are.
 *
 * The calls use plain positional arguments: the "k:" / "v:" labels that
 * had crept into this function are editor parameter hints, not C syntax.
 */
static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
{
	SET_KEY_INODE(dest, KEY_INODE(src));
	SET_KEY_OFFSET(dest, KEY_OFFSET(src));
}
117 | |
/*
 * bkey_next() - address immediately after @k, i.e. @k advanced by
 * bkey_u64s(k) 64-bit words.  The const qualifier of @k is dropped in the
 * returned pointer.
 */
static inline struct bkey *bkey_next(const struct bkey *k)
{
	__u64 *words = (void *) k;

	words += bkey_u64s(k);
	return (struct bkey *) words;
}
124 | |
/*
 * bkey_idx() - address @nr_keys 64-bit words past @k.  Despite its name,
 * @nr_keys is a count of u64 words, not of keys.
 */
static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys)
{
	__u64 *words = (void *) k;

	return (struct bkey *) &words[nr_keys];
}
/*
 * Number of 64-bit words reserved by BKEY_PADDED(): enough for a key with
 * 6 pointers (2 header words + 6 pointer words).
 */
#define BKEY_PAD		8

/*
 * BKEY_PADDED() - declare an anonymous union holding a struct bkey named
 * @key overlaid on a BKEY_PAD-word array named <key>_pad, which reserves
 * room for the key's pointer words.
 */
#define BKEY_PADDED(key)					\
	union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; }
136 | |
137 | /* Superblock */ |
138 | |
/*
 * Superblock versions:
 *
 * Version 0: Cache device
 * Version 1: Backing device
 * Version 2: Seed pointer into btree node checksum
 * Version 3: Cache device with new UUID format
 * Version 4: Backing device with data offset
 * Version 5: Cache device with feature sets
 * Version 6: Backing device with feature sets
 */
#define BCACHE_SB_VERSION_CDEV			0
#define BCACHE_SB_VERSION_BDEV			1
#define BCACHE_SB_VERSION_CDEV_WITH_UUID	3
#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET	4
#define BCACHE_SB_VERSION_CDEV_WITH_FEATURES	5
#define BCACHE_SB_VERSION_BDEV_WITH_FEATURES	6
/* Highest version number defined above */
#define BCACHE_SB_MAX_VERSION			6
152 | |
/* Superblock location: sector number, and the same position in bytes */
#define SB_SECTOR			8
/* SECTOR_SHIFT is not defined in this header; the includer must provide it */
#define SB_OFFSET			(SB_SECTOR << SECTOR_SHIFT)
#define SB_SIZE				4096
/* Size in bytes of the label[] array in the superblock structures */
#define SB_LABEL_SIZE			32
/*
 * Number of entries in the superblock's journal bucket array d[].
 * SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG
 */
#define SB_JOURNAL_BUCKETS		256U
/* Number of entries in jset->prio_bucket[] */
#define MAX_CACHES_PER_SET		8

#define BDEV_DATA_START_DEFAULT		16	/* sectors */
162 | |
163 | struct cache_sb_disk { |
164 | __le64 csum; |
165 | __le64 offset; /* sector where this sb was written */ |
166 | __le64 version; |
167 | |
168 | __u8 magic[16]; |
169 | |
170 | __u8 uuid[16]; |
171 | union { |
172 | __u8 set_uuid[16]; |
173 | __le64 set_magic; |
174 | }; |
175 | __u8 label[SB_LABEL_SIZE]; |
176 | |
177 | __le64 flags; |
178 | __le64 seq; |
179 | |
180 | __le64 feature_compat; |
181 | __le64 feature_incompat; |
182 | __le64 feature_ro_compat; |
183 | |
184 | __le64 pad[5]; |
185 | |
186 | union { |
187 | struct { |
188 | /* Cache devices */ |
189 | __le64 nbuckets; /* device size */ |
190 | |
191 | __le16 block_size; /* sectors */ |
192 | __le16 bucket_size; /* sectors */ |
193 | |
194 | __le16 nr_in_set; |
195 | __le16 nr_this_dev; |
196 | }; |
197 | struct { |
198 | /* Backing devices */ |
199 | __le64 data_offset; |
200 | |
201 | /* |
202 | * block_size from the cache device section is still used by |
203 | * backing devices, so don't add anything here until we fix |
204 | * things to not need it for backing devices anymore |
205 | */ |
206 | }; |
207 | }; |
208 | |
209 | __le32 last_mount; /* time overflow in y2106 */ |
210 | |
211 | __le16 first_bucket; |
212 | union { |
213 | __le16 njournal_buckets; |
214 | __le16 keys; |
215 | }; |
216 | __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ |
217 | __le16 obso_bucket_size_hi; /* obsoleted */ |
218 | }; |
219 | |
220 | /* |
221 | * This is for in-memory bcache super block. |
222 | * NOTE: cache_sb is NOT exactly mapping to cache_sb_disk, the member |
223 | * size, ordering and even whole struct size may be different |
224 | * from cache_sb_disk. |
225 | */ |
226 | struct cache_sb { |
227 | __u64 offset; /* sector where this sb was written */ |
228 | __u64 version; |
229 | |
230 | __u8 magic[16]; |
231 | |
232 | __u8 uuid[16]; |
233 | union { |
234 | __u8 set_uuid[16]; |
235 | __u64 set_magic; |
236 | }; |
237 | __u8 label[SB_LABEL_SIZE]; |
238 | |
239 | __u64 flags; |
240 | __u64 seq; |
241 | |
242 | __u64 feature_compat; |
243 | __u64 feature_incompat; |
244 | __u64 feature_ro_compat; |
245 | |
246 | union { |
247 | struct { |
248 | /* Cache devices */ |
249 | __u64 nbuckets; /* device size */ |
250 | |
251 | __u16 block_size; /* sectors */ |
252 | __u16 nr_in_set; |
253 | __u16 nr_this_dev; |
254 | __u32 bucket_size; /* sectors */ |
255 | }; |
256 | struct { |
257 | /* Backing devices */ |
258 | __u64 data_offset; |
259 | |
260 | /* |
261 | * block_size from the cache device section is still used by |
262 | * backing devices, so don't add anything here until we fix |
263 | * things to not need it for backing devices anymore |
264 | */ |
265 | }; |
266 | }; |
267 | |
268 | __u32 last_mount; /* time overflow in y2106 */ |
269 | |
270 | __u16 first_bucket; |
271 | union { |
272 | __u16 njournal_buckets; |
273 | __u16 keys; |
274 | }; |
275 | __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ |
276 | }; |
277 | |
278 | static inline _Bool SB_IS_BDEV(const struct cache_sb *sb) |
279 | { |
280 | return sb->version == BCACHE_SB_VERSION_BDEV |
281 | || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET |
282 | || sb->version == BCACHE_SB_VERSION_BDEV_WITH_FEATURES; |
283 | } |
284 | |
285 | BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); |
286 | BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); |
287 | BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); |
288 | #define CACHE_REPLACEMENT_LRU 0U |
289 | #define CACHE_REPLACEMENT_FIFO 1U |
290 | #define CACHE_REPLACEMENT_RANDOM 2U |
291 | |
292 | BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); |
293 | #define CACHE_MODE_WRITETHROUGH 0U |
294 | #define CACHE_MODE_WRITEBACK 1U |
295 | #define CACHE_MODE_WRITEAROUND 2U |
296 | #define CACHE_MODE_NONE 3U |
297 | BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); |
298 | #define BDEV_STATE_NONE 0U |
299 | #define BDEV_STATE_CLEAN 1U |
300 | #define BDEV_STATE_DIRTY 2U |
301 | #define BDEV_STATE_STALE 3U |
302 | |
/*
 * Magic numbers
 *
 * Each of the other data structures has a magic number of its own, which is
 * xored with the first part of the cache set's UUID (see jset_magic(),
 * pset_magic() and bset_magic()).
 */

#define JSET_MAGIC		0x245235c1a3625032ULL
#define PSET_MAGIC		0x6750e15f87337f91ULL
#define BSET_MAGIC		0x90135c78b99e07f5ULL
313 | |
314 | static inline __u64 jset_magic(struct cache_sb *sb) |
315 | { |
316 | return sb->set_magic ^ JSET_MAGIC; |
317 | } |
318 | |
319 | static inline __u64 pset_magic(struct cache_sb *sb) |
320 | { |
321 | return sb->set_magic ^ PSET_MAGIC; |
322 | } |
323 | |
324 | static inline __u64 bset_magic(struct cache_sb *sb) |
325 | { |
326 | return sb->set_magic ^ BSET_MAGIC; |
327 | } |
328 | |
/*
 * Journal
 *
 * On disk format for a journal entry:
 *
 * seq       increases monotonically; every journal entry has its own unique
 *           sequence number.
 *
 * last_seq  is the oldest journal entry that still has keys the btree
 *           hasn't flushed to disk yet.
 *
 * version   is for on disk format changes.
 */

#define BCACHE_JSET_VERSION_UUIDv1	1
#define BCACHE_JSET_VERSION_UUID	1	/* Always latest UUID format */
#define BCACHE_JSET_VERSION		1
345 | |
346 | struct jset { |
347 | __u64 csum; |
348 | __u64 magic; |
349 | __u64 seq; |
350 | __u32 version; |
351 | __u32 keys; |
352 | |
353 | __u64 last_seq; |
354 | |
355 | BKEY_PADDED(uuid_bucket); |
356 | BKEY_PADDED(btree_root); |
357 | __u16 btree_level; |
358 | __u16 pad[3]; |
359 | |
360 | __u64 prio_bucket[MAX_CACHES_PER_SET]; |
361 | |
362 | union { |
363 | DECLARE_FLEX_ARRAY(struct bkey, start); |
364 | DECLARE_FLEX_ARRAY(__u64, d); |
365 | }; |
366 | }; |
367 | |
368 | /* Bucket prios/gens */ |
369 | |
/*
 * struct prio_set - fixed header followed by a variable number of packed
 * three-byte struct bucket_disk records (a 16-bit prio and an 8-bit gen).
 */
struct prio_set {
	__u64			csum;
	__u64			magic;
	__u64			seq;
	__u32			version;
	__u32			pad;

	__u64			next_bucket;

	/* packed: no padding inside or between the records */
	struct bucket_disk {
		__u16		prio;
		__u8		gen;
	} __attribute__((packed)) data[];
};
384 | |
385 | /* UUIDS - per backing device/flash only volume metadata */ |
386 | |
/*
 * struct uuid_entry - the named members are overlaid on a 128-byte pad[]
 * array, which fixes the size of the union.
 */
struct uuid_entry {
	union {
		struct {
			__u8	uuid[16];
			__u8	label[32];
			__u32	first_reg; /* time overflow in y2106 */
			__u32	last_reg;
			__u32	invalidated;

			/* bit fields, see UUID_FLASH_ONLY() */
			__u32	flags;
			/* Size of flash only volumes */
			__u64	sectors;
		};

		__u8		pad[128];
	};
};
404 | |
405 | BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1); |
406 | |
407 | /* Btree nodes */ |
408 | |
/*
 * Bset versions:
 *
 * Version 1: Seed pointer into btree node checksum
 */
#define BCACHE_BSET_CSUM		1
#define BCACHE_BSET_VERSION		1
413 | |
414 | /* |
415 | * Btree nodes |
416 | * |
417 | * On disk a btree node is a list/log of these; within each set the keys are |
418 | * sorted |
419 | */ |
420 | struct bset { |
421 | __u64 csum; |
422 | __u64 magic; |
423 | __u64 seq; |
424 | __u32 version; |
425 | __u32 keys; |
426 | |
427 | union { |
428 | DECLARE_FLEX_ARRAY(struct bkey, start); |
429 | DECLARE_FLEX_ARRAY(__u64, d); |
430 | }; |
431 | }; |
432 | |
433 | /* OBSOLETE */ |
434 | |
435 | /* UUIDS - per backing device/flash only volume metadata */ |
436 | |
/*
 * struct uuid_entry_v0 - obsolete 64-byte layout.  It has the same leading
 * members as struct uuid_entry, with a pad word where uuid_entry has
 * flags, and no sectors member.
 */
struct uuid_entry_v0 {
	__u8		uuid[16];
	__u8		label[32];
	__u32		first_reg;
	__u32		last_reg;
	__u32		invalidated;
	__u32		pad;
};
445 | |
446 | #endif /* _LINUX_BCACHE_H */ |
447 | |