1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | #include "bcachefs.h" |
4 | #include "bkey.h" |
5 | #include "bkey_cmp.h" |
6 | #include "bkey_methods.h" |
7 | #include "bset.h" |
8 | #include "util.h" |
9 | |
10 | const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT; |
11 | |
12 | void bch2_bkey_packed_to_binary_text(struct printbuf *out, |
13 | const struct bkey_format *f, |
14 | const struct bkey_packed *k) |
15 | { |
16 | const u64 *p = high_word(f, k); |
17 | unsigned word_bits = 64 - high_bit_offset; |
18 | unsigned nr_key_bits = bkey_format_key_bits(format: f) + high_bit_offset; |
19 | u64 v = *p & (~0ULL >> high_bit_offset); |
20 | |
21 | if (!nr_key_bits) { |
22 | prt_str(out, str: "(empty)" ); |
23 | return; |
24 | } |
25 | |
26 | while (1) { |
27 | unsigned next_key_bits = nr_key_bits; |
28 | |
29 | if (nr_key_bits < 64) { |
30 | v >>= 64 - nr_key_bits; |
31 | next_key_bits = 0; |
32 | } else { |
33 | next_key_bits -= 64; |
34 | } |
35 | |
36 | bch2_prt_u64_base2_nbits(out, v, min(word_bits, nr_key_bits)); |
37 | |
38 | if (!next_key_bits) |
39 | break; |
40 | |
41 | prt_char(out, c: ' '); |
42 | |
43 | p = next_word(p); |
44 | v = *p; |
45 | word_bits = 64; |
46 | nr_key_bits = next_key_bits; |
47 | } |
48 | } |
49 | |
50 | #ifdef CONFIG_BCACHEFS_DEBUG |
51 | |
52 | static void bch2_bkey_pack_verify(const struct bkey_packed *packed, |
53 | const struct bkey *unpacked, |
54 | const struct bkey_format *format) |
55 | { |
56 | struct bkey tmp; |
57 | |
58 | BUG_ON(bkeyp_val_u64s(format, packed) != |
59 | bkey_val_u64s(unpacked)); |
60 | |
61 | BUG_ON(packed->u64s < bkeyp_key_u64s(format, packed)); |
62 | |
63 | tmp = __bch2_bkey_unpack_key(format, packed); |
64 | |
65 | if (memcmp(p: &tmp, q: unpacked, size: sizeof(struct bkey))) { |
66 | struct printbuf buf = PRINTBUF; |
67 | |
68 | prt_printf(&buf, "keys differ: format u64s %u fields %u %u %u %u %u\n" , |
69 | format->key_u64s, |
70 | format->bits_per_field[0], |
71 | format->bits_per_field[1], |
72 | format->bits_per_field[2], |
73 | format->bits_per_field[3], |
74 | format->bits_per_field[4]); |
75 | |
76 | prt_printf(&buf, "compiled unpack: " ); |
77 | bch2_bkey_to_text(&buf, unpacked); |
78 | prt_newline(&buf); |
79 | |
80 | prt_printf(&buf, "c unpack: " ); |
81 | bch2_bkey_to_text(&buf, &tmp); |
82 | prt_newline(&buf); |
83 | |
84 | prt_printf(&buf, "compiled unpack: " ); |
85 | bch2_bkey_packed_to_binary_text(out: &buf, f: &bch2_bkey_format_current, |
86 | k: (struct bkey_packed *) unpacked); |
87 | prt_newline(&buf); |
88 | |
89 | prt_printf(&buf, "c unpack: " ); |
90 | bch2_bkey_packed_to_binary_text(out: &buf, f: &bch2_bkey_format_current, |
91 | k: (struct bkey_packed *) &tmp); |
92 | prt_newline(&buf); |
93 | |
94 | panic(fmt: "%s" , buf.buf); |
95 | } |
96 | } |
97 | |
98 | #else |
99 | static inline void bch2_bkey_pack_verify(const struct bkey_packed *packed, |
100 | const struct bkey *unpacked, |
101 | const struct bkey_format *format) {} |
102 | #endif |
103 | |
104 | struct pack_state { |
105 | const struct bkey_format *format; |
106 | unsigned bits; /* bits remaining in current word */ |
107 | u64 w; /* current word */ |
108 | u64 *p; /* pointer to next word */ |
109 | }; |
110 | |
111 | __always_inline |
112 | static struct pack_state pack_state_init(const struct bkey_format *format, |
113 | struct bkey_packed *k) |
114 | { |
115 | u64 *p = high_word(format, k); |
116 | |
117 | return (struct pack_state) { |
118 | .format = format, |
119 | .bits = 64 - high_bit_offset, |
120 | .w = 0, |
121 | .p = p, |
122 | }; |
123 | } |
124 | |
125 | __always_inline |
126 | static void pack_state_finish(struct pack_state *state, |
127 | struct bkey_packed *k) |
128 | { |
129 | EBUG_ON(state->p < k->_data); |
130 | EBUG_ON(state->p >= (u64 *) k->_data + state->format->key_u64s); |
131 | |
132 | *state->p = state->w; |
133 | } |
134 | |
135 | struct unpack_state { |
136 | const struct bkey_format *format; |
137 | unsigned bits; /* bits remaining in current word */ |
138 | u64 w; /* current word */ |
139 | const u64 *p; /* pointer to next word */ |
140 | }; |
141 | |
142 | __always_inline |
143 | static struct unpack_state unpack_state_init(const struct bkey_format *format, |
144 | const struct bkey_packed *k) |
145 | { |
146 | const u64 *p = high_word(format, k); |
147 | |
148 | return (struct unpack_state) { |
149 | .format = format, |
150 | .bits = 64 - high_bit_offset, |
151 | .w = *p << high_bit_offset, |
152 | .p = p, |
153 | }; |
154 | } |
155 | |
156 | __always_inline |
157 | static u64 get_inc_field(struct unpack_state *state, unsigned field) |
158 | { |
159 | unsigned bits = state->format->bits_per_field[field]; |
160 | u64 v = 0, offset = le64_to_cpu(state->format->field_offset[field]); |
161 | |
162 | if (bits >= state->bits) { |
163 | v = state->w >> (64 - bits); |
164 | bits -= state->bits; |
165 | |
166 | state->p = next_word(state->p); |
167 | state->w = *state->p; |
168 | state->bits = 64; |
169 | } |
170 | |
171 | /* avoid shift by 64 if bits is 0 - bits is never 64 here: */ |
172 | v |= (state->w >> 1) >> (63 - bits); |
173 | state->w <<= bits; |
174 | state->bits -= bits; |
175 | |
176 | return v + offset; |
177 | } |
178 | |
179 | __always_inline |
180 | static void __set_inc_field(struct pack_state *state, unsigned field, u64 v) |
181 | { |
182 | unsigned bits = state->format->bits_per_field[field]; |
183 | |
184 | if (bits) { |
185 | if (bits > state->bits) { |
186 | bits -= state->bits; |
187 | /* avoid shift by 64 if bits is 64 - bits is never 0 here: */ |
188 | state->w |= (v >> 1) >> (bits - 1); |
189 | |
190 | *state->p = state->w; |
191 | state->p = next_word(state->p); |
192 | state->w = 0; |
193 | state->bits = 64; |
194 | } |
195 | |
196 | state->bits -= bits; |
197 | state->w |= v << state->bits; |
198 | } |
199 | } |
200 | |
201 | __always_inline |
202 | static bool set_inc_field(struct pack_state *state, unsigned field, u64 v) |
203 | { |
204 | unsigned bits = state->format->bits_per_field[field]; |
205 | u64 offset = le64_to_cpu(state->format->field_offset[field]); |
206 | |
207 | if (v < offset) |
208 | return false; |
209 | |
210 | v -= offset; |
211 | |
212 | if (fls64(x: v) > bits) |
213 | return false; |
214 | |
215 | __set_inc_field(state, field, v); |
216 | return true; |
217 | } |
218 | |
219 | /* |
220 | * Note: does NOT set out->format (we don't know what it should be here!) |
221 | * |
222 | * Also: doesn't work on extents - it doesn't preserve the invariant that |
223 | * if k is packed bkey_start_pos(k) will successfully pack |
224 | */ |
225 | static bool bch2_bkey_transform_key(const struct bkey_format *out_f, |
226 | struct bkey_packed *out, |
227 | const struct bkey_format *in_f, |
228 | const struct bkey_packed *in) |
229 | { |
230 | struct pack_state out_s = pack_state_init(format: out_f, k: out); |
231 | struct unpack_state in_s = unpack_state_init(format: in_f, k: in); |
232 | u64 *w = out->_data; |
233 | unsigned i; |
234 | |
235 | *w = 0; |
236 | |
237 | for (i = 0; i < BKEY_NR_FIELDS; i++) |
238 | if (!set_inc_field(state: &out_s, field: i, v: get_inc_field(state: &in_s, field: i))) |
239 | return false; |
240 | |
241 | /* Can't happen because the val would be too big to unpack: */ |
242 | EBUG_ON(in->u64s - in_f->key_u64s + out_f->key_u64s > U8_MAX); |
243 | |
244 | pack_state_finish(state: &out_s, k: out); |
245 | out->u64s = out_f->key_u64s + in->u64s - in_f->key_u64s; |
246 | out->needs_whiteout = in->needs_whiteout; |
247 | out->type = in->type; |
248 | |
249 | return true; |
250 | } |
251 | |
252 | bool bch2_bkey_transform(const struct bkey_format *out_f, |
253 | struct bkey_packed *out, |
254 | const struct bkey_format *in_f, |
255 | const struct bkey_packed *in) |
256 | { |
257 | if (!bch2_bkey_transform_key(out_f, out, in_f, in)) |
258 | return false; |
259 | |
260 | memcpy_u64s(dst: (u64 *) out + out_f->key_u64s, |
261 | src: (u64 *) in + in_f->key_u64s, |
262 | u64s: (in->u64s - in_f->key_u64s)); |
263 | return true; |
264 | } |
265 | |
266 | struct bkey __bch2_bkey_unpack_key(const struct bkey_format *format, |
267 | const struct bkey_packed *in) |
268 | { |
269 | struct unpack_state state = unpack_state_init(format, k: in); |
270 | struct bkey out; |
271 | |
272 | EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); |
273 | EBUG_ON(in->u64s < format->key_u64s); |
274 | EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE); |
275 | EBUG_ON(in->u64s - format->key_u64s + BKEY_U64s > U8_MAX); |
276 | |
277 | out.u64s = BKEY_U64s + in->u64s - format->key_u64s; |
278 | out.format = KEY_FORMAT_CURRENT; |
279 | out.needs_whiteout = in->needs_whiteout; |
280 | out.type = in->type; |
281 | out.pad[0] = 0; |
282 | |
283 | #define x(id, field) out.field = get_inc_field(&state, id); |
284 | bkey_fields() |
285 | #undef x |
286 | |
287 | return out; |
288 | } |
289 | |
290 | #ifndef HAVE_BCACHEFS_COMPILED_UNPACK |
291 | struct bpos __bkey_unpack_pos(const struct bkey_format *format, |
292 | const struct bkey_packed *in) |
293 | { |
294 | struct unpack_state state = unpack_state_init(format, k: in); |
295 | struct bpos out; |
296 | |
297 | EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); |
298 | EBUG_ON(in->u64s < format->key_u64s); |
299 | EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE); |
300 | |
301 | out.inode = get_inc_field(state: &state, field: BKEY_FIELD_INODE); |
302 | out.offset = get_inc_field(state: &state, field: BKEY_FIELD_OFFSET); |
303 | out.snapshot = get_inc_field(state: &state, field: BKEY_FIELD_SNAPSHOT); |
304 | |
305 | return out; |
306 | } |
307 | #endif |
308 | |
309 | /** |
310 | * bch2_bkey_pack_key -- pack just the key, not the value |
311 | * @out: packed result |
312 | * @in: key to pack |
313 | * @format: format of packed result |
314 | * |
315 | * Returns: true on success, false on failure |
316 | */ |
317 | bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in, |
318 | const struct bkey_format *format) |
319 | { |
320 | struct pack_state state = pack_state_init(format, k: out); |
321 | u64 *w = out->_data; |
322 | |
323 | EBUG_ON((void *) in == (void *) out); |
324 | EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); |
325 | EBUG_ON(in->format != KEY_FORMAT_CURRENT); |
326 | |
327 | *w = 0; |
328 | |
329 | #define x(id, field) if (!set_inc_field(&state, id, in->field)) return false; |
330 | bkey_fields() |
331 | #undef x |
332 | pack_state_finish(state: &state, k: out); |
333 | out->u64s = format->key_u64s + in->u64s - BKEY_U64s; |
334 | out->format = KEY_FORMAT_LOCAL_BTREE; |
335 | out->needs_whiteout = in->needs_whiteout; |
336 | out->type = in->type; |
337 | |
338 | bch2_bkey_pack_verify(packed: out, unpacked: in, format); |
339 | return true; |
340 | } |
341 | |
342 | /** |
343 | * bch2_bkey_unpack -- unpack the key and the value |
344 | * @b: btree node of @src key (for packed format) |
345 | * @dst: unpacked result |
346 | * @src: packed input |
347 | */ |
348 | void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst, |
349 | const struct bkey_packed *src) |
350 | { |
351 | __bkey_unpack_key(b, dst: &dst->k, src); |
352 | |
353 | memcpy_u64s(dst: &dst->v, |
354 | bkeyp_val(&b->format, src), |
355 | u64s: bkeyp_val_u64s(format: &b->format, k: src)); |
356 | } |
357 | |
358 | /** |
359 | * bch2_bkey_pack -- pack the key and the value |
360 | * @dst: packed result |
361 | * @src: unpacked input |
362 | * @format: format of packed result |
363 | * |
364 | * Returns: true on success, false on failure |
365 | */ |
366 | bool bch2_bkey_pack(struct bkey_packed *dst, const struct bkey_i *src, |
367 | const struct bkey_format *format) |
368 | { |
369 | struct bkey_packed tmp; |
370 | |
371 | if (!bch2_bkey_pack_key(out: &tmp, in: &src->k, format)) |
372 | return false; |
373 | |
374 | memmove_u64s(dst: (u64 *) dst + format->key_u64s, |
375 | src: &src->v, |
376 | bkey_val_u64s(&src->k)); |
377 | memcpy_u64s_small(dst, src: &tmp, u64s: format->key_u64s); |
378 | |
379 | return true; |
380 | } |
381 | |
382 | __always_inline |
383 | static bool set_inc_field_lossy(struct pack_state *state, unsigned field, u64 v) |
384 | { |
385 | unsigned bits = state->format->bits_per_field[field]; |
386 | u64 offset = le64_to_cpu(state->format->field_offset[field]); |
387 | bool ret = true; |
388 | |
389 | EBUG_ON(v < offset); |
390 | v -= offset; |
391 | |
392 | if (fls64(x: v) > bits) { |
393 | v = ~(~0ULL << bits); |
394 | ret = false; |
395 | } |
396 | |
397 | __set_inc_field(state, field, v); |
398 | return ret; |
399 | } |
400 | |
401 | #ifdef CONFIG_BCACHEFS_DEBUG |
402 | static bool bkey_packed_successor(struct bkey_packed *out, |
403 | const struct btree *b, |
404 | struct bkey_packed k) |
405 | { |
406 | const struct bkey_format *f = &b->format; |
407 | unsigned nr_key_bits = b->nr_key_bits; |
408 | unsigned first_bit, offset; |
409 | u64 *p; |
410 | |
411 | EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f)); |
412 | |
413 | if (!nr_key_bits) |
414 | return false; |
415 | |
416 | *out = k; |
417 | |
418 | first_bit = high_bit_offset + nr_key_bits - 1; |
419 | p = nth_word(high_word(f, out), first_bit >> 6); |
420 | offset = 63 - (first_bit & 63); |
421 | |
422 | while (nr_key_bits) { |
423 | unsigned bits = min(64 - offset, nr_key_bits); |
424 | u64 mask = (~0ULL >> (64 - bits)) << offset; |
425 | |
426 | if ((*p & mask) != mask) { |
427 | *p += 1ULL << offset; |
428 | EBUG_ON(bch2_bkey_cmp_packed(b, out, &k) <= 0); |
429 | return true; |
430 | } |
431 | |
432 | *p &= ~mask; |
433 | p = prev_word(p); |
434 | nr_key_bits -= bits; |
435 | offset = 0; |
436 | } |
437 | |
438 | return false; |
439 | } |
440 | |
441 | static bool bkey_format_has_too_big_fields(const struct bkey_format *f) |
442 | { |
443 | for (unsigned i = 0; i < f->nr_fields; i++) { |
444 | unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; |
445 | u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1)); |
446 | u64 packed_max = f->bits_per_field[i] |
447 | ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1)) |
448 | : 0; |
449 | u64 field_offset = le64_to_cpu(f->field_offset[i]); |
450 | |
451 | if (packed_max + field_offset < packed_max || |
452 | packed_max + field_offset > unpacked_max) |
453 | return true; |
454 | } |
455 | |
456 | return false; |
457 | } |
458 | #endif |
459 | |
460 | /* |
461 | * Returns a packed key that compares <= in |
462 | * |
463 | * This is used in bset_search_tree(), where we need a packed pos in order to be |
464 | * able to compare against the keys in the auxiliary search tree - and it's |
465 | * legal to use a packed pos that isn't equivalent to the original pos, |
466 | * _provided_ it compares <= to the original pos. |
467 | */ |
468 | enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out, |
469 | struct bpos in, |
470 | const struct btree *b) |
471 | { |
472 | const struct bkey_format *f = &b->format; |
473 | struct pack_state state = pack_state_init(format: f, k: out); |
474 | u64 *w = out->_data; |
475 | #ifdef CONFIG_BCACHEFS_DEBUG |
476 | struct bpos orig = in; |
477 | #endif |
478 | bool exact = true; |
479 | unsigned i; |
480 | |
481 | /* |
482 | * bch2_bkey_pack_key() will write to all of f->key_u64s, minus the 3 |
483 | * byte header, but pack_pos() won't if the len/version fields are big |
484 | * enough - we need to make sure to zero them out: |
485 | */ |
486 | for (i = 0; i < f->key_u64s; i++) |
487 | w[i] = 0; |
488 | |
489 | if (unlikely(in.snapshot < |
490 | le64_to_cpu(f->field_offset[BKEY_FIELD_SNAPSHOT]))) { |
491 | if (!in.offset-- && |
492 | !in.inode--) |
493 | return BKEY_PACK_POS_FAIL; |
494 | in.snapshot = KEY_SNAPSHOT_MAX; |
495 | exact = false; |
496 | } |
497 | |
498 | if (unlikely(in.offset < |
499 | le64_to_cpu(f->field_offset[BKEY_FIELD_OFFSET]))) { |
500 | if (!in.inode--) |
501 | return BKEY_PACK_POS_FAIL; |
502 | in.offset = KEY_OFFSET_MAX; |
503 | in.snapshot = KEY_SNAPSHOT_MAX; |
504 | exact = false; |
505 | } |
506 | |
507 | if (unlikely(in.inode < |
508 | le64_to_cpu(f->field_offset[BKEY_FIELD_INODE]))) |
509 | return BKEY_PACK_POS_FAIL; |
510 | |
511 | if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_INODE, in.inode))) { |
512 | in.offset = KEY_OFFSET_MAX; |
513 | in.snapshot = KEY_SNAPSHOT_MAX; |
514 | exact = false; |
515 | } |
516 | |
517 | if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_OFFSET, in.offset))) { |
518 | in.snapshot = KEY_SNAPSHOT_MAX; |
519 | exact = false; |
520 | } |
521 | |
522 | if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_SNAPSHOT, in.snapshot))) |
523 | exact = false; |
524 | |
525 | pack_state_finish(state: &state, k: out); |
526 | out->u64s = f->key_u64s; |
527 | out->format = KEY_FORMAT_LOCAL_BTREE; |
528 | out->type = KEY_TYPE_deleted; |
529 | |
530 | #ifdef CONFIG_BCACHEFS_DEBUG |
531 | if (exact) { |
532 | BUG_ON(bkey_cmp_left_packed(b, out, &orig)); |
533 | } else { |
534 | struct bkey_packed successor; |
535 | |
536 | BUG_ON(bkey_cmp_left_packed(b, out, &orig) >= 0); |
537 | BUG_ON(bkey_packed_successor(&successor, b, *out) && |
538 | bkey_cmp_left_packed(b, &successor, &orig) < 0 && |
539 | !bkey_format_has_too_big_fields(f)); |
540 | } |
541 | #endif |
542 | |
543 | return exact ? BKEY_PACK_POS_EXACT : BKEY_PACK_POS_SMALLER; |
544 | } |
545 | |
546 | void bch2_bkey_format_init(struct bkey_format_state *s) |
547 | { |
548 | unsigned i; |
549 | |
550 | for (i = 0; i < ARRAY_SIZE(s->field_min); i++) |
551 | s->field_min[i] = U64_MAX; |
552 | |
553 | for (i = 0; i < ARRAY_SIZE(s->field_max); i++) |
554 | s->field_max[i] = 0; |
555 | |
556 | /* Make sure we can store a size of 0: */ |
557 | s->field_min[BKEY_FIELD_SIZE] = 0; |
558 | } |
559 | |
560 | void bch2_bkey_format_add_pos(struct bkey_format_state *s, struct bpos p) |
561 | { |
562 | unsigned field = 0; |
563 | |
564 | __bkey_format_add(s, field: field++, v: p.inode); |
565 | __bkey_format_add(s, field: field++, v: p.offset); |
566 | __bkey_format_add(s, field: field++, v: p.snapshot); |
567 | } |
568 | |
569 | /* |
570 | * We don't want it to be possible for the packed format to represent fields |
571 | * bigger than a u64... that will cause confusion and issues (like with |
572 | * bkey_packed_successor()) |
573 | */ |
574 | static void set_format_field(struct bkey_format *f, enum bch_bkey_fields i, |
575 | unsigned bits, u64 offset) |
576 | { |
577 | unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; |
578 | u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1)); |
579 | |
580 | bits = min(bits, unpacked_bits); |
581 | |
582 | offset = bits == unpacked_bits ? 0 : min(offset, unpacked_max - ((1ULL << bits) - 1)); |
583 | |
584 | f->bits_per_field[i] = bits; |
585 | f->field_offset[i] = cpu_to_le64(offset); |
586 | } |
587 | |
588 | struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s) |
589 | { |
590 | unsigned i, bits = KEY_PACKED_BITS_START; |
591 | struct bkey_format ret = { |
592 | .nr_fields = BKEY_NR_FIELDS, |
593 | }; |
594 | |
595 | for (i = 0; i < ARRAY_SIZE(s->field_min); i++) { |
596 | s->field_min[i] = min(s->field_min[i], s->field_max[i]); |
597 | |
598 | set_format_field(f: &ret, i, |
599 | bits: fls64(x: s->field_max[i] - s->field_min[i]), |
600 | offset: s->field_min[i]); |
601 | |
602 | bits += ret.bits_per_field[i]; |
603 | } |
604 | |
605 | /* allow for extent merging: */ |
606 | if (ret.bits_per_field[BKEY_FIELD_SIZE]) { |
607 | unsigned b = min(4U, 32U - ret.bits_per_field[BKEY_FIELD_SIZE]); |
608 | |
609 | ret.bits_per_field[BKEY_FIELD_SIZE] += b; |
610 | bits += b; |
611 | } |
612 | |
613 | ret.key_u64s = DIV_ROUND_UP(bits, 64); |
614 | |
615 | /* if we have enough spare bits, round fields up to nearest byte */ |
616 | bits = ret.key_u64s * 64 - bits; |
617 | |
618 | for (i = 0; i < ARRAY_SIZE(ret.bits_per_field); i++) { |
619 | unsigned r = round_up(ret.bits_per_field[i], 8) - |
620 | ret.bits_per_field[i]; |
621 | |
622 | if (r <= bits) { |
623 | set_format_field(f: &ret, i, |
624 | bits: ret.bits_per_field[i] + r, |
625 | le64_to_cpu(ret.field_offset[i])); |
626 | bits -= r; |
627 | } |
628 | } |
629 | |
630 | #ifdef CONFIG_BCACHEFS_DEBUG |
631 | { |
632 | struct printbuf buf = PRINTBUF; |
633 | |
634 | BUG_ON(bch2_bkey_format_invalid(NULL, &ret, 0, &buf)); |
635 | printbuf_exit(&buf); |
636 | } |
637 | #endif |
638 | return ret; |
639 | } |
640 | |
641 | int bch2_bkey_format_invalid(struct bch_fs *c, |
642 | struct bkey_format *f, |
643 | enum bkey_invalid_flags flags, |
644 | struct printbuf *err) |
645 | { |
646 | unsigned i, bits = KEY_PACKED_BITS_START; |
647 | |
648 | if (f->nr_fields != BKEY_NR_FIELDS) { |
649 | prt_printf(err, "incorrect number of fields: got %u, should be %u" , |
650 | f->nr_fields, BKEY_NR_FIELDS); |
651 | return -BCH_ERR_invalid; |
652 | } |
653 | |
654 | /* |
655 | * Verify that the packed format can't represent fields larger than the |
656 | * unpacked format: |
657 | */ |
658 | for (i = 0; i < f->nr_fields; i++) { |
659 | if (!c || c->sb.version_min >= bcachefs_metadata_version_snapshot) { |
660 | unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; |
661 | u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1)); |
662 | u64 packed_max = f->bits_per_field[i] |
663 | ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1)) |
664 | : 0; |
665 | u64 field_offset = le64_to_cpu(f->field_offset[i]); |
666 | |
667 | if (packed_max + field_offset < packed_max || |
668 | packed_max + field_offset > unpacked_max) { |
669 | prt_printf(err, "field %u too large: %llu + %llu > %llu" , |
670 | i, packed_max, field_offset, unpacked_max); |
671 | return -BCH_ERR_invalid; |
672 | } |
673 | } |
674 | |
675 | bits += f->bits_per_field[i]; |
676 | } |
677 | |
678 | if (f->key_u64s != DIV_ROUND_UP(bits, 64)) { |
679 | prt_printf(err, "incorrect key_u64s: got %u, should be %u" , |
680 | f->key_u64s, DIV_ROUND_UP(bits, 64)); |
681 | return -BCH_ERR_invalid; |
682 | } |
683 | |
684 | return 0; |
685 | } |
686 | |
687 | void bch2_bkey_format_to_text(struct printbuf *out, const struct bkey_format *f) |
688 | { |
689 | prt_printf(out, "u64s %u fields " , f->key_u64s); |
690 | |
691 | for (unsigned i = 0; i < ARRAY_SIZE(f->bits_per_field); i++) { |
692 | if (i) |
693 | prt_str(out, str: ", " ); |
694 | prt_printf(out, "%u:%llu" , |
695 | f->bits_per_field[i], |
696 | le64_to_cpu(f->field_offset[i])); |
697 | } |
698 | } |
699 | |
700 | /* |
701 | * Most significant differing bit |
702 | * Bits are indexed from 0 - return is [0, nr_key_bits) |
703 | */ |
704 | __pure |
705 | unsigned bch2_bkey_greatest_differing_bit(const struct btree *b, |
706 | const struct bkey_packed *l_k, |
707 | const struct bkey_packed *r_k) |
708 | { |
709 | const u64 *l = high_word(&b->format, l_k); |
710 | const u64 *r = high_word(&b->format, r_k); |
711 | unsigned nr_key_bits = b->nr_key_bits; |
712 | unsigned word_bits = 64 - high_bit_offset; |
713 | u64 l_v, r_v; |
714 | |
715 | EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format)); |
716 | |
717 | /* for big endian, skip past header */ |
718 | l_v = *l & (~0ULL >> high_bit_offset); |
719 | r_v = *r & (~0ULL >> high_bit_offset); |
720 | |
721 | while (nr_key_bits) { |
722 | if (nr_key_bits < word_bits) { |
723 | l_v >>= word_bits - nr_key_bits; |
724 | r_v >>= word_bits - nr_key_bits; |
725 | nr_key_bits = 0; |
726 | } else { |
727 | nr_key_bits -= word_bits; |
728 | } |
729 | |
730 | if (l_v != r_v) |
731 | return fls64(x: l_v ^ r_v) - 1 + nr_key_bits; |
732 | |
733 | l = next_word(l); |
734 | r = next_word(r); |
735 | |
736 | l_v = *l; |
737 | r_v = *r; |
738 | word_bits = 64; |
739 | } |
740 | |
741 | return 0; |
742 | } |
743 | |
744 | /* |
745 | * First set bit |
746 | * Bits are indexed from 0 - return is [0, nr_key_bits) |
747 | */ |
748 | __pure |
749 | unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k) |
750 | { |
751 | const u64 *p = high_word(&b->format, k); |
752 | unsigned nr_key_bits = b->nr_key_bits; |
753 | unsigned ret = 0, offset; |
754 | |
755 | EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format)); |
756 | |
757 | offset = nr_key_bits; |
758 | while (offset > 64) { |
759 | p = next_word(p); |
760 | offset -= 64; |
761 | } |
762 | |
763 | offset = 64 - offset; |
764 | |
765 | while (nr_key_bits) { |
766 | unsigned bits = nr_key_bits + offset < 64 |
767 | ? nr_key_bits |
768 | : 64 - offset; |
769 | |
770 | u64 mask = (~0ULL >> (64 - bits)) << offset; |
771 | |
772 | if (*p & mask) |
773 | return ret + __ffs64(word: *p & mask) - offset; |
774 | |
775 | p = prev_word(p); |
776 | nr_key_bits -= bits; |
777 | ret += bits; |
778 | offset = 0; |
779 | } |
780 | |
781 | return 0; |
782 | } |
783 | |
784 | #ifdef HAVE_BCACHEFS_COMPILED_UNPACK |
785 | |
786 | #define I(_x) (*(out)++ = (_x)) |
787 | #define I1(i0) I(i0) |
788 | #define I2(i0, i1) (I1(i0), I(i1)) |
789 | #define I3(i0, i1, i2) (I2(i0, i1), I(i2)) |
790 | #define I4(i0, i1, i2, i3) (I3(i0, i1, i2), I(i3)) |
791 | #define I5(i0, i1, i2, i3, i4) (I4(i0, i1, i2, i3), I(i4)) |
792 | |
793 | static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out, |
794 | enum bch_bkey_fields field, |
795 | unsigned dst_offset, unsigned dst_size, |
796 | bool *eax_zeroed) |
797 | { |
798 | unsigned bits = format->bits_per_field[field]; |
799 | u64 offset = le64_to_cpu(format->field_offset[field]); |
800 | unsigned i, byte, bit_offset, align, shl, shr; |
801 | |
802 | if (!bits && !offset) { |
803 | if (!*eax_zeroed) { |
804 | /* xor eax, eax */ |
805 | I2(0x31, 0xc0); |
806 | } |
807 | |
808 | *eax_zeroed = true; |
809 | goto set_field; |
810 | } |
811 | |
812 | if (!bits) { |
813 | /* just return offset: */ |
814 | |
815 | switch (dst_size) { |
816 | case 8: |
817 | if (offset > S32_MAX) { |
818 | /* mov [rdi + dst_offset], offset */ |
819 | I3(0xc7, 0x47, dst_offset); |
820 | memcpy(out, &offset, 4); |
821 | out += 4; |
822 | |
823 | I3(0xc7, 0x47, dst_offset + 4); |
824 | memcpy(out, (void *) &offset + 4, 4); |
825 | out += 4; |
826 | } else { |
827 | /* mov [rdi + dst_offset], offset */ |
828 | /* sign extended */ |
829 | I4(0x48, 0xc7, 0x47, dst_offset); |
830 | memcpy(out, &offset, 4); |
831 | out += 4; |
832 | } |
833 | break; |
834 | case 4: |
835 | /* mov [rdi + dst_offset], offset */ |
836 | I3(0xc7, 0x47, dst_offset); |
837 | memcpy(out, &offset, 4); |
838 | out += 4; |
839 | break; |
840 | default: |
841 | BUG(); |
842 | } |
843 | |
844 | return out; |
845 | } |
846 | |
847 | bit_offset = format->key_u64s * 64; |
848 | for (i = 0; i <= field; i++) |
849 | bit_offset -= format->bits_per_field[i]; |
850 | |
851 | byte = bit_offset / 8; |
852 | bit_offset -= byte * 8; |
853 | |
854 | *eax_zeroed = false; |
855 | |
856 | if (bit_offset == 0 && bits == 8) { |
857 | /* movzx eax, BYTE PTR [rsi + imm8] */ |
858 | I4(0x0f, 0xb6, 0x46, byte); |
859 | } else if (bit_offset == 0 && bits == 16) { |
860 | /* movzx eax, WORD PTR [rsi + imm8] */ |
861 | I4(0x0f, 0xb7, 0x46, byte); |
862 | } else if (bit_offset + bits <= 32) { |
863 | align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3); |
864 | byte -= align; |
865 | bit_offset += align * 8; |
866 | |
867 | BUG_ON(bit_offset + bits > 32); |
868 | |
869 | /* mov eax, [rsi + imm8] */ |
870 | I3(0x8b, 0x46, byte); |
871 | |
872 | if (bit_offset) { |
873 | /* shr eax, imm8 */ |
874 | I3(0xc1, 0xe8, bit_offset); |
875 | } |
876 | |
877 | if (bit_offset + bits < 32) { |
878 | unsigned mask = ~0U >> (32 - bits); |
879 | |
880 | /* and eax, imm32 */ |
881 | I1(0x25); |
882 | memcpy(out, &mask, 4); |
883 | out += 4; |
884 | } |
885 | } else if (bit_offset + bits <= 64) { |
886 | align = min(8 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 7); |
887 | byte -= align; |
888 | bit_offset += align * 8; |
889 | |
890 | BUG_ON(bit_offset + bits > 64); |
891 | |
892 | /* mov rax, [rsi + imm8] */ |
893 | I4(0x48, 0x8b, 0x46, byte); |
894 | |
895 | shl = 64 - bit_offset - bits; |
896 | shr = bit_offset + shl; |
897 | |
898 | if (shl) { |
899 | /* shl rax, imm8 */ |
900 | I4(0x48, 0xc1, 0xe0, shl); |
901 | } |
902 | |
903 | if (shr) { |
904 | /* shr rax, imm8 */ |
905 | I4(0x48, 0xc1, 0xe8, shr); |
906 | } |
907 | } else { |
908 | align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3); |
909 | byte -= align; |
910 | bit_offset += align * 8; |
911 | |
912 | BUG_ON(bit_offset + bits > 96); |
913 | |
914 | /* mov rax, [rsi + byte] */ |
915 | I4(0x48, 0x8b, 0x46, byte); |
916 | |
917 | /* mov edx, [rsi + byte + 8] */ |
918 | I3(0x8b, 0x56, byte + 8); |
919 | |
920 | /* bits from next word: */ |
921 | shr = bit_offset + bits - 64; |
922 | BUG_ON(shr > bit_offset); |
923 | |
924 | /* shr rax, bit_offset */ |
925 | I4(0x48, 0xc1, 0xe8, shr); |
926 | |
927 | /* shl rdx, imm8 */ |
928 | I4(0x48, 0xc1, 0xe2, 64 - shr); |
929 | |
930 | /* or rax, rdx */ |
931 | I3(0x48, 0x09, 0xd0); |
932 | |
933 | shr = bit_offset - shr; |
934 | |
935 | if (shr) { |
936 | /* shr rax, imm8 */ |
937 | I4(0x48, 0xc1, 0xe8, shr); |
938 | } |
939 | } |
940 | |
941 | /* rax += offset: */ |
942 | if (offset > S32_MAX) { |
943 | /* mov rdx, imm64 */ |
944 | I2(0x48, 0xba); |
945 | memcpy(out, &offset, 8); |
946 | out += 8; |
947 | /* add %rdx, %rax */ |
948 | I3(0x48, 0x01, 0xd0); |
949 | } else if (offset + (~0ULL >> (64 - bits)) > U32_MAX) { |
950 | /* add rax, imm32 */ |
951 | I2(0x48, 0x05); |
952 | memcpy(out, &offset, 4); |
953 | out += 4; |
954 | } else if (offset) { |
955 | /* add eax, imm32 */ |
956 | I1(0x05); |
957 | memcpy(out, &offset, 4); |
958 | out += 4; |
959 | } |
960 | set_field: |
961 | switch (dst_size) { |
962 | case 8: |
963 | /* mov [rdi + dst_offset], rax */ |
964 | I4(0x48, 0x89, 0x47, dst_offset); |
965 | break; |
966 | case 4: |
967 | /* mov [rdi + dst_offset], eax */ |
968 | I3(0x89, 0x47, dst_offset); |
969 | break; |
970 | default: |
971 | BUG(); |
972 | } |
973 | |
974 | return out; |
975 | } |
976 | |
977 | int bch2_compile_bkey_format(const struct bkey_format *format, void *_out) |
978 | { |
979 | bool eax_zeroed = false; |
980 | u8 *out = _out; |
981 | |
982 | /* |
983 | * rdi: dst - unpacked key |
984 | * rsi: src - packed key |
985 | */ |
986 | |
987 | /* k->u64s, k->format, k->type */ |
988 | |
989 | /* mov eax, [rsi] */ |
990 | I2(0x8b, 0x06); |
991 | |
992 | /* add eax, BKEY_U64s - format->key_u64s */ |
993 | I5(0x05, BKEY_U64s - format->key_u64s, KEY_FORMAT_CURRENT, 0, 0); |
994 | |
995 | /* and eax, imm32: mask out k->pad: */ |
996 | I5(0x25, 0xff, 0xff, 0xff, 0); |
997 | |
998 | /* mov [rdi], eax */ |
999 | I2(0x89, 0x07); |
1000 | |
1001 | #define x(id, field) \ |
1002 | out = compile_bkey_field(format, out, id, \ |
1003 | offsetof(struct bkey, field), \ |
1004 | sizeof(((struct bkey *) NULL)->field), \ |
1005 | &eax_zeroed); |
1006 | bkey_fields() |
1007 | #undef x |
1008 | |
1009 | /* retq */ |
1010 | I1(0xc3); |
1011 | |
1012 | return (void *) out - _out; |
1013 | } |
1014 | |
1015 | #else |
1016 | #endif |
1017 | |
1018 | __pure |
1019 | int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l, |
1020 | const struct bkey_packed *r, |
1021 | const struct btree *b) |
1022 | { |
1023 | return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b); |
1024 | } |
1025 | |
1026 | __pure __flatten |
1027 | int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *b, |
1028 | const struct bkey_packed *l, |
1029 | const struct bpos *r) |
1030 | { |
1031 | return bpos_cmp(l: bkey_unpack_pos_format_checked(b, src: l), r: *r); |
1032 | } |
1033 | |
1034 | __pure __flatten |
1035 | int bch2_bkey_cmp_packed(const struct btree *b, |
1036 | const struct bkey_packed *l, |
1037 | const struct bkey_packed *r) |
1038 | { |
1039 | return bch2_bkey_cmp_packed_inlined(b, l, r); |
1040 | } |
1041 | |
1042 | __pure __flatten |
1043 | int __bch2_bkey_cmp_left_packed(const struct btree *b, |
1044 | const struct bkey_packed *l, |
1045 | const struct bpos *r) |
1046 | { |
1047 | const struct bkey *l_unpacked; |
1048 | |
1049 | return unlikely(l_unpacked = packed_to_bkey_c(l)) |
1050 | ? bpos_cmp(l: l_unpacked->p, r: *r) |
1051 | : __bch2_bkey_cmp_left_packed_format_checked(b, l, r); |
1052 | } |
1053 | |
1054 | void bch2_bpos_swab(struct bpos *p) |
1055 | { |
1056 | u8 *l = (u8 *) p; |
1057 | u8 *h = ((u8 *) &p[1]) - 1; |
1058 | |
1059 | while (l < h) { |
1060 | swap(*l, *h); |
1061 | l++; |
1062 | --h; |
1063 | } |
1064 | } |
1065 | |
1066 | void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k) |
1067 | { |
1068 | const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current; |
1069 | u8 *l = k->key_start; |
1070 | u8 *h = (u8 *) (k->_data + f->key_u64s) - 1; |
1071 | |
1072 | while (l < h) { |
1073 | swap(*l, *h); |
1074 | l++; |
1075 | --h; |
1076 | } |
1077 | } |
1078 | |
1079 | #ifdef CONFIG_BCACHEFS_DEBUG |
1080 | void bch2_bkey_pack_test(void) |
1081 | { |
1082 | struct bkey t = KEY(4134ULL, 1250629070527416633ULL, 0); |
1083 | struct bkey_packed p; |
1084 | |
1085 | struct bkey_format test_format = { |
1086 | .key_u64s = 3, |
1087 | .nr_fields = BKEY_NR_FIELDS, |
1088 | .bits_per_field = { |
1089 | 13, |
1090 | 64, |
1091 | 32, |
1092 | }, |
1093 | }; |
1094 | |
1095 | struct unpack_state in_s = |
1096 | unpack_state_init(format: &bch2_bkey_format_current, k: (void *) &t); |
1097 | struct pack_state out_s = pack_state_init(format: &test_format, k: &p); |
1098 | unsigned i; |
1099 | |
1100 | for (i = 0; i < out_s.format->nr_fields; i++) { |
1101 | u64 a, v = get_inc_field(state: &in_s, field: i); |
1102 | |
1103 | switch (i) { |
1104 | #define x(id, field) case id: a = t.field; break; |
1105 | bkey_fields() |
1106 | #undef x |
1107 | default: |
1108 | BUG(); |
1109 | } |
1110 | |
1111 | if (a != v) |
1112 | panic(fmt: "got %llu actual %llu i %u\n" , v, a, i); |
1113 | |
1114 | if (!set_inc_field(state: &out_s, field: i, v)) |
1115 | panic(fmt: "failed at %u\n" , i); |
1116 | } |
1117 | |
1118 | BUG_ON(!bch2_bkey_pack_key(&p, &t, &test_format)); |
1119 | } |
1120 | #endif |
1121 | |