1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) 2011 Red Hat, Inc. |
4 | * |
5 | * This file is released under the GPL. |
6 | */ |
7 | |
8 | #include "dm-space-map-common.h" |
9 | #include "dm-transaction-manager.h" |
10 | #include "dm-btree-internal.h" |
11 | #include "dm-persistent-data-internal.h" |
12 | |
13 | #include <linux/bitops.h> |
14 | #include <linux/device-mapper.h> |
15 | |
16 | #define DM_MSG_PREFIX "space map common" |
17 | |
18 | /*----------------------------------------------------------------*/ |
19 | |
20 | /* |
21 | * Index validator. |
22 | */ |
23 | #define INDEX_CSUM_XOR 160478 |
24 | |
25 | static void index_prepare_for_write(struct dm_block_validator *v, |
26 | struct dm_block *b, |
27 | size_t block_size) |
28 | { |
29 | struct disk_metadata_index *mi_le = dm_block_data(b); |
30 | |
31 | mi_le->blocknr = cpu_to_le64(dm_block_location(b)); |
32 | mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding, |
33 | block_size - sizeof(__le32), |
34 | INDEX_CSUM_XOR)); |
35 | } |
36 | |
37 | static int index_check(struct dm_block_validator *v, |
38 | struct dm_block *b, |
39 | size_t block_size) |
40 | { |
41 | struct disk_metadata_index *mi_le = dm_block_data(b); |
42 | __le32 csum_disk; |
43 | |
44 | if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) { |
45 | DMERR_LIMIT("%s failed: blocknr %llu != wanted %llu" , __func__, |
46 | le64_to_cpu(mi_le->blocknr), dm_block_location(b)); |
47 | return -ENOTBLK; |
48 | } |
49 | |
50 | csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding, |
51 | block_size - sizeof(__le32), |
52 | INDEX_CSUM_XOR)); |
53 | if (csum_disk != mi_le->csum) { |
54 | DMERR_LIMIT("i%s failed: csum %u != wanted %u" , __func__, |
55 | le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); |
56 | return -EILSEQ; |
57 | } |
58 | |
59 | return 0; |
60 | } |
61 | |
62 | static struct dm_block_validator index_validator = { |
63 | .name = "index" , |
64 | .prepare_for_write = index_prepare_for_write, |
65 | .check = index_check |
66 | }; |
67 | |
68 | /*----------------------------------------------------------------*/ |
69 | |
70 | /* |
71 | * Bitmap validator |
72 | */ |
73 | #define BITMAP_CSUM_XOR 240779 |
74 | |
75 | static void dm_bitmap_prepare_for_write(struct dm_block_validator *v, |
76 | struct dm_block *b, |
77 | size_t block_size) |
78 | { |
79 | struct disk_bitmap_header * = dm_block_data(b); |
80 | |
81 | disk_header->blocknr = cpu_to_le64(dm_block_location(b)); |
82 | disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used, |
83 | block_size - sizeof(__le32), |
84 | BITMAP_CSUM_XOR)); |
85 | } |
86 | |
87 | static int dm_bitmap_check(struct dm_block_validator *v, |
88 | struct dm_block *b, |
89 | size_t block_size) |
90 | { |
91 | struct disk_bitmap_header * = dm_block_data(b); |
92 | __le32 csum_disk; |
93 | |
94 | if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) { |
95 | DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu" , |
96 | le64_to_cpu(disk_header->blocknr), dm_block_location(b)); |
97 | return -ENOTBLK; |
98 | } |
99 | |
100 | csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used, |
101 | block_size - sizeof(__le32), |
102 | BITMAP_CSUM_XOR)); |
103 | if (csum_disk != disk_header->csum) { |
104 | DMERR_LIMIT("bitmap check failed: csum %u != wanted %u" , |
105 | le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); |
106 | return -EILSEQ; |
107 | } |
108 | |
109 | return 0; |
110 | } |
111 | |
112 | static struct dm_block_validator dm_sm_bitmap_validator = { |
113 | .name = "sm_bitmap" , |
114 | .prepare_for_write = dm_bitmap_prepare_for_write, |
115 | .check = dm_bitmap_check, |
116 | }; |
117 | |
118 | /*----------------------------------------------------------------*/ |
119 | |
120 | #define ENTRIES_PER_WORD 32 |
121 | #define ENTRIES_SHIFT 5 |
122 | |
123 | static void *dm_bitmap_data(struct dm_block *b) |
124 | { |
125 | return dm_block_data(b) + sizeof(struct disk_bitmap_header); |
126 | } |
127 | |
128 | #define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL |
129 | |
130 | static unsigned int dm_bitmap_word_used(void *addr, unsigned int b) |
131 | { |
132 | __le64 *words_le = addr; |
133 | __le64 *w_le = words_le + (b >> ENTRIES_SHIFT); |
134 | |
135 | uint64_t bits = le64_to_cpu(*w_le); |
136 | uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH; |
137 | |
138 | return !(~bits & mask); |
139 | } |
140 | |
141 | static unsigned int sm_lookup_bitmap(void *addr, unsigned int b) |
142 | { |
143 | __le64 *words_le = addr; |
144 | __le64 *w_le = words_le + (b >> ENTRIES_SHIFT); |
145 | unsigned int hi, lo; |
146 | |
147 | b = (b & (ENTRIES_PER_WORD - 1)) << 1; |
148 | hi = !!test_bit_le(nr: b, addr: (void *) w_le); |
149 | lo = !!test_bit_le(nr: b + 1, addr: (void *) w_le); |
150 | return (hi << 1) | lo; |
151 | } |
152 | |
153 | static void sm_set_bitmap(void *addr, unsigned int b, unsigned int val) |
154 | { |
155 | __le64 *words_le = addr; |
156 | __le64 *w_le = words_le + (b >> ENTRIES_SHIFT); |
157 | |
158 | b = (b & (ENTRIES_PER_WORD - 1)) << 1; |
159 | |
160 | if (val & 2) |
161 | __set_bit_le(nr: b, addr: (void *) w_le); |
162 | else |
163 | __clear_bit_le(nr: b, addr: (void *) w_le); |
164 | |
165 | if (val & 1) |
166 | __set_bit_le(nr: b + 1, addr: (void *) w_le); |
167 | else |
168 | __clear_bit_le(nr: b + 1, addr: (void *) w_le); |
169 | } |
170 | |
171 | static int sm_find_free(void *addr, unsigned int begin, unsigned int end, |
172 | unsigned int *result) |
173 | { |
174 | while (begin < end) { |
175 | if (!(begin & (ENTRIES_PER_WORD - 1)) && |
176 | dm_bitmap_word_used(addr, b: begin)) { |
177 | begin += ENTRIES_PER_WORD; |
178 | continue; |
179 | } |
180 | |
181 | if (!sm_lookup_bitmap(addr, b: begin)) { |
182 | *result = begin; |
183 | return 0; |
184 | } |
185 | |
186 | begin++; |
187 | } |
188 | |
189 | return -ENOSPC; |
190 | } |
191 | |
192 | /*----------------------------------------------------------------*/ |
193 | |
194 | static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm) |
195 | { |
196 | memset(ll, 0, sizeof(struct ll_disk)); |
197 | |
198 | ll->tm = tm; |
199 | |
200 | ll->bitmap_info.tm = tm; |
201 | ll->bitmap_info.levels = 1; |
202 | |
203 | /* |
204 | * Because the new bitmap blocks are created via a shadow |
205 | * operation, the old entry has already had its reference count |
206 | * decremented and we don't need the btree to do any bookkeeping. |
207 | */ |
208 | ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry); |
209 | ll->bitmap_info.value_type.inc = NULL; |
210 | ll->bitmap_info.value_type.dec = NULL; |
211 | ll->bitmap_info.value_type.equal = NULL; |
212 | |
213 | ll->ref_count_info.tm = tm; |
214 | ll->ref_count_info.levels = 1; |
215 | ll->ref_count_info.value_type.size = sizeof(uint32_t); |
216 | ll->ref_count_info.value_type.inc = NULL; |
217 | ll->ref_count_info.value_type.dec = NULL; |
218 | ll->ref_count_info.value_type.equal = NULL; |
219 | |
220 | ll->block_size = dm_bm_block_size(bm: dm_tm_get_bm(tm)); |
221 | |
222 | if (ll->block_size > (1 << 30)) { |
223 | DMERR("block size too big to hold bitmaps" ); |
224 | return -EINVAL; |
225 | } |
226 | |
227 | ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) * |
228 | ENTRIES_PER_BYTE; |
229 | ll->nr_blocks = 0; |
230 | ll->bitmap_root = 0; |
231 | ll->ref_count_root = 0; |
232 | ll->bitmap_index_changed = false; |
233 | |
234 | return 0; |
235 | } |
236 | |
237 | int sm_ll_extend(struct ll_disk *ll, dm_block_t ) |
238 | { |
239 | int r; |
240 | dm_block_t i, nr_blocks, nr_indexes; |
241 | unsigned int old_blocks, blocks; |
242 | |
243 | nr_blocks = ll->nr_blocks + extra_blocks; |
244 | old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block); |
245 | blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block); |
246 | |
247 | nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block); |
248 | if (nr_indexes > ll->max_entries(ll)) { |
249 | DMERR("space map too large" ); |
250 | return -EINVAL; |
251 | } |
252 | |
253 | /* |
254 | * We need to set this before the dm_tm_new_block() call below. |
255 | */ |
256 | ll->nr_blocks = nr_blocks; |
257 | for (i = old_blocks; i < blocks; i++) { |
258 | struct dm_block *b; |
259 | struct disk_index_entry idx; |
260 | |
261 | r = dm_tm_new_block(tm: ll->tm, v: &dm_sm_bitmap_validator, result: &b); |
262 | if (r < 0) |
263 | return r; |
264 | |
265 | idx.blocknr = cpu_to_le64(dm_block_location(b)); |
266 | |
267 | dm_tm_unlock(tm: ll->tm, b); |
268 | |
269 | idx.nr_free = cpu_to_le32(ll->entries_per_block); |
270 | idx.none_free_before = 0; |
271 | |
272 | r = ll->save_ie(ll, i, &idx); |
273 | if (r < 0) |
274 | return r; |
275 | } |
276 | |
277 | return 0; |
278 | } |
279 | |
280 | int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result) |
281 | { |
282 | int r; |
283 | dm_block_t index = b; |
284 | struct disk_index_entry ie_disk; |
285 | struct dm_block *blk; |
286 | |
287 | if (b >= ll->nr_blocks) { |
288 | DMERR_LIMIT("metadata block out of bounds" ); |
289 | return -EINVAL; |
290 | } |
291 | |
292 | b = do_div(index, ll->entries_per_block); |
293 | r = ll->load_ie(ll, index, &ie_disk); |
294 | if (r < 0) |
295 | return r; |
296 | |
297 | r = dm_tm_read_lock(tm: ll->tm, le64_to_cpu(ie_disk.blocknr), |
298 | v: &dm_sm_bitmap_validator, result: &blk); |
299 | if (r < 0) |
300 | return r; |
301 | |
302 | *result = sm_lookup_bitmap(addr: dm_bitmap_data(b: blk), b); |
303 | |
304 | dm_tm_unlock(tm: ll->tm, b: blk); |
305 | |
306 | return 0; |
307 | } |
308 | |
309 | static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b, |
310 | uint32_t *result) |
311 | { |
312 | __le32 le_rc; |
313 | int r; |
314 | |
315 | r = dm_btree_lookup(info: &ll->ref_count_info, root: ll->ref_count_root, keys: &b, value_le: &le_rc); |
316 | if (r < 0) |
317 | return r; |
318 | |
319 | *result = le32_to_cpu(le_rc); |
320 | |
321 | return r; |
322 | } |
323 | |
324 | int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result) |
325 | { |
326 | int r = sm_ll_lookup_bitmap(ll, b, result); |
327 | |
328 | if (r) |
329 | return r; |
330 | |
331 | if (*result != 3) |
332 | return r; |
333 | |
334 | return sm_ll_lookup_big_ref_count(ll, b, result); |
335 | } |
336 | |
337 | int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, |
338 | dm_block_t end, dm_block_t *result) |
339 | { |
340 | int r; |
341 | struct disk_index_entry ie_disk; |
342 | dm_block_t i, index_begin = begin; |
343 | dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block); |
344 | |
345 | /* |
346 | * FIXME: Use shifts |
347 | */ |
348 | begin = do_div(index_begin, ll->entries_per_block); |
349 | end = do_div(end, ll->entries_per_block); |
350 | if (end == 0) |
351 | end = ll->entries_per_block; |
352 | |
353 | for (i = index_begin; i < index_end; i++, begin = 0) { |
354 | struct dm_block *blk; |
355 | unsigned int position; |
356 | uint32_t bit_end; |
357 | |
358 | r = ll->load_ie(ll, i, &ie_disk); |
359 | if (r < 0) |
360 | return r; |
361 | |
362 | if (le32_to_cpu(ie_disk.nr_free) == 0) |
363 | continue; |
364 | |
365 | r = dm_tm_read_lock(tm: ll->tm, le64_to_cpu(ie_disk.blocknr), |
366 | v: &dm_sm_bitmap_validator, result: &blk); |
367 | if (r < 0) |
368 | return r; |
369 | |
370 | bit_end = (i == index_end - 1) ? end : ll->entries_per_block; |
371 | |
372 | r = sm_find_free(addr: dm_bitmap_data(b: blk), |
373 | max_t(unsigned int, begin, le32_to_cpu(ie_disk.none_free_before)), |
374 | end: bit_end, result: &position); |
375 | if (r == -ENOSPC) { |
376 | /* |
377 | * This might happen because we started searching |
378 | * part way through the bitmap. |
379 | */ |
380 | dm_tm_unlock(tm: ll->tm, b: blk); |
381 | continue; |
382 | } |
383 | |
384 | dm_tm_unlock(tm: ll->tm, b: blk); |
385 | |
386 | *result = i * ll->entries_per_block + (dm_block_t) position; |
387 | return 0; |
388 | } |
389 | |
390 | return -ENOSPC; |
391 | } |
392 | |
393 | int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, |
394 | dm_block_t begin, dm_block_t end, dm_block_t *b) |
395 | { |
396 | int r; |
397 | uint32_t count; |
398 | |
399 | do { |
400 | r = sm_ll_find_free_block(ll: new_ll, begin, end: new_ll->nr_blocks, result: b); |
401 | if (r) |
402 | break; |
403 | |
404 | /* double check this block wasn't used in the old transaction */ |
405 | if (*b >= old_ll->nr_blocks) |
406 | count = 0; |
407 | else { |
408 | r = sm_ll_lookup(ll: old_ll, b: *b, result: &count); |
409 | if (r) |
410 | break; |
411 | |
412 | if (count) |
413 | begin = *b + 1; |
414 | } |
415 | } while (count); |
416 | |
417 | return r; |
418 | } |
419 | |
420 | /*----------------------------------------------------------------*/ |
421 | |
422 | int sm_ll_insert(struct ll_disk *ll, dm_block_t b, |
423 | uint32_t ref_count, int32_t *nr_allocations) |
424 | { |
425 | int r; |
426 | uint32_t bit, old; |
427 | struct dm_block *nb; |
428 | dm_block_t index = b; |
429 | struct disk_index_entry ie_disk; |
430 | void *bm_le; |
431 | int inc; |
432 | |
433 | bit = do_div(index, ll->entries_per_block); |
434 | r = ll->load_ie(ll, index, &ie_disk); |
435 | if (r < 0) |
436 | return r; |
437 | |
438 | r = dm_tm_shadow_block(tm: ll->tm, le64_to_cpu(ie_disk.blocknr), |
439 | v: &dm_sm_bitmap_validator, result: &nb, inc_children: &inc); |
440 | if (r < 0) { |
441 | DMERR("dm_tm_shadow_block() failed" ); |
442 | return r; |
443 | } |
444 | ie_disk.blocknr = cpu_to_le64(dm_block_location(nb)); |
445 | bm_le = dm_bitmap_data(b: nb); |
446 | |
447 | old = sm_lookup_bitmap(addr: bm_le, b: bit); |
448 | if (old > 2) { |
449 | r = sm_ll_lookup_big_ref_count(ll, b, result: &old); |
450 | if (r < 0) { |
451 | dm_tm_unlock(tm: ll->tm, b: nb); |
452 | return r; |
453 | } |
454 | } |
455 | |
456 | if (r) { |
457 | dm_tm_unlock(tm: ll->tm, b: nb); |
458 | return r; |
459 | } |
460 | |
461 | if (ref_count <= 2) { |
462 | sm_set_bitmap(addr: bm_le, b: bit, val: ref_count); |
463 | dm_tm_unlock(tm: ll->tm, b: nb); |
464 | |
465 | if (old > 2) { |
466 | r = dm_btree_remove(info: &ll->ref_count_info, |
467 | root: ll->ref_count_root, |
468 | keys: &b, new_root: &ll->ref_count_root); |
469 | if (r) |
470 | return r; |
471 | } |
472 | |
473 | } else { |
474 | __le32 le_rc = cpu_to_le32(ref_count); |
475 | |
476 | sm_set_bitmap(addr: bm_le, b: bit, val: 3); |
477 | dm_tm_unlock(tm: ll->tm, b: nb); |
478 | |
479 | __dm_bless_for_disk(&le_rc); |
480 | r = dm_btree_insert(info: &ll->ref_count_info, root: ll->ref_count_root, |
481 | keys: &b, value: &le_rc, new_root: &ll->ref_count_root); |
482 | if (r < 0) { |
483 | DMERR("ref count insert failed" ); |
484 | return r; |
485 | } |
486 | } |
487 | |
488 | if (ref_count && !old) { |
489 | *nr_allocations = 1; |
490 | ll->nr_allocated++; |
491 | le32_add_cpu(var: &ie_disk.nr_free, val: -1); |
492 | if (le32_to_cpu(ie_disk.none_free_before) == bit) |
493 | ie_disk.none_free_before = cpu_to_le32(bit + 1); |
494 | |
495 | } else if (old && !ref_count) { |
496 | *nr_allocations = -1; |
497 | ll->nr_allocated--; |
498 | le32_add_cpu(var: &ie_disk.nr_free, val: 1); |
499 | ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit)); |
500 | } else |
501 | *nr_allocations = 0; |
502 | |
503 | return ll->save_ie(ll, index, &ie_disk); |
504 | } |
505 | |
506 | /*----------------------------------------------------------------*/ |
507 | |
508 | /* |
509 | * Holds useful intermediate results for the range based inc and dec |
510 | * operations. |
511 | */ |
512 | struct inc_context { |
513 | struct disk_index_entry ie_disk; |
514 | struct dm_block *bitmap_block; |
515 | void *bitmap; |
516 | |
517 | struct dm_block *overflow_leaf; |
518 | }; |
519 | |
520 | static inline void init_inc_context(struct inc_context *ic) |
521 | { |
522 | ic->bitmap_block = NULL; |
523 | ic->bitmap = NULL; |
524 | ic->overflow_leaf = NULL; |
525 | } |
526 | |
527 | static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic) |
528 | { |
529 | if (ic->bitmap_block) |
530 | dm_tm_unlock(tm: ll->tm, b: ic->bitmap_block); |
531 | if (ic->overflow_leaf) |
532 | dm_tm_unlock(tm: ll->tm, b: ic->overflow_leaf); |
533 | } |
534 | |
535 | static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic) |
536 | { |
537 | exit_inc_context(ll, ic); |
538 | init_inc_context(ic); |
539 | } |
540 | |
541 | /* |
542 | * Confirms a btree node contains a particular key at an index. |
543 | */ |
544 | static bool contains_key(struct btree_node *n, uint64_t key, int index) |
545 | { |
546 | return index >= 0 && |
547 | index < le32_to_cpu(n->header.nr_entries) && |
548 | le64_to_cpu(n->keys[index]) == key; |
549 | } |
550 | |
551 | static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic) |
552 | { |
553 | int r; |
554 | int index; |
555 | struct btree_node *n; |
556 | __le32 *v_ptr; |
557 | uint32_t rc; |
558 | |
559 | /* |
560 | * bitmap_block needs to be unlocked because getting the |
561 | * overflow_leaf may need to allocate, and thus use the space map. |
562 | */ |
563 | reset_inc_context(ll, ic); |
564 | |
565 | r = btree_get_overwrite_leaf(info: &ll->ref_count_info, root: ll->ref_count_root, |
566 | key: b, index: &index, new_root: &ll->ref_count_root, leaf: &ic->overflow_leaf); |
567 | if (r < 0) |
568 | return r; |
569 | |
570 | n = dm_block_data(b: ic->overflow_leaf); |
571 | |
572 | if (!contains_key(n, key: b, index)) { |
573 | DMERR("overflow btree is missing an entry" ); |
574 | return -EINVAL; |
575 | } |
576 | |
577 | v_ptr = value_ptr(n, index); |
578 | rc = le32_to_cpu(*v_ptr) + 1; |
579 | *v_ptr = cpu_to_le32(rc); |
580 | |
581 | return 0; |
582 | } |
583 | |
584 | static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic) |
585 | { |
586 | int index; |
587 | struct btree_node *n; |
588 | __le32 *v_ptr; |
589 | uint32_t rc; |
590 | |
591 | /* |
592 | * Do we already have the correct overflow leaf? |
593 | */ |
594 | if (ic->overflow_leaf) { |
595 | n = dm_block_data(b: ic->overflow_leaf); |
596 | index = lower_bound(n, key: b); |
597 | if (contains_key(n, key: b, index)) { |
598 | v_ptr = value_ptr(n, index); |
599 | rc = le32_to_cpu(*v_ptr) + 1; |
600 | *v_ptr = cpu_to_le32(rc); |
601 | |
602 | return 0; |
603 | } |
604 | } |
605 | |
606 | return __sm_ll_inc_overflow(ll, b, ic); |
607 | } |
608 | |
609 | static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic) |
610 | { |
611 | int r, inc; |
612 | |
613 | r = dm_tm_shadow_block(tm: ll->tm, le64_to_cpu(ic->ie_disk.blocknr), |
614 | v: &dm_sm_bitmap_validator, result: &ic->bitmap_block, inc_children: &inc); |
615 | if (r < 0) { |
616 | DMERR("dm_tm_shadow_block() failed" ); |
617 | return r; |
618 | } |
619 | ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block)); |
620 | ic->bitmap = dm_bitmap_data(b: ic->bitmap_block); |
621 | return 0; |
622 | } |
623 | |
624 | /* |
625 | * Once shadow_bitmap has been called, which always happens at the start of inc/dec, |
626 | * we can reopen the bitmap with a simple write lock, rather than re calling |
627 | * dm_tm_shadow_block(). |
628 | */ |
629 | static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic) |
630 | { |
631 | if (!ic->bitmap_block) { |
632 | int r = dm_bm_write_lock(bm: dm_tm_get_bm(tm: ll->tm), le64_to_cpu(ic->ie_disk.blocknr), |
633 | v: &dm_sm_bitmap_validator, result: &ic->bitmap_block); |
634 | if (r) { |
635 | DMERR("unable to re-get write lock for bitmap" ); |
636 | return r; |
637 | } |
638 | ic->bitmap = dm_bitmap_data(b: ic->bitmap_block); |
639 | } |
640 | |
641 | return 0; |
642 | } |
643 | |
644 | /* |
645 | * Loops round incrementing entries in a single bitmap. |
646 | */ |
647 | static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b, |
648 | uint32_t bit, uint32_t bit_end, |
649 | int32_t *nr_allocations, dm_block_t *new_b, |
650 | struct inc_context *ic) |
651 | { |
652 | int r; |
653 | __le32 le_rc; |
654 | uint32_t old; |
655 | |
656 | for (; bit != bit_end; bit++, b++) { |
657 | /* |
658 | * We only need to drop the bitmap if we need to find a new btree |
659 | * leaf for the overflow. So if it was dropped last iteration, |
660 | * we now re-get it. |
661 | */ |
662 | r = ensure_bitmap(ll, ic); |
663 | if (r) |
664 | return r; |
665 | |
666 | old = sm_lookup_bitmap(addr: ic->bitmap, b: bit); |
667 | switch (old) { |
668 | case 0: |
669 | /* inc bitmap, adjust nr_allocated */ |
670 | sm_set_bitmap(addr: ic->bitmap, b: bit, val: 1); |
671 | (*nr_allocations)++; |
672 | ll->nr_allocated++; |
673 | le32_add_cpu(var: &ic->ie_disk.nr_free, val: -1); |
674 | if (le32_to_cpu(ic->ie_disk.none_free_before) == bit) |
675 | ic->ie_disk.none_free_before = cpu_to_le32(bit + 1); |
676 | break; |
677 | |
678 | case 1: |
679 | /* inc bitmap */ |
680 | sm_set_bitmap(addr: ic->bitmap, b: bit, val: 2); |
681 | break; |
682 | |
683 | case 2: |
684 | /* inc bitmap and insert into overflow */ |
685 | sm_set_bitmap(addr: ic->bitmap, b: bit, val: 3); |
686 | reset_inc_context(ll, ic); |
687 | |
688 | le_rc = cpu_to_le32(3); |
689 | __dm_bless_for_disk(&le_rc); |
690 | r = dm_btree_insert(info: &ll->ref_count_info, root: ll->ref_count_root, |
691 | keys: &b, value: &le_rc, new_root: &ll->ref_count_root); |
692 | if (r < 0) { |
693 | DMERR("ref count insert failed" ); |
694 | return r; |
695 | } |
696 | break; |
697 | |
698 | default: |
699 | /* |
700 | * inc within the overflow tree only. |
701 | */ |
702 | r = sm_ll_inc_overflow(ll, b, ic); |
703 | if (r < 0) |
704 | return r; |
705 | } |
706 | } |
707 | |
708 | *new_b = b; |
709 | return 0; |
710 | } |
711 | |
712 | /* |
713 | * Finds a bitmap that contains entries in the block range, and increments |
714 | * them. |
715 | */ |
716 | static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, |
717 | int32_t *nr_allocations, dm_block_t *new_b) |
718 | { |
719 | int r; |
720 | struct inc_context ic; |
721 | uint32_t bit, bit_end; |
722 | dm_block_t index = b; |
723 | |
724 | init_inc_context(ic: &ic); |
725 | |
726 | bit = do_div(index, ll->entries_per_block); |
727 | r = ll->load_ie(ll, index, &ic.ie_disk); |
728 | if (r < 0) |
729 | return r; |
730 | |
731 | r = shadow_bitmap(ll, ic: &ic); |
732 | if (r) |
733 | return r; |
734 | |
735 | bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block); |
736 | r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, ic: &ic); |
737 | |
738 | exit_inc_context(ll, ic: &ic); |
739 | |
740 | if (r) |
741 | return r; |
742 | |
743 | return ll->save_ie(ll, index, &ic.ie_disk); |
744 | } |
745 | |
746 | int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, |
747 | int32_t *nr_allocations) |
748 | { |
749 | *nr_allocations = 0; |
750 | while (b != e) { |
751 | int r = __sm_ll_inc(ll, b, e, nr_allocations, new_b: &b); |
752 | |
753 | if (r) |
754 | return r; |
755 | } |
756 | |
757 | return 0; |
758 | } |
759 | |
760 | /*----------------------------------------------------------------*/ |
761 | |
762 | static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b, |
763 | struct inc_context *ic) |
764 | { |
765 | reset_inc_context(ll, ic); |
766 | return dm_btree_remove(info: &ll->ref_count_info, root: ll->ref_count_root, |
767 | keys: &b, new_root: &ll->ref_count_root); |
768 | } |
769 | |
770 | static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b, |
771 | struct inc_context *ic, uint32_t *old_rc) |
772 | { |
773 | int r; |
774 | int index = -1; |
775 | struct btree_node *n; |
776 | __le32 *v_ptr; |
777 | uint32_t rc; |
778 | |
779 | reset_inc_context(ll, ic); |
780 | r = btree_get_overwrite_leaf(info: &ll->ref_count_info, root: ll->ref_count_root, |
781 | key: b, index: &index, new_root: &ll->ref_count_root, leaf: &ic->overflow_leaf); |
782 | if (r < 0) |
783 | return r; |
784 | |
785 | n = dm_block_data(b: ic->overflow_leaf); |
786 | |
787 | if (!contains_key(n, key: b, index)) { |
788 | DMERR("overflow btree is missing an entry" ); |
789 | return -EINVAL; |
790 | } |
791 | |
792 | v_ptr = value_ptr(n, index); |
793 | rc = le32_to_cpu(*v_ptr); |
794 | *old_rc = rc; |
795 | |
796 | if (rc == 3) |
797 | return __sm_ll_del_overflow(ll, b, ic); |
798 | |
799 | rc--; |
800 | *v_ptr = cpu_to_le32(rc); |
801 | return 0; |
802 | } |
803 | |
804 | static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b, |
805 | struct inc_context *ic, uint32_t *old_rc) |
806 | { |
807 | /* |
808 | * Do we already have the correct overflow leaf? |
809 | */ |
810 | if (ic->overflow_leaf) { |
811 | int index; |
812 | struct btree_node *n; |
813 | __le32 *v_ptr; |
814 | uint32_t rc; |
815 | |
816 | n = dm_block_data(b: ic->overflow_leaf); |
817 | index = lower_bound(n, key: b); |
818 | if (contains_key(n, key: b, index)) { |
819 | v_ptr = value_ptr(n, index); |
820 | rc = le32_to_cpu(*v_ptr); |
821 | *old_rc = rc; |
822 | |
823 | if (rc > 3) { |
824 | rc--; |
825 | *v_ptr = cpu_to_le32(rc); |
826 | return 0; |
827 | } else { |
828 | return __sm_ll_del_overflow(ll, b, ic); |
829 | } |
830 | |
831 | } |
832 | } |
833 | |
834 | return __sm_ll_dec_overflow(ll, b, ic, old_rc); |
835 | } |
836 | |
837 | /* |
838 | * Loops round incrementing entries in a single bitmap. |
839 | */ |
840 | static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b, |
841 | uint32_t bit, uint32_t bit_end, |
842 | struct inc_context *ic, |
843 | int32_t *nr_allocations, dm_block_t *new_b) |
844 | { |
845 | int r; |
846 | uint32_t old; |
847 | |
848 | for (; bit != bit_end; bit++, b++) { |
849 | /* |
850 | * We only need to drop the bitmap if we need to find a new btree |
851 | * leaf for the overflow. So if it was dropped last iteration, |
852 | * we now re-get it. |
853 | */ |
854 | r = ensure_bitmap(ll, ic); |
855 | if (r) |
856 | return r; |
857 | |
858 | old = sm_lookup_bitmap(addr: ic->bitmap, b: bit); |
859 | switch (old) { |
860 | case 0: |
861 | DMERR("unable to decrement block" ); |
862 | return -EINVAL; |
863 | |
864 | case 1: |
865 | /* dec bitmap */ |
866 | sm_set_bitmap(addr: ic->bitmap, b: bit, val: 0); |
867 | (*nr_allocations)--; |
868 | ll->nr_allocated--; |
869 | le32_add_cpu(var: &ic->ie_disk.nr_free, val: 1); |
870 | ic->ie_disk.none_free_before = |
871 | cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit)); |
872 | break; |
873 | |
874 | case 2: |
875 | /* dec bitmap and insert into overflow */ |
876 | sm_set_bitmap(addr: ic->bitmap, b: bit, val: 1); |
877 | break; |
878 | |
879 | case 3: |
880 | r = sm_ll_dec_overflow(ll, b, ic, old_rc: &old); |
881 | if (r < 0) |
882 | return r; |
883 | |
884 | if (old == 3) { |
885 | r = ensure_bitmap(ll, ic); |
886 | if (r) |
887 | return r; |
888 | |
889 | sm_set_bitmap(addr: ic->bitmap, b: bit, val: 2); |
890 | } |
891 | break; |
892 | } |
893 | } |
894 | |
895 | *new_b = b; |
896 | return 0; |
897 | } |
898 | |
899 | static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, |
900 | int32_t *nr_allocations, dm_block_t *new_b) |
901 | { |
902 | int r; |
903 | uint32_t bit, bit_end; |
904 | struct inc_context ic; |
905 | dm_block_t index = b; |
906 | |
907 | init_inc_context(ic: &ic); |
908 | |
909 | bit = do_div(index, ll->entries_per_block); |
910 | r = ll->load_ie(ll, index, &ic.ie_disk); |
911 | if (r < 0) |
912 | return r; |
913 | |
914 | r = shadow_bitmap(ll, ic: &ic); |
915 | if (r) |
916 | return r; |
917 | |
918 | bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block); |
919 | r = sm_ll_dec_bitmap(ll, b, bit, bit_end, ic: &ic, nr_allocations, new_b); |
920 | exit_inc_context(ll, ic: &ic); |
921 | |
922 | if (r) |
923 | return r; |
924 | |
925 | return ll->save_ie(ll, index, &ic.ie_disk); |
926 | } |
927 | |
928 | int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, |
929 | int32_t *nr_allocations) |
930 | { |
931 | *nr_allocations = 0; |
932 | while (b != e) { |
933 | int r = __sm_ll_dec(ll, b, e, nr_allocations, new_b: &b); |
934 | |
935 | if (r) |
936 | return r; |
937 | } |
938 | |
939 | return 0; |
940 | } |
941 | |
942 | /*----------------------------------------------------------------*/ |
943 | |
944 | int sm_ll_commit(struct ll_disk *ll) |
945 | { |
946 | int r = 0; |
947 | |
948 | if (ll->bitmap_index_changed) { |
949 | r = ll->commit(ll); |
950 | if (!r) |
951 | ll->bitmap_index_changed = false; |
952 | } |
953 | |
954 | return r; |
955 | } |
956 | |
957 | /*----------------------------------------------------------------*/ |
958 | |
959 | static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index, |
960 | struct disk_index_entry *ie) |
961 | { |
962 | memcpy(ie, ll->mi_le.index + index, sizeof(*ie)); |
963 | return 0; |
964 | } |
965 | |
966 | static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index, |
967 | struct disk_index_entry *ie) |
968 | { |
969 | ll->bitmap_index_changed = true; |
970 | memcpy(ll->mi_le.index + index, ie, sizeof(*ie)); |
971 | return 0; |
972 | } |
973 | |
974 | static int metadata_ll_init_index(struct ll_disk *ll) |
975 | { |
976 | int r; |
977 | struct dm_block *b; |
978 | |
979 | r = dm_tm_new_block(tm: ll->tm, v: &index_validator, result: &b); |
980 | if (r < 0) |
981 | return r; |
982 | |
983 | ll->bitmap_root = dm_block_location(b); |
984 | |
985 | dm_tm_unlock(tm: ll->tm, b); |
986 | |
987 | return 0; |
988 | } |
989 | |
990 | static int metadata_ll_open(struct ll_disk *ll) |
991 | { |
992 | int r; |
993 | struct dm_block *block; |
994 | |
995 | r = dm_tm_read_lock(tm: ll->tm, b: ll->bitmap_root, |
996 | v: &index_validator, result: &block); |
997 | if (r) |
998 | return r; |
999 | |
1000 | memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le)); |
1001 | dm_tm_unlock(tm: ll->tm, b: block); |
1002 | |
1003 | return 0; |
1004 | } |
1005 | |
1006 | static dm_block_t metadata_ll_max_entries(struct ll_disk *ll) |
1007 | { |
1008 | return MAX_METADATA_BITMAPS; |
1009 | } |
1010 | |
1011 | static int metadata_ll_commit(struct ll_disk *ll) |
1012 | { |
1013 | int r, inc; |
1014 | struct dm_block *b; |
1015 | |
1016 | r = dm_tm_shadow_block(tm: ll->tm, orig: ll->bitmap_root, v: &index_validator, result: &b, inc_children: &inc); |
1017 | if (r) |
1018 | return r; |
1019 | |
1020 | memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le)); |
1021 | ll->bitmap_root = dm_block_location(b); |
1022 | |
1023 | dm_tm_unlock(tm: ll->tm, b); |
1024 | |
1025 | return 0; |
1026 | } |
1027 | |
1028 | int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm) |
1029 | { |
1030 | int r; |
1031 | |
1032 | r = sm_ll_init(ll, tm); |
1033 | if (r < 0) |
1034 | return r; |
1035 | |
1036 | ll->load_ie = metadata_ll_load_ie; |
1037 | ll->save_ie = metadata_ll_save_ie; |
1038 | ll->init_index = metadata_ll_init_index; |
1039 | ll->open_index = metadata_ll_open; |
1040 | ll->max_entries = metadata_ll_max_entries; |
1041 | ll->commit = metadata_ll_commit; |
1042 | |
1043 | ll->nr_blocks = 0; |
1044 | ll->nr_allocated = 0; |
1045 | |
1046 | r = ll->init_index(ll); |
1047 | if (r < 0) |
1048 | return r; |
1049 | |
1050 | r = dm_btree_empty(info: &ll->ref_count_info, root: &ll->ref_count_root); |
1051 | if (r < 0) |
1052 | return r; |
1053 | |
1054 | return 0; |
1055 | } |
1056 | |
1057 | int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm, |
1058 | void *root_le, size_t len) |
1059 | { |
1060 | int r; |
1061 | struct disk_sm_root smr; |
1062 | |
1063 | if (len < sizeof(struct disk_sm_root)) { |
1064 | DMERR("sm_metadata root too small" ); |
1065 | return -ENOMEM; |
1066 | } |
1067 | |
1068 | /* |
1069 | * We don't know the alignment of the root_le buffer, so need to |
1070 | * copy into a new structure. |
1071 | */ |
1072 | memcpy(&smr, root_le, sizeof(smr)); |
1073 | |
1074 | r = sm_ll_init(ll, tm); |
1075 | if (r < 0) |
1076 | return r; |
1077 | |
1078 | ll->load_ie = metadata_ll_load_ie; |
1079 | ll->save_ie = metadata_ll_save_ie; |
1080 | ll->init_index = metadata_ll_init_index; |
1081 | ll->open_index = metadata_ll_open; |
1082 | ll->max_entries = metadata_ll_max_entries; |
1083 | ll->commit = metadata_ll_commit; |
1084 | |
1085 | ll->nr_blocks = le64_to_cpu(smr.nr_blocks); |
1086 | ll->nr_allocated = le64_to_cpu(smr.nr_allocated); |
1087 | ll->bitmap_root = le64_to_cpu(smr.bitmap_root); |
1088 | ll->ref_count_root = le64_to_cpu(smr.ref_count_root); |
1089 | |
1090 | return ll->open_index(ll); |
1091 | } |
1092 | |
1093 | /*----------------------------------------------------------------*/ |
1094 | |
1095 | static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec) |
1096 | { |
1097 | iec->dirty = false; |
1098 | __dm_bless_for_disk(iec->ie); |
1099 | return dm_btree_insert(info: &ll->bitmap_info, root: ll->bitmap_root, |
1100 | keys: &iec->index, value: &iec->ie, new_root: &ll->bitmap_root); |
1101 | } |
1102 | |
1103 | static inline unsigned int hash_index(dm_block_t index) |
1104 | { |
1105 | return dm_hash_block(b: index, IE_CACHE_MASK); |
1106 | } |
1107 | |
1108 | static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index, |
1109 | struct disk_index_entry *ie) |
1110 | { |
1111 | int r; |
1112 | unsigned int h = hash_index(index); |
1113 | struct ie_cache *iec = ll->ie_cache + h; |
1114 | |
1115 | if (iec->valid) { |
1116 | if (iec->index == index) { |
1117 | memcpy(ie, &iec->ie, sizeof(*ie)); |
1118 | return 0; |
1119 | } |
1120 | |
1121 | if (iec->dirty) { |
1122 | r = ie_cache_writeback(ll, iec); |
1123 | if (r) |
1124 | return r; |
1125 | } |
1126 | } |
1127 | |
1128 | r = dm_btree_lookup(info: &ll->bitmap_info, root: ll->bitmap_root, keys: &index, value_le: ie); |
1129 | if (!r) { |
1130 | iec->valid = true; |
1131 | iec->dirty = false; |
1132 | iec->index = index; |
1133 | memcpy(&iec->ie, ie, sizeof(*ie)); |
1134 | } |
1135 | |
1136 | return r; |
1137 | } |
1138 | |
1139 | static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index, |
1140 | struct disk_index_entry *ie) |
1141 | { |
1142 | int r; |
1143 | unsigned int h = hash_index(index); |
1144 | struct ie_cache *iec = ll->ie_cache + h; |
1145 | |
1146 | ll->bitmap_index_changed = true; |
1147 | if (iec->valid) { |
1148 | if (iec->index == index) { |
1149 | memcpy(&iec->ie, ie, sizeof(*ie)); |
1150 | iec->dirty = true; |
1151 | return 0; |
1152 | } |
1153 | |
1154 | if (iec->dirty) { |
1155 | r = ie_cache_writeback(ll, iec); |
1156 | if (r) |
1157 | return r; |
1158 | } |
1159 | } |
1160 | |
1161 | iec->valid = true; |
1162 | iec->dirty = true; |
1163 | iec->index = index; |
1164 | memcpy(&iec->ie, ie, sizeof(*ie)); |
1165 | return 0; |
1166 | } |
1167 | |
1168 | static int disk_ll_init_index(struct ll_disk *ll) |
1169 | { |
1170 | unsigned int i; |
1171 | |
1172 | for (i = 0; i < IE_CACHE_SIZE; i++) { |
1173 | struct ie_cache *iec = ll->ie_cache + i; |
1174 | |
1175 | iec->valid = false; |
1176 | iec->dirty = false; |
1177 | } |
1178 | return dm_btree_empty(info: &ll->bitmap_info, root: &ll->bitmap_root); |
1179 | } |
1180 | |
1181 | static int disk_ll_open(struct ll_disk *ll) |
1182 | { |
1183 | return 0; |
1184 | } |
1185 | |
1186 | static dm_block_t disk_ll_max_entries(struct ll_disk *ll) |
1187 | { |
1188 | return -1ULL; |
1189 | } |
1190 | |
1191 | static int disk_ll_commit(struct ll_disk *ll) |
1192 | { |
1193 | int r = 0; |
1194 | unsigned int i; |
1195 | |
1196 | for (i = 0; i < IE_CACHE_SIZE; i++) { |
1197 | struct ie_cache *iec = ll->ie_cache + i; |
1198 | |
1199 | if (iec->valid && iec->dirty) |
1200 | r = ie_cache_writeback(ll, iec); |
1201 | } |
1202 | |
1203 | return r; |
1204 | } |
1205 | |
1206 | int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm) |
1207 | { |
1208 | int r; |
1209 | |
1210 | r = sm_ll_init(ll, tm); |
1211 | if (r < 0) |
1212 | return r; |
1213 | |
1214 | ll->load_ie = disk_ll_load_ie; |
1215 | ll->save_ie = disk_ll_save_ie; |
1216 | ll->init_index = disk_ll_init_index; |
1217 | ll->open_index = disk_ll_open; |
1218 | ll->max_entries = disk_ll_max_entries; |
1219 | ll->commit = disk_ll_commit; |
1220 | |
1221 | ll->nr_blocks = 0; |
1222 | ll->nr_allocated = 0; |
1223 | |
1224 | r = ll->init_index(ll); |
1225 | if (r < 0) |
1226 | return r; |
1227 | |
1228 | r = dm_btree_empty(info: &ll->ref_count_info, root: &ll->ref_count_root); |
1229 | if (r < 0) |
1230 | return r; |
1231 | |
1232 | return 0; |
1233 | } |
1234 | |
1235 | int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm, |
1236 | void *root_le, size_t len) |
1237 | { |
1238 | int r; |
1239 | struct disk_sm_root *smr = root_le; |
1240 | |
1241 | if (len < sizeof(struct disk_sm_root)) { |
1242 | DMERR("sm_metadata root too small" ); |
1243 | return -ENOMEM; |
1244 | } |
1245 | |
1246 | r = sm_ll_init(ll, tm); |
1247 | if (r < 0) |
1248 | return r; |
1249 | |
1250 | ll->load_ie = disk_ll_load_ie; |
1251 | ll->save_ie = disk_ll_save_ie; |
1252 | ll->init_index = disk_ll_init_index; |
1253 | ll->open_index = disk_ll_open; |
1254 | ll->max_entries = disk_ll_max_entries; |
1255 | ll->commit = disk_ll_commit; |
1256 | |
1257 | ll->nr_blocks = le64_to_cpu(smr->nr_blocks); |
1258 | ll->nr_allocated = le64_to_cpu(smr->nr_allocated); |
1259 | ll->bitmap_root = le64_to_cpu(smr->bitmap_root); |
1260 | ll->ref_count_root = le64_to_cpu(smr->ref_count_root); |
1261 | |
1262 | return ll->open_index(ll); |
1263 | } |
1264 | |
1265 | /*----------------------------------------------------------------*/ |
1266 | |