1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2011 Red Hat, Inc.
4 *
5 * This file is released under the GPL.
6 */
7
8#include "dm-space-map.h"
9#include "dm-space-map-common.h"
10#include "dm-space-map-metadata.h"
11
12#include <linux/list.h>
13#include <linux/slab.h>
14#include <linux/device-mapper.h>
15#include <linux/kernel.h>
16
17#define DM_MSG_PREFIX "space map metadata"
18
19/*----------------------------------------------------------------*/
20
21/*
22 * An edge triggered threshold.
23 */
24struct threshold {
25 bool threshold_set;
26 bool value_set;
27 dm_block_t threshold;
28 dm_block_t current_value;
29 dm_sm_threshold_fn fn;
30 void *context;
31};
32
33static void threshold_init(struct threshold *t)
34{
35 t->threshold_set = false;
36 t->value_set = false;
37}
38
39static void set_threshold(struct threshold *t, dm_block_t value,
40 dm_sm_threshold_fn fn, void *context)
41{
42 t->threshold_set = true;
43 t->threshold = value;
44 t->fn = fn;
45 t->context = context;
46}
47
48static bool below_threshold(struct threshold *t, dm_block_t value)
49{
50 return t->threshold_set && value <= t->threshold;
51}
52
53static bool threshold_already_triggered(struct threshold *t)
54{
55 return t->value_set && below_threshold(t, value: t->current_value);
56}
57
58static void check_threshold(struct threshold *t, dm_block_t value)
59{
60 if (below_threshold(t, value) &&
61 !threshold_already_triggered(t))
62 t->fn(t->context);
63
64 t->value_set = true;
65 t->current_value = value;
66}
67
68/*----------------------------------------------------------------*/
69
70/*
71 * Space map interface.
72 *
73 * The low level disk format is written using the standard btree and
74 * transaction manager. This means that performing disk operations may
75 * cause us to recurse into the space map in order to allocate new blocks.
76 * For this reason we have a pool of pre-allocated blocks large enough to
77 * service any metadata_ll_disk operation.
78 */
79
80/*
81 * FIXME: we should calculate this based on the size of the device.
82 * Only the metadata space map needs this functionality.
83 */
84#define MAX_RECURSIVE_ALLOCATIONS 1024
85
86enum block_op_type {
87 BOP_INC,
88 BOP_DEC
89};
90
91struct block_op {
92 enum block_op_type type;
93 dm_block_t b;
94 dm_block_t e;
95};
96
97struct bop_ring_buffer {
98 unsigned int begin;
99 unsigned int end;
100 struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
101};
102
103static void brb_init(struct bop_ring_buffer *brb)
104{
105 brb->begin = 0;
106 brb->end = 0;
107}
108
109static bool brb_empty(struct bop_ring_buffer *brb)
110{
111 return brb->begin == brb->end;
112}
113
114static unsigned int brb_next(struct bop_ring_buffer *brb, unsigned int old)
115{
116 unsigned int r = old + 1;
117
118 return r >= ARRAY_SIZE(brb->bops) ? 0 : r;
119}
120
121static int brb_push(struct bop_ring_buffer *brb,
122 enum block_op_type type, dm_block_t b, dm_block_t e)
123{
124 struct block_op *bop;
125 unsigned int next = brb_next(brb, old: brb->end);
126
127 /*
128 * We don't allow the last bop to be filled, this way we can
129 * differentiate between full and empty.
130 */
131 if (next == brb->begin)
132 return -ENOMEM;
133
134 bop = brb->bops + brb->end;
135 bop->type = type;
136 bop->b = b;
137 bop->e = e;
138
139 brb->end = next;
140
141 return 0;
142}
143
144static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
145{
146 struct block_op *bop;
147
148 if (brb_empty(brb))
149 return -ENODATA;
150
151 bop = brb->bops + brb->begin;
152 memcpy(result, bop, sizeof(*result));
153 return 0;
154}
155
156static int brb_pop(struct bop_ring_buffer *brb)
157{
158 if (brb_empty(brb))
159 return -ENODATA;
160
161 brb->begin = brb_next(brb, old: brb->begin);
162
163 return 0;
164}
165
166/*----------------------------------------------------------------*/
167
168struct sm_metadata {
169 struct dm_space_map sm;
170
171 struct ll_disk ll;
172 struct ll_disk old_ll;
173
174 dm_block_t begin;
175
176 unsigned int recursion_count;
177 unsigned int allocated_this_transaction;
178 struct bop_ring_buffer uncommitted;
179
180 struct threshold threshold;
181};
182
183static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b, dm_block_t e)
184{
185 int r = brb_push(brb: &smm->uncommitted, type, b, e);
186
187 if (r) {
188 DMERR("too many recursive allocations");
189 return -ENOMEM;
190 }
191
192 return 0;
193}
194
195static int commit_bop(struct sm_metadata *smm, struct block_op *op)
196{
197 int r = 0;
198 int32_t nr_allocations;
199
200 switch (op->type) {
201 case BOP_INC:
202 r = sm_ll_inc(ll: &smm->ll, b: op->b, e: op->e, nr_allocations: &nr_allocations);
203 break;
204
205 case BOP_DEC:
206 r = sm_ll_dec(ll: &smm->ll, b: op->b, e: op->e, nr_allocations: &nr_allocations);
207 break;
208 }
209
210 return r;
211}
212
213static void in(struct sm_metadata *smm)
214{
215 smm->recursion_count++;
216}
217
218static int apply_bops(struct sm_metadata *smm)
219{
220 int r = 0;
221
222 while (!brb_empty(brb: &smm->uncommitted)) {
223 struct block_op bop;
224
225 r = brb_peek(brb: &smm->uncommitted, result: &bop);
226 if (r) {
227 DMERR("bug in bop ring buffer");
228 break;
229 }
230
231 r = commit_bop(smm, op: &bop);
232 if (r)
233 break;
234
235 brb_pop(brb: &smm->uncommitted);
236 }
237
238 return r;
239}
240
241static int out(struct sm_metadata *smm)
242{
243 int r = 0;
244
245 /*
246 * If we're not recursing then very bad things are happening.
247 */
248 if (!smm->recursion_count) {
249 DMERR("lost track of recursion depth");
250 return -ENOMEM;
251 }
252
253 if (smm->recursion_count == 1)
254 r = apply_bops(smm);
255
256 smm->recursion_count--;
257
258 return r;
259}
260
261/*
262 * When using the out() function above, we often want to combine an error
263 * code for the operation run in the recursive context with that from
264 * out().
265 */
266static int combine_errors(int r1, int r2)
267{
268 return r1 ? r1 : r2;
269}
270
271static int recursing(struct sm_metadata *smm)
272{
273 return smm->recursion_count;
274}
275
276static void sm_metadata_destroy(struct dm_space_map *sm)
277{
278 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
279
280 kfree(objp: smm);
281}
282
283static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
284{
285 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
286
287 *count = smm->ll.nr_blocks;
288
289 return 0;
290}
291
292static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
293{
294 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
295
296 *count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
297 smm->allocated_this_transaction;
298
299 return 0;
300}
301
302static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
303 uint32_t *result)
304{
305 int r;
306 unsigned int i;
307 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
308 unsigned int adjustment = 0;
309
310 /*
311 * We may have some uncommitted adjustments to add. This list
312 * should always be really short.
313 */
314 for (i = smm->uncommitted.begin;
315 i != smm->uncommitted.end;
316 i = brb_next(brb: &smm->uncommitted, old: i)) {
317 struct block_op *op = smm->uncommitted.bops + i;
318
319 if (b < op->b || b >= op->e)
320 continue;
321
322 switch (op->type) {
323 case BOP_INC:
324 adjustment++;
325 break;
326
327 case BOP_DEC:
328 adjustment--;
329 break;
330 }
331 }
332
333 r = sm_ll_lookup(ll: &smm->ll, b, result);
334 if (r)
335 return r;
336
337 *result += adjustment;
338
339 return 0;
340}
341
342static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
343 dm_block_t b, int *result)
344{
345 int r, adjustment = 0;
346 unsigned int i;
347 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
348 uint32_t rc;
349
350 /*
351 * We may have some uncommitted adjustments to add. This list
352 * should always be really short.
353 */
354 for (i = smm->uncommitted.begin;
355 i != smm->uncommitted.end;
356 i = brb_next(brb: &smm->uncommitted, old: i)) {
357
358 struct block_op *op = smm->uncommitted.bops + i;
359
360 if (b < op->b || b >= op->e)
361 continue;
362
363 switch (op->type) {
364 case BOP_INC:
365 adjustment++;
366 break;
367
368 case BOP_DEC:
369 adjustment--;
370 break;
371 }
372 }
373
374 if (adjustment > 1) {
375 *result = 1;
376 return 0;
377 }
378
379 r = sm_ll_lookup_bitmap(ll: &smm->ll, b, result: &rc);
380 if (r)
381 return r;
382
383 if (rc == 3)
384 /*
385 * We err on the side of caution, and always return true.
386 */
387 *result = 1;
388 else
389 *result = rc + adjustment > 1;
390
391 return 0;
392}
393
394static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
395 uint32_t count)
396{
397 int r, r2;
398 int32_t nr_allocations;
399 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
400
401 if (smm->recursion_count) {
402 DMERR("cannot recurse set_count()");
403 return -EINVAL;
404 }
405
406 in(smm);
407 r = sm_ll_insert(ll: &smm->ll, b, ref_count: count, nr_allocations: &nr_allocations);
408 r2 = out(smm);
409
410 return combine_errors(r1: r, r2);
411}
412
413static int sm_metadata_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
414{
415 int r, r2 = 0;
416 int32_t nr_allocations;
417 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
418
419 if (recursing(smm)) {
420 r = add_bop(smm, type: BOP_INC, b, e);
421 if (r)
422 return r;
423 } else {
424 in(smm);
425 r = sm_ll_inc(ll: &smm->ll, b, e, nr_allocations: &nr_allocations);
426 r2 = out(smm);
427 }
428
429 return combine_errors(r1: r, r2);
430}
431
432static int sm_metadata_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
433{
434 int r, r2 = 0;
435 int32_t nr_allocations;
436 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
437
438 if (recursing(smm))
439 r = add_bop(smm, type: BOP_DEC, b, e);
440 else {
441 in(smm);
442 r = sm_ll_dec(ll: &smm->ll, b, e, nr_allocations: &nr_allocations);
443 r2 = out(smm);
444 }
445
446 return combine_errors(r1: r, r2);
447}
448
449static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
450{
451 int r, r2 = 0;
452 int32_t nr_allocations;
453 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
454
455 /*
456 * Any block we allocate has to be free in both the old and current ll.
457 */
458 r = sm_ll_find_common_free_block(old_ll: &smm->old_ll, new_ll: &smm->ll, begin: smm->begin, end: smm->ll.nr_blocks, result: b);
459 if (r == -ENOSPC) {
460 /*
461 * There's no free block between smm->begin and the end of the metadata device.
462 * We search before smm->begin in case something has been freed.
463 */
464 r = sm_ll_find_common_free_block(old_ll: &smm->old_ll, new_ll: &smm->ll, begin: 0, end: smm->begin, result: b);
465 }
466
467 if (r)
468 return r;
469
470 smm->begin = *b + 1;
471
472 if (recursing(smm))
473 r = add_bop(smm, type: BOP_INC, b: *b, e: *b + 1);
474 else {
475 in(smm);
476 r = sm_ll_inc(ll: &smm->ll, b: *b, e: *b + 1, nr_allocations: &nr_allocations);
477 r2 = out(smm);
478 }
479
480 if (!r)
481 smm->allocated_this_transaction++;
482
483 return combine_errors(r1: r, r2);
484}
485
486static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
487{
488 dm_block_t count;
489 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
490
491 int r = sm_metadata_new_block_(sm, b);
492
493 if (r) {
494 DMERR_LIMIT("unable to allocate new metadata block");
495 return r;
496 }
497
498 r = sm_metadata_get_nr_free(sm, count: &count);
499 if (r) {
500 DMERR_LIMIT("couldn't get free block count");
501 return r;
502 }
503
504 check_threshold(t: &smm->threshold, value: count);
505
506 return r;
507}
508
509static int sm_metadata_commit(struct dm_space_map *sm)
510{
511 int r;
512 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
513
514 r = sm_ll_commit(ll: &smm->ll);
515 if (r)
516 return r;
517
518 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
519 smm->allocated_this_transaction = 0;
520
521 return 0;
522}
523
524static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
525 dm_block_t threshold,
526 dm_sm_threshold_fn fn,
527 void *context)
528{
529 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
530
531 set_threshold(t: &smm->threshold, value: threshold, fn, context);
532
533 return 0;
534}
535
536static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
537{
538 *result = sizeof(struct disk_sm_root);
539
540 return 0;
541}
542
543static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
544{
545 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
546 struct disk_sm_root root_le;
547
548 root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
549 root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
550 root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
551 root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);
552
553 if (max < sizeof(root_le))
554 return -ENOSPC;
555
556 memcpy(where_le, &root_le, sizeof(root_le));
557
558 return 0;
559}
560
561static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);
562
563static const struct dm_space_map ops = {
564 .destroy = sm_metadata_destroy,
565 .extend = sm_metadata_extend,
566 .get_nr_blocks = sm_metadata_get_nr_blocks,
567 .get_nr_free = sm_metadata_get_nr_free,
568 .get_count = sm_metadata_get_count,
569 .count_is_more_than_one = sm_metadata_count_is_more_than_one,
570 .set_count = sm_metadata_set_count,
571 .inc_blocks = sm_metadata_inc_blocks,
572 .dec_blocks = sm_metadata_dec_blocks,
573 .new_block = sm_metadata_new_block,
574 .commit = sm_metadata_commit,
575 .root_size = sm_metadata_root_size,
576 .copy_root = sm_metadata_copy_root,
577 .register_threshold_callback = sm_metadata_register_threshold_callback
578};
579
580/*----------------------------------------------------------------*/
581
582/*
583 * When a new space map is created that manages its own space. We use
584 * this tiny bootstrap allocator.
585 */
586static void sm_bootstrap_destroy(struct dm_space_map *sm)
587{
588}
589
590static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
591{
592 DMERR("bootstrap doesn't support extend");
593
594 return -EINVAL;
595}
596
597static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
598{
599 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
600
601 *count = smm->ll.nr_blocks;
602
603 return 0;
604}
605
606static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
607{
608 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
609
610 *count = smm->ll.nr_blocks - smm->begin;
611
612 return 0;
613}
614
615static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
616 uint32_t *result)
617{
618 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
619
620 *result = (b < smm->begin) ? 1 : 0;
621
622 return 0;
623}
624
625static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
626 dm_block_t b, int *result)
627{
628 *result = 0;
629
630 return 0;
631}
632
633static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
634 uint32_t count)
635{
636 DMERR("bootstrap doesn't support set_count");
637
638 return -EINVAL;
639}
640
641static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
642{
643 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
644
645 /*
646 * We know the entire device is unused.
647 */
648 if (smm->begin == smm->ll.nr_blocks)
649 return -ENOSPC;
650
651 *b = smm->begin++;
652
653 return 0;
654}
655
656static int sm_bootstrap_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
657{
658 int r;
659 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
660
661 r = add_bop(smm, type: BOP_INC, b, e);
662 if (r)
663 return r;
664
665 return 0;
666}
667
668static int sm_bootstrap_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
669{
670 int r;
671 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
672
673 r = add_bop(smm, type: BOP_DEC, b, e);
674 if (r)
675 return r;
676
677 return 0;
678}
679
680static int sm_bootstrap_commit(struct dm_space_map *sm)
681{
682 return 0;
683}
684
685static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
686{
687 DMERR("bootstrap doesn't support root_size");
688
689 return -EINVAL;
690}
691
692static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
693 size_t max)
694{
695 DMERR("bootstrap doesn't support copy_root");
696
697 return -EINVAL;
698}
699
700static const struct dm_space_map bootstrap_ops = {
701 .destroy = sm_bootstrap_destroy,
702 .extend = sm_bootstrap_extend,
703 .get_nr_blocks = sm_bootstrap_get_nr_blocks,
704 .get_nr_free = sm_bootstrap_get_nr_free,
705 .get_count = sm_bootstrap_get_count,
706 .count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
707 .set_count = sm_bootstrap_set_count,
708 .inc_blocks = sm_bootstrap_inc_blocks,
709 .dec_blocks = sm_bootstrap_dec_blocks,
710 .new_block = sm_bootstrap_new_block,
711 .commit = sm_bootstrap_commit,
712 .root_size = sm_bootstrap_root_size,
713 .copy_root = sm_bootstrap_copy_root,
714 .register_threshold_callback = NULL
715};
716
717/*----------------------------------------------------------------*/
718
719static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
720{
721 int r;
722 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
723 dm_block_t old_len = smm->ll.nr_blocks;
724
725 /*
726 * Flick into a mode where all blocks get allocated in the new area.
727 */
728 smm->begin = old_len;
729 memcpy(sm, &bootstrap_ops, sizeof(*sm));
730
731 /*
732 * Extend.
733 */
734 r = sm_ll_extend(ll: &smm->ll, extra_blocks);
735 if (r)
736 goto out;
737
738 /*
739 * We repeatedly increment then commit until the commit doesn't
740 * allocate any new blocks.
741 */
742 do {
743 r = add_bop(smm, type: BOP_INC, b: old_len, e: smm->begin);
744 if (r)
745 goto out;
746
747 old_len = smm->begin;
748
749 r = apply_bops(smm);
750 if (r) {
751 DMERR("%s: apply_bops failed", __func__);
752 goto out;
753 }
754
755 r = sm_ll_commit(ll: &smm->ll);
756 if (r)
757 goto out;
758
759 } while (old_len != smm->begin);
760
761out:
762 /*
763 * Switch back to normal behaviour.
764 */
765 memcpy(sm, &ops, sizeof(*sm));
766 return r;
767}
768
769/*----------------------------------------------------------------*/
770
771struct dm_space_map *dm_sm_metadata_init(void)
772{
773 struct sm_metadata *smm;
774
775 smm = kmalloc(size: sizeof(*smm), GFP_KERNEL);
776 if (!smm)
777 return ERR_PTR(error: -ENOMEM);
778
779 memcpy(&smm->sm, &ops, sizeof(smm->sm));
780
781 return &smm->sm;
782}
783
784int dm_sm_metadata_create(struct dm_space_map *sm,
785 struct dm_transaction_manager *tm,
786 dm_block_t nr_blocks,
787 dm_block_t superblock)
788{
789 int r;
790 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
791
792 smm->begin = superblock + 1;
793 smm->recursion_count = 0;
794 smm->allocated_this_transaction = 0;
795 brb_init(brb: &smm->uncommitted);
796 threshold_init(t: &smm->threshold);
797
798 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
799
800 r = sm_ll_new_metadata(ll: &smm->ll, tm);
801 if (!r) {
802 if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
803 nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
804 r = sm_ll_extend(ll: &smm->ll, extra_blocks: nr_blocks);
805 }
806 memcpy(&smm->sm, &ops, sizeof(smm->sm));
807 if (r)
808 return r;
809
810 /*
811 * Now we need to update the newly created data structures with the
812 * allocated blocks that they were built from.
813 */
814 r = add_bop(smm, type: BOP_INC, b: superblock, e: smm->begin);
815 if (r)
816 return r;
817
818 r = apply_bops(smm);
819 if (r) {
820 DMERR("%s: apply_bops failed", __func__);
821 return r;
822 }
823
824 return sm_metadata_commit(sm);
825}
826
827int dm_sm_metadata_open(struct dm_space_map *sm,
828 struct dm_transaction_manager *tm,
829 void *root_le, size_t len)
830{
831 int r;
832 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
833
834 r = sm_ll_open_metadata(ll: &smm->ll, tm, root_le, len);
835 if (r)
836 return r;
837
838 smm->begin = 0;
839 smm->recursion_count = 0;
840 smm->allocated_this_transaction = 0;
841 brb_init(brb: &smm->uncommitted);
842 threshold_init(t: &smm->threshold);
843
844 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
845 return 0;
846}
847

source code of linux/drivers/md/persistent-data/dm-space-map-metadata.c