// SPDX-License-Identifier: GPL-2.0
/*
 * Code for manipulating bucket marks for garbage collection.
 *
 * Copyright 2014 Datera, Inc.
 */

#include "bcachefs.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "bset.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "buckets.h"
#include "buckets_waiting_for_journal.h"
#include "ec.h"
#include "error.h"
#include "inode.h"
#include "movinggc.h"
#include "recovery.h"
#include "reflink.h"
#include "replicas.h"
#include "subvolume.h"
#include "trace.h"

#include <linux/preempt.h>

static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage,
					      enum bch_data_type data_type,
					      s64 sectors)
{
	switch (data_type) {
	case BCH_DATA_btree:
		fs_usage->btree += sectors;
		break;
	case BCH_DATA_user:
	case BCH_DATA_parity:
		fs_usage->data += sectors;
		break;
	case BCH_DATA_cached:
		fs_usage->cached += sectors;
		break;
	default:
		break;
	}
}
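
/*
 * Illustrative note (not from the original source): user data and parity
 * both land in the "data" field above, so for an extent striped as user
 * plus parity blocks, e.g.:
 *
 *	fs_usage_data_type_to_base(&usage, BCH_DATA_user,   128);
 *	fs_usage_data_type_to_base(&usage, BCH_DATA_parity,  64);
 *
 * usage.data grows by 192 sectors, while BCH_DATA_sb/BCH_DATA_journal fall
 * through the default case - superblock and journal buckets are accounted
 * as "hidden" instead (see bch2_fs_usage_initialize() below).
 */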

void bch2_fs_usage_initialize(struct bch_fs *c)
{
	percpu_down_write(&c->mark_lock);
	struct bch_fs_usage *usage = c->usage_base;

	for (unsigned i = 0; i < ARRAY_SIZE(c->usage); i++)
		bch2_fs_usage_acc_to_base(c, i);

	for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++)
		usage->b.reserved += usage->persistent_reserved[i];

	for (unsigned i = 0; i < c->replicas.nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(&c->replicas, i);

		fs_usage_data_type_to_base(&usage->b, e->data_type, usage->replicas[i]);
	}

	for_each_member_device(c, ca) {
		struct bch_dev_usage dev = bch2_dev_usage_read(ca);

		usage->b.hidden += (dev.d[BCH_DATA_sb].buckets +
				    dev.d[BCH_DATA_journal].buckets) *
			ca->mi.bucket_size;
	}

	percpu_up_write(&c->mark_lock);
}

static inline struct bch_dev_usage *dev_usage_ptr(struct bch_dev *ca,
						  unsigned journal_seq,
						  bool gc)
{
	BUG_ON(!gc && !journal_seq);

	return this_cpu_ptr(gc
			    ? ca->usage_gc
			    : ca->usage[journal_seq & JOURNAL_BUF_MASK]);
}

void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
{
	struct bch_fs *c = ca->fs;
	unsigned seq, i, u64s = dev_usage_u64s();

	do {
		seq = read_seqcount_begin(&c->usage_lock);
		memcpy(usage, ca->usage_base, u64s * sizeof(u64));
		for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
			acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage[i], u64s);
	} while (read_seqcount_retry(&c->usage_lock, seq));
}
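
/*
 * Example (a sketch, not from the original file): callers wanting a
 * consistent snapshot of device usage go through this helper, e.g.:
 *
 *	struct bch_dev_usage u;
 *
 *	bch2_dev_usage_read_fast(ca, &u);
 *	pr_info("free buckets: %llu\n", u.d[BCH_DATA_free].buckets);
 *
 * The seqcount retry loop above guarantees the base counters and the
 * percpu deltas are summed against a stable view, with no lock taken on
 * the read side.
 */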

u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
{
	ssize_t offset = v - (u64 *) c->usage_base;
	unsigned i, seq;
	u64 ret;

	BUG_ON(offset < 0 || offset >= fs_usage_u64s(c));
	percpu_rwsem_assert_held(&c->mark_lock);

	do {
		seq = read_seqcount_begin(&c->usage_lock);
		ret = *v;

		for (i = 0; i < ARRAY_SIZE(c->usage); i++)
			ret += percpu_u64_get((u64 __percpu *) c->usage[i] + offset);
	} while (read_seqcount_retry(&c->usage_lock, seq));

	return ret;
}

struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c)
{
	struct bch_fs_usage_online *ret;
	unsigned nr_replicas = READ_ONCE(c->replicas.nr);
	unsigned seq, i;
retry:
	ret = kmalloc(__fs_usage_online_u64s(nr_replicas) * sizeof(u64), GFP_KERNEL);
	if (unlikely(!ret))
		return NULL;

	percpu_down_read(&c->mark_lock);

	if (nr_replicas != c->replicas.nr) {
		nr_replicas = c->replicas.nr;
		percpu_up_read(&c->mark_lock);
		kfree(ret);
		goto retry;
	}

	ret->online_reserved = percpu_u64_get(c->online_reserved);

	do {
		seq = read_seqcount_begin(&c->usage_lock);
		unsafe_memcpy(&ret->u, c->usage_base,
			      __fs_usage_u64s(nr_replicas) * sizeof(u64),
			      "embedded variable length struct");
		for (i = 0; i < ARRAY_SIZE(c->usage); i++)
			acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i],
					__fs_usage_u64s(nr_replicas));
	} while (read_seqcount_retry(&c->usage_lock, seq));

	return ret;
}
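
/*
 * Illustrative usage (a sketch): the returned struct is heap allocated and
 * sized for the replicas table as of the read, so the caller owns it:
 *
 *	struct bch_fs_usage_online *u = bch2_fs_usage_read(c);
 *
 *	if (u) {
 *		bch2_fs_usage_to_text(&out, c, u);
 *		kfree(u);
 *	}
 *
 * The retry loop above handles the replicas table growing between the
 * unlocked size read and taking mark_lock.
 */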

void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx)
{
	unsigned u64s = fs_usage_u64s(c);

	BUG_ON(idx >= ARRAY_SIZE(c->usage));

	preempt_disable();
	write_seqcount_begin(&c->usage_lock);

	acc_u64s_percpu((u64 *) c->usage_base,
			(u64 __percpu *) c->usage[idx], u64s);
	percpu_memset(c->usage[idx], 0, u64s * sizeof(u64));

	rcu_read_lock();
	for_each_member_device_rcu(c, ca, NULL) {
		u64s = dev_usage_u64s();

		acc_u64s_percpu((u64 *) ca->usage_base,
				(u64 __percpu *) ca->usage[idx], u64s);
		percpu_memset(ca->usage[idx], 0, u64s * sizeof(u64));
	}
	rcu_read_unlock();

	write_seqcount_end(&c->usage_lock);
	preempt_enable();
}

void bch2_fs_usage_to_text(struct printbuf *out,
			   struct bch_fs *c,
			   struct bch_fs_usage_online *fs_usage)
{
	unsigned i;

	prt_printf(out, "capacity:\t\t\t%llu\n", c->capacity);

	prt_printf(out, "hidden:\t\t\t\t%llu\n",
		   fs_usage->u.b.hidden);
	prt_printf(out, "data:\t\t\t\t%llu\n",
		   fs_usage->u.b.data);
	prt_printf(out, "cached:\t\t\t\t%llu\n",
		   fs_usage->u.b.cached);
	prt_printf(out, "reserved:\t\t\t%llu\n",
		   fs_usage->u.b.reserved);
	prt_printf(out, "nr_inodes:\t\t\t%llu\n",
		   fs_usage->u.b.nr_inodes);
	prt_printf(out, "online reserved:\t\t%llu\n",
		   fs_usage->online_reserved);

	for (i = 0;
	     i < ARRAY_SIZE(fs_usage->u.persistent_reserved);
	     i++) {
		prt_printf(out, "%u replicas:\n", i + 1);
		prt_printf(out, "\treserved:\t\t%llu\n",
			   fs_usage->u.persistent_reserved[i]);
	}

	for (i = 0; i < c->replicas.nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(&c->replicas, i);

		prt_printf(out, "\t");
		bch2_replicas_entry_to_text(out, e);
		prt_printf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
	}
}

static u64 reserve_factor(u64 r)
{
	return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR);
}
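
/*
 * Worked example (illustrative; assumes RESERVE_FACTOR is 6, i.e. roughly
 * a 1/64 = ~1.6% cushion on top of outstanding reservations):
 *
 *	reserve_factor(1000) = 1000 + (round_up(1000, 64) >> 6)
 *	                     = 1000 + (1024 >> 6)
 *	                     = 1016
 *
 * Slightly over-counting reserved space keeps "used" pessimistic, so
 * statfs-style numbers never promise space that reservations may consume.
 */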

u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage_online *fs_usage)
{
	return min(fs_usage->u.b.hidden +
		   fs_usage->u.b.btree +
		   fs_usage->u.b.data +
		   reserve_factor(fs_usage->u.b.reserved +
				  fs_usage->online_reserved),
		   c->capacity);
}

static struct bch_fs_usage_short
__bch2_fs_usage_read_short(struct bch_fs *c)
{
	struct bch_fs_usage_short ret;
	u64 data, reserved;

	ret.capacity = c->capacity -
		bch2_fs_usage_read_one(c, &c->usage_base->b.hidden);

	data = bch2_fs_usage_read_one(c, &c->usage_base->b.data) +
		bch2_fs_usage_read_one(c, &c->usage_base->b.btree);
	reserved = bch2_fs_usage_read_one(c, &c->usage_base->b.reserved) +
		percpu_u64_get(c->online_reserved);

	ret.used = min(ret.capacity, data + reserve_factor(reserved));
	ret.free = ret.capacity - ret.used;

	ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->b.nr_inodes);

	return ret;
}

struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *c)
{
	struct bch_fs_usage_short ret;

	percpu_down_read(&c->mark_lock);
	ret = __bch2_fs_usage_read_short(c);
	percpu_up_read(&c->mark_lock);

	return ret;
}
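
/*
 * Example (a sketch of a statfs-style caller, not from the original file):
 *
 *	struct bch_fs_usage_short u = bch2_fs_usage_read_short(c);
 *
 *	pr_info("capacity %llu, used %llu, free %llu, inodes %llu\n",
 *		u.capacity, u.used, u.free, u.nr_inodes);
 *
 * Returning the struct by value means callers get one coherent snapshot
 * taken under mark_lock, rather than reading fields at different times.
 */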

void bch2_dev_usage_init(struct bch_dev *ca)
{
	ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket;
}

void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage)
{
	prt_tab(out);
	prt_str(out, "buckets");
	prt_tab_rjust(out);
	prt_str(out, "sectors");
	prt_tab_rjust(out);
	prt_str(out, "fragmented");
	prt_tab_rjust(out);
	prt_newline(out);

	for (unsigned i = 0; i < BCH_DATA_NR; i++) {
		bch2_prt_data_type(out, i);
		prt_tab(out);
		prt_u64(out, usage->d[i].buckets);
		prt_tab_rjust(out);
		prt_u64(out, usage->d[i].sectors);
		prt_tab_rjust(out);
		prt_u64(out, usage->d[i].fragmented);
		prt_tab_rjust(out);
		prt_newline(out);
	}
}

void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
			   const struct bch_alloc_v4 *old,
			   const struct bch_alloc_v4 *new,
			   u64 journal_seq, bool gc)
{
	struct bch_fs_usage *fs_usage;
	struct bch_dev_usage *u;

	preempt_disable();
	fs_usage = fs_usage_ptr(c, journal_seq, gc);

	if (data_type_is_hidden(old->data_type))
		fs_usage->b.hidden -= ca->mi.bucket_size;
	if (data_type_is_hidden(new->data_type))
		fs_usage->b.hidden += ca->mi.bucket_size;

	u = dev_usage_ptr(ca, journal_seq, gc);

	u->d[old->data_type].buckets--;
	u->d[new->data_type].buckets++;

	u->d[old->data_type].sectors -= bch2_bucket_sectors_dirty(*old);
	u->d[new->data_type].sectors += bch2_bucket_sectors_dirty(*new);

	u->d[BCH_DATA_cached].sectors += new->cached_sectors;
	u->d[BCH_DATA_cached].sectors -= old->cached_sectors;

	u->d[old->data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, *old);
	u->d[new->data_type].fragmented += bch2_bucket_sectors_fragmented(ca, *new);

	preempt_enable();
}

static inline struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
{
	return (struct bch_alloc_v4) {
		.gen = b.gen,
		.data_type = b.data_type,
		.dirty_sectors = b.dirty_sectors,
		.cached_sectors = b.cached_sectors,
		.stripe = b.stripe,
	};
}

void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
			     struct bucket *old, struct bucket *new)
{
	struct bch_alloc_v4 old_a = bucket_m_to_alloc(*old);
	struct bch_alloc_v4 new_a = bucket_m_to_alloc(*new);

	bch2_dev_usage_update(c, ca, &old_a, &new_a, 0, true);
}

static inline int __update_replicas(struct bch_fs *c,
				    struct bch_fs_usage *fs_usage,
				    struct bch_replicas_entry_v1 *r,
				    s64 sectors)
{
	int idx = bch2_replicas_entry_idx(c, r);

	if (idx < 0)
		return -1;

	fs_usage_data_type_to_base(&fs_usage->b, r->data_type, sectors);
	fs_usage->replicas[idx] += sectors;
	return 0;
}

int bch2_update_replicas(struct bch_fs *c, struct bkey_s_c k,
			 struct bch_replicas_entry_v1 *r, s64 sectors,
			 unsigned journal_seq, bool gc)
{
	struct bch_fs_usage *fs_usage;
	int idx, ret = 0;
	struct printbuf buf = PRINTBUF;

	percpu_down_read(&c->mark_lock);

	idx = bch2_replicas_entry_idx(c, r);
	if (idx < 0 &&
	    fsck_err(c, ptr_to_missing_replicas_entry,
		     "no replicas entry\n while marking %s",
		     (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		percpu_up_read(&c->mark_lock);
		ret = bch2_mark_replicas(c, r);
		percpu_down_read(&c->mark_lock);

		if (ret)
			goto err;
		idx = bch2_replicas_entry_idx(c, r);
	}
	if (idx < 0) {
		ret = -1;
		goto err;
	}

	preempt_disable();
	fs_usage = fs_usage_ptr(c, journal_seq, gc);
	fs_usage_data_type_to_base(&fs_usage->b, r->data_type, sectors);
	fs_usage->replicas[idx] += sectors;
	preempt_enable();
err:
fsck_err:
	percpu_up_read(&c->mark_lock);
	printbuf_exit(&buf);
	return ret;
}

static inline int update_cached_sectors(struct bch_fs *c,
					struct bkey_s_c k,
					unsigned dev, s64 sectors,
					unsigned journal_seq, bool gc)
{
	struct bch_replicas_padded r;

	bch2_replicas_entry_cached(&r.e, dev);

	return bch2_update_replicas(c, k, &r.e, sectors, journal_seq, gc);
}

static int __replicas_deltas_realloc(struct btree_trans *trans, unsigned more,
				     gfp_t gfp)
{
	struct replicas_delta_list *d = trans->fs_usage_deltas;
	unsigned new_size = d ? (d->size + more) * 2 : 128;
	unsigned alloc_size = sizeof(*d) + new_size;

	WARN_ON_ONCE(alloc_size > REPLICAS_DELTA_LIST_MAX);

	if (!d || d->used + more > d->size) {
		d = krealloc(d, alloc_size, gfp|__GFP_ZERO);

		if (unlikely(!d)) {
			if (alloc_size > REPLICAS_DELTA_LIST_MAX)
				return -ENOMEM;

			d = mempool_alloc(&trans->c->replicas_delta_pool, gfp);
			if (!d)
				return -ENOMEM;

			memset(d, 0, REPLICAS_DELTA_LIST_MAX);

			if (trans->fs_usage_deltas)
				memcpy(d, trans->fs_usage_deltas,
				       trans->fs_usage_deltas->size + sizeof(*d));

			new_size = REPLICAS_DELTA_LIST_MAX - sizeof(*d);
			kfree(trans->fs_usage_deltas);
		}

		d->size = new_size;
		trans->fs_usage_deltas = d;
	}

	return 0;
}

int bch2_replicas_deltas_realloc(struct btree_trans *trans, unsigned more)
{
	return allocate_dropping_locks_errcode(trans,
				__replicas_deltas_realloc(trans, more, _gfp));
}
int bch2_update_replicas_list(struct btree_trans *trans,
			      struct bch_replicas_entry_v1 *r,
			      s64 sectors)
{
	struct replicas_delta_list *d;
	struct replicas_delta *n;
	unsigned b;
	int ret;

	if (!sectors)
		return 0;

	b = replicas_entry_bytes(r) + 8;
	ret = bch2_replicas_deltas_realloc(trans, b);
	if (ret)
		return ret;

	d = trans->fs_usage_deltas;
	n = (void *) d->d + d->used;
	n->delta = sectors;
	unsafe_memcpy((void *) n + offsetof(struct replicas_delta, r),
		      r, replicas_entry_bytes(r),
		      "flexible array member embedded in struct with padding");
	bch2_replicas_entry_sort(&n->r);
	d->used += b;
	return 0;
}
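
/*
 * Illustrative note (not from the original source): each appended entry is
 * a struct replicas_delta - an 8 byte delta followed by the variable
 * length replicas entry - which is why the reservation above is
 * replicas_entry_bytes(r) + 8. Accumulating deltas per transaction and
 * applying them at commit time (see bch2_trans_fs_usage_apply() below)
 * keeps the percpu usage counters off the hot path of every key update.
 */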

int bch2_update_cached_sectors_list(struct btree_trans *trans, unsigned dev, s64 sectors)
{
	struct bch_replicas_padded r;

	bch2_replicas_entry_cached(&r.e, dev);

	return bch2_update_replicas_list(trans, &r.e, sectors);
}

int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
			      size_t b, enum bch_data_type data_type,
			      unsigned sectors, struct gc_pos pos,
			      unsigned flags)
{
	struct bucket old, new, *g;
	int ret = 0;

	BUG_ON(!(flags & BTREE_TRIGGER_GC));
	BUG_ON(data_type != BCH_DATA_sb &&
	       data_type != BCH_DATA_journal);

	/*
	 * Backup superblock might be past the end of our normal usable space:
	 */
	if (b >= ca->mi.nbuckets)
		return 0;

	percpu_down_read(&c->mark_lock);
	g = gc_bucket(ca, b);

	bucket_lock(g);
	old = *g;

	if (bch2_fs_inconsistent_on(g->data_type &&
				    g->data_type != data_type, c,
				    "different types of data in same bucket: %s, %s",
				    bch2_data_type_str(g->data_type),
				    bch2_data_type_str(data_type))) {
		ret = -EIO;
		goto err;
	}

	if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
				    "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > bucket size",
				    ca->dev_idx, b, g->gen,
				    bch2_data_type_str(g->data_type ?: data_type),
				    g->dirty_sectors, sectors)) {
		ret = -EIO;
		goto err;
	}

	g->data_type = data_type;
	g->dirty_sectors += sectors;
	new = *g;
err:
	bucket_unlock(g);
	if (!ret)
		bch2_dev_usage_update_m(c, ca, &old, &new);
	percpu_up_read(&c->mark_lock);
	return ret;
}

int bch2_check_bucket_ref(struct btree_trans *trans,
			  struct bkey_s_c k,
			  const struct bch_extent_ptr *ptr,
			  s64 sectors, enum bch_data_type ptr_data_type,
			  u8 b_gen, u8 bucket_data_type,
			  u32 bucket_sectors)
{
	struct bch_fs *c = trans->c;
	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
	size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (bucket_data_type == BCH_DATA_cached)
		bucket_data_type = BCH_DATA_user;

	if ((bucket_data_type == BCH_DATA_stripe && ptr_data_type == BCH_DATA_user) ||
	    (bucket_data_type == BCH_DATA_user && ptr_data_type == BCH_DATA_stripe))
		bucket_data_type = ptr_data_type = BCH_DATA_stripe;

	if (gen_after(ptr->gen, b_gen)) {
		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
			      BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen,
			      "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
			      "while marking %s",
			      ptr->dev, bucket_nr, b_gen,
			      bch2_data_type_str(bucket_data_type ?: ptr_data_type),
			      ptr->gen,
			      (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
		ret = -EIO;
		goto err;
	}

	if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
			      BCH_FSCK_ERR_ptr_too_stale,
			      "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
			      "while marking %s",
			      ptr->dev, bucket_nr, b_gen,
			      bch2_data_type_str(bucket_data_type ?: ptr_data_type),
			      ptr->gen,
			      (printbuf_reset(&buf),
			       bch2_bkey_val_to_text(&buf, c, k), buf.buf));
		ret = -EIO;
		goto err;
	}

	if (b_gen != ptr->gen && !ptr->cached) {
		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
			      BCH_FSCK_ERR_stale_dirty_ptr,
			      "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n"
			      "while marking %s",
			      ptr->dev, bucket_nr, b_gen,
			      *bucket_gen(ca, bucket_nr),
			      bch2_data_type_str(bucket_data_type ?: ptr_data_type),
			      ptr->gen,
			      (printbuf_reset(&buf),
			       bch2_bkey_val_to_text(&buf, c, k), buf.buf));
		ret = -EIO;
		goto err;
	}

	if (b_gen != ptr->gen) {
		ret = 1;
		goto out;
	}

	if (!data_type_is_empty(bucket_data_type) &&
	    ptr_data_type &&
	    bucket_data_type != ptr_data_type) {
		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
			      BCH_FSCK_ERR_ptr_bucket_data_type_mismatch,
			      "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
			      "while marking %s",
			      ptr->dev, bucket_nr, b_gen,
			      bch2_data_type_str(bucket_data_type),
			      bch2_data_type_str(ptr_data_type),
			      (printbuf_reset(&buf),
			       bch2_bkey_val_to_text(&buf, c, k), buf.buf));
		ret = -EIO;
		goto err;
	}

	if ((u64) bucket_sectors + sectors > U32_MAX) {
		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
			      BCH_FSCK_ERR_bucket_sector_count_overflow,
			      "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n"
			      "while marking %s",
			      ptr->dev, bucket_nr, b_gen,
			      bch2_data_type_str(bucket_data_type ?: ptr_data_type),
			      bucket_sectors, sectors,
			      (printbuf_reset(&buf),
			       bch2_bkey_val_to_text(&buf, c, k), buf.buf));
		ret = -EIO;
		goto err;
	}
out:
	printbuf_exit(&buf);
	return ret;
err:
	bch2_dump_trans_updates(trans);
	goto out;
}
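
/*
 * Worked example of the gen checks above (illustrative, not from the
 * original source): suppose a cached pointer was created when its bucket
 * was at gen 3, and the bucket has since been invalidated and reused, so
 * b_gen is now 4. The pointer is merely stale: we return 1 and the caller
 * skips accounting it. The same mismatch on a dirty (!ptr->cached) pointer
 * means the bucket was reused while still referenced - that's data loss,
 * so it's reported via fsck_err and fails with -EIO.
 */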

void bch2_trans_fs_usage_revert(struct btree_trans *trans,
				struct replicas_delta_list *deltas)
{
	struct bch_fs *c = trans->c;
	struct bch_fs_usage *dst;
	struct replicas_delta *d, *top = (void *) deltas->d + deltas->used;
	s64 added = 0;
	unsigned i;

	percpu_down_read(&c->mark_lock);
	preempt_disable();
	dst = fs_usage_ptr(c, trans->journal_res.seq, false);

	/* revert changes: */
	for (d = deltas->d; d != top; d = replicas_delta_next(d)) {
		switch (d->r.data_type) {
		case BCH_DATA_btree:
		case BCH_DATA_user:
		case BCH_DATA_parity:
			added += d->delta;
		}
		BUG_ON(__update_replicas(c, dst, &d->r, -d->delta));
	}

	dst->b.nr_inodes -= deltas->nr_inodes;

	for (i = 0; i < BCH_REPLICAS_MAX; i++) {
		added -= deltas->persistent_reserved[i];
		dst->b.reserved -= deltas->persistent_reserved[i];
		dst->persistent_reserved[i] -= deltas->persistent_reserved[i];
	}

	if (added > 0) {
		trans->disk_res->sectors += added;
		this_cpu_add(*c->online_reserved, added);
	}

	preempt_enable();
	percpu_up_read(&c->mark_lock);
}
void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
	static int warned_disk_usage = 0;
	bool warn = false;

	percpu_down_read(&c->mark_lock);
	preempt_disable();
	struct bch_fs_usage_base *dst = &fs_usage_ptr(c, trans->journal_res.seq, false)->b;
	struct bch_fs_usage_base *src = &trans->fs_usage_delta;

	s64 added = src->btree + src->data + src->reserved;

	/*
	 * Not allowed to reduce sectors_available except by getting a
	 * reservation:
	 */
	s64 should_not_have_added = added - (s64) disk_res_sectors;
	if (unlikely(should_not_have_added > 0)) {
		u64 old, new, v = atomic64_read(&c->sectors_available);

		do {
			old = v;
			new = max_t(s64, 0, old - should_not_have_added);
		} while ((v = atomic64_cmpxchg(&c->sectors_available,
					       old, new)) != old);

		added -= should_not_have_added;
		warn = true;
	}

	if (added > 0) {
		trans->disk_res->sectors -= added;
		this_cpu_sub(*c->online_reserved, added);
	}

	dst->hidden += src->hidden;
	dst->btree += src->btree;
	dst->data += src->data;
	dst->cached += src->cached;
	dst->reserved += src->reserved;
	dst->nr_inodes += src->nr_inodes;

	preempt_enable();
	percpu_up_read(&c->mark_lock);

	if (unlikely(warn) && !xchg(&warned_disk_usage, 1))
		bch2_trans_inconsistent(trans,
					"disk usage increased %lli more than %llu sectors reserved",
					should_not_have_added, disk_res_sectors);
}

int bch2_trans_fs_usage_apply(struct btree_trans *trans,
			      struct replicas_delta_list *deltas)
{
	struct bch_fs *c = trans->c;
	struct replicas_delta *d, *d2;
	struct replicas_delta *top = (void *) deltas->d + deltas->used;
	struct bch_fs_usage *dst;
	unsigned i;

	percpu_down_read(&c->mark_lock);
	preempt_disable();
	dst = fs_usage_ptr(c, trans->journal_res.seq, false);

	for (d = deltas->d; d != top; d = replicas_delta_next(d))
		if (__update_replicas(c, dst, &d->r, d->delta))
			goto need_mark;

	dst->b.nr_inodes += deltas->nr_inodes;

	for (i = 0; i < BCH_REPLICAS_MAX; i++) {
		dst->b.reserved += deltas->persistent_reserved[i];
		dst->persistent_reserved[i] += deltas->persistent_reserved[i];
	}

	preempt_enable();
	percpu_up_read(&c->mark_lock);
	return 0;
need_mark:
	/* revert changes: */
	for (d2 = deltas->d; d2 != d; d2 = replicas_delta_next(d2))
		BUG_ON(__update_replicas(c, dst, &d2->r, -d2->delta));

	preempt_enable();
	percpu_up_read(&c->mark_lock);
	return -1;
}

/* KEY_TYPE_extent: */

static int __mark_pointer(struct btree_trans *trans,
			  struct bkey_s_c k,
			  const struct bch_extent_ptr *ptr,
			  s64 sectors, enum bch_data_type ptr_data_type,
			  u8 bucket_gen, u8 *bucket_data_type,
			  u32 *dirty_sectors, u32 *cached_sectors)
{
	u32 *dst_sectors = !ptr->cached
		? dirty_sectors
		: cached_sectors;
	int ret = bch2_check_bucket_ref(trans, k, ptr, sectors, ptr_data_type,
					bucket_gen, *bucket_data_type, *dst_sectors);

	if (ret)
		return ret;

	*dst_sectors += sectors;

	if (!*dirty_sectors && !*cached_sectors)
		*bucket_data_type = 0;
	else if (*bucket_data_type != BCH_DATA_stripe)
		*bucket_data_type = ptr_data_type;

	return 0;
}

static int bch2_trigger_pointer(struct btree_trans *trans,
				enum btree_id btree_id, unsigned level,
				struct bkey_s_c k, struct extent_ptr_decoded p,
				const union bch_extent_entry *entry,
				s64 *sectors, unsigned flags)
{
	bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
	struct bpos bucket;
	struct bch_backpointer bp;

	bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, entry, &bucket, &bp);
	*sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len);

	if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
		struct btree_iter iter;
		struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, &iter, bucket);
		int ret = PTR_ERR_OR_ZERO(a);
		if (ret)
			return ret;

		ret = __mark_pointer(trans, k, &p.ptr, *sectors, bp.data_type,
				     a->v.gen, &a->v.data_type,
				     &a->v.dirty_sectors, &a->v.cached_sectors) ?:
			bch2_trans_update(trans, &iter, &a->k_i, 0);
		bch2_trans_iter_exit(trans, &iter);

		if (ret)
			return ret;

		if (!p.ptr.cached) {
			ret = bch2_bucket_backpointer_mod(trans, bucket, bp, k, insert);
			if (ret)
				return ret;
		}
	}

	if (flags & BTREE_TRIGGER_GC) {
		struct bch_fs *c = trans->c;
		struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
		enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);

		percpu_down_read(&c->mark_lock);
		struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
		bucket_lock(g);
		struct bucket old = *g;

		u8 bucket_data_type = g->data_type;
		int ret = __mark_pointer(trans, k, &p.ptr, *sectors,
					 data_type, g->gen,
					 &bucket_data_type,
					 &g->dirty_sectors,
					 &g->cached_sectors);
		if (ret) {
			bucket_unlock(g);
			percpu_up_read(&c->mark_lock);
			return ret;
		}

		g->data_type = bucket_data_type;
		struct bucket new = *g;
		bucket_unlock(g);
		bch2_dev_usage_update_m(c, ca, &old, &new);
		percpu_up_read(&c->mark_lock);
	}

	return 0;
}

static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
				   struct bkey_s_c k,
				   struct extent_ptr_decoded p,
				   enum bch_data_type data_type,
				   s64 sectors, unsigned flags)
{
	if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
		struct btree_iter iter;
		struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter,
				BTREE_ID_stripes, POS(0, p.ec.idx),
				BTREE_ITER_WITH_UPDATES, stripe);
		int ret = PTR_ERR_OR_ZERO(s);
		if (unlikely(ret)) {
			bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans,
						   "pointer to nonexistent stripe %llu",
						   (u64) p.ec.idx);
			goto err;
		}

		if (!bch2_ptr_matches_stripe(&s->v, p)) {
			bch2_trans_inconsistent(trans,
						"stripe pointer doesn't match stripe %llu",
						(u64) p.ec.idx);
			ret = -EIO;
			goto err;
		}

		stripe_blockcount_set(&s->v, p.ec.block,
				      stripe_blockcount_get(&s->v, p.ec.block) +
				      sectors);

		struct bch_replicas_padded r;
		bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
		r.e.data_type = data_type;
		ret = bch2_update_replicas_list(trans, &r.e, sectors);
err:
		bch2_trans_iter_exit(trans, &iter);
		return ret;
	}

	if (flags & BTREE_TRIGGER_GC) {
		struct bch_fs *c = trans->c;

		BUG_ON(!(flags & BTREE_TRIGGER_GC));

		struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL);
		if (!m) {
			bch_err(c, "error allocating memory for gc_stripes, idx %llu",
				(u64) p.ec.idx);
			return -BCH_ERR_ENOMEM_mark_stripe_ptr;
		}

		mutex_lock(&c->ec_stripes_heap_lock);

		if (!m || !m->alive) {
			mutex_unlock(&c->ec_stripes_heap_lock);
			struct printbuf buf = PRINTBUF;
			bch2_bkey_val_to_text(&buf, c, k);
			bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n while marking %s",
					    (u64) p.ec.idx, buf.buf);
			printbuf_exit(&buf);
			bch2_inconsistent_error(c);
			return -EIO;
		}

		m->block_sectors[p.ec.block] += sectors;

		struct bch_replicas_padded r = m->r;
		mutex_unlock(&c->ec_stripes_heap_lock);

		r.e.data_type = data_type;
		bch2_update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true);
	}

	return 0;
}

static int __trigger_extent(struct btree_trans *trans,
			    enum btree_id btree_id, unsigned level,
			    struct bkey_s_c k, unsigned flags)
{
	bool gc = flags & BTREE_TRIGGER_GC;
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct bch_replicas_padded r;
	enum bch_data_type data_type = bkey_is_btree_ptr(k.k)
		? BCH_DATA_btree
		: BCH_DATA_user;
	s64 dirty_sectors = 0;
	int ret = 0;

	r.e.data_type = data_type;
	r.e.nr_devs = 0;
	r.e.nr_required = 1;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		s64 disk_sectors;
		ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags);
		if (ret < 0)
			return ret;

		bool stale = ret > 0;

		if (p.ptr.cached) {
			if (!stale) {
				ret = !gc
					? bch2_update_cached_sectors_list(trans, p.ptr.dev, disk_sectors)
					: update_cached_sectors(c, k, p.ptr.dev, disk_sectors, 0, true);
				bch2_fs_fatal_err_on(ret && gc, c, "%s: no replicas entry while updating cached sectors",
						     bch2_err_str(ret));
				if (ret)
					return ret;
			}
		} else if (!p.has_ec) {
			dirty_sectors += disk_sectors;
			r.e.devs[r.e.nr_devs++] = p.ptr.dev;
		} else {
			ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags);
			if (ret)
				return ret;

			/*
			 * There may be other dirty pointers in this extent, but
			 * if so they're not required for mounting if we have an
			 * erasure coded pointer in this extent:
			 */
			r.e.nr_required = 0;
		}
	}

	if (r.e.nr_devs) {
		ret = !gc
			? bch2_update_replicas_list(trans, &r.e, dirty_sectors)
			: bch2_update_replicas(c, k, &r.e, dirty_sectors, 0, true);
		if (unlikely(ret && gc)) {
			struct printbuf buf = PRINTBUF;

			bch2_bkey_val_to_text(&buf, c, k);
			bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf);
			printbuf_exit(&buf);
		}
		if (ret)
			return ret;
	}

	return 0;
}

int bch2_trigger_extent(struct btree_trans *trans,
			enum btree_id btree_id, unsigned level,
			struct bkey_s_c old, struct bkey_s new,
			unsigned flags)
{
	struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c);
	struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old);
	unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start;
	unsigned old_ptrs_bytes = (void *) old_ptrs.end - (void *) old_ptrs.start;

	/* if pointers aren't changing - nothing to do: */
	if (new_ptrs_bytes == old_ptrs_bytes &&
	    !memcmp(new_ptrs.start,
		    old_ptrs.start,
		    new_ptrs_bytes))
		return 0;

	if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
		struct bch_fs *c = trans->c;
		int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) -
			(int) bch2_bkey_needs_rebalance(c, old);

		if (mod) {
			int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
							      new.k->p, mod > 0);
			if (ret)
				return ret;
		}
	}

	if (flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC))
		return trigger_run_overwrite_then_insert(__trigger_extent, trans, btree_id, level, old, new, flags);

	return 0;
}

/* KEY_TYPE_reservation */

static int __trigger_reservation(struct btree_trans *trans,
				 enum btree_id btree_id, unsigned level,
				 struct bkey_s_c k, unsigned flags)
{
	struct bch_fs *c = trans->c;
	unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
	s64 sectors = (s64) k.k->size * replicas;

	if (flags & BTREE_TRIGGER_OVERWRITE)
		sectors = -sectors;

	if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
		int ret = bch2_replicas_deltas_realloc(trans, 0);
		if (ret)
			return ret;

		struct replicas_delta_list *d = trans->fs_usage_deltas;
		replicas = min(replicas, ARRAY_SIZE(d->persistent_reserved));

		d->persistent_reserved[replicas - 1] += sectors;
	}

	if (flags & BTREE_TRIGGER_GC) {
		percpu_down_read(&c->mark_lock);
		preempt_disable();

		struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage_gc);

		replicas = min(replicas, ARRAY_SIZE(fs_usage->persistent_reserved));
		fs_usage->b.reserved += sectors;
		fs_usage->persistent_reserved[replicas - 1] += sectors;

		preempt_enable();
		percpu_up_read(&c->mark_lock);
	}

	return 0;
}

int bch2_trigger_reservation(struct btree_trans *trans,
			     enum btree_id btree_id, unsigned level,
			     struct bkey_s_c old, struct bkey_s new,
			     unsigned flags)
{
	return trigger_run_overwrite_then_insert(__trigger_reservation, trans, btree_id, level, old, new, flags);
}

/* Mark superblocks: */

static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
					     struct bch_dev *ca, size_t b,
					     enum bch_data_type type,
					     unsigned sectors)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_i_alloc_v4 *a;
	int ret = 0;

	/*
	 * Backup superblock might be past the end of our normal usable space:
	 */
	if (b >= ca->mi.nbuckets)
		return 0;

	a = bch2_trans_start_alloc_update(trans, &iter, POS(ca->dev_idx, b));
	if (IS_ERR(a))
		return PTR_ERR(a);

	if (a->v.data_type && type && a->v.data_type != type) {
		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
			      BCH_FSCK_ERR_bucket_metadata_type_mismatch,
			      "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
			      "while marking %s",
			      iter.pos.inode, iter.pos.offset, a->v.gen,
			      bch2_data_type_str(a->v.data_type),
			      bch2_data_type_str(type),
			      bch2_data_type_str(type));
		ret = -EIO;
		goto err;
	}

	if (a->v.data_type != type ||
	    a->v.dirty_sectors != sectors) {
		a->v.data_type = type;
		a->v.dirty_sectors = sectors;
		ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
	}
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
				    struct bch_dev *ca, size_t b,
				    enum bch_data_type type,
				    unsigned sectors)
{
	return commit_do(trans, NULL, NULL, 0,
			 __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors));
}
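
/*
 * Illustrative use (a sketch mirroring __bch2_trans_mark_dev_sb() below):
 * marking a device's first journal bucket from an existing transaction:
 *
 *	ret = bch2_trans_mark_metadata_bucket(trans, ca,
 *					      ca->journal.buckets[0],
 *					      BCH_DATA_journal,
 *					      ca->mi.bucket_size);
 *
 * commit_do() wraps the update in its own commit, retrying on transaction
 * restarts.
 */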

static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans,
					    struct bch_dev *ca,
					    u64 start, u64 end,
					    enum bch_data_type type,
					    u64 *bucket, unsigned *bucket_sectors)
{
	do {
		u64 b = sector_to_bucket(ca, start);
		unsigned sectors =
			min_t(u64, bucket_to_sector(ca, b + 1), end) - start;

		if (b != *bucket && *bucket_sectors) {
			int ret = bch2_trans_mark_metadata_bucket(trans, ca, *bucket,
								  type, *bucket_sectors);
			if (ret)
				return ret;

			*bucket_sectors = 0;
		}

		*bucket = b;
		*bucket_sectors += sectors;
		start += sectors;
	} while (start < end);

	return 0;
}

static int __bch2_trans_mark_dev_sb(struct btree_trans *trans,
				    struct bch_dev *ca)
{
	struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
	u64 bucket = 0;
	unsigned i, bucket_sectors = 0;
	int ret;

	for (i = 0; i < layout->nr_superblocks; i++) {
		u64 offset = le64_to_cpu(layout->sb_offset[i]);

		if (offset == BCH_SB_SECTOR) {
			ret = bch2_trans_mark_metadata_sectors(trans, ca,
							       0, BCH_SB_SECTOR,
							       BCH_DATA_sb, &bucket, &bucket_sectors);
			if (ret)
				return ret;
		}

		ret = bch2_trans_mark_metadata_sectors(trans, ca, offset,
						       offset + (1 << layout->sb_max_size_bits),
						       BCH_DATA_sb, &bucket, &bucket_sectors);
		if (ret)
			return ret;
	}

	if (bucket_sectors) {
		ret = bch2_trans_mark_metadata_bucket(trans, ca,
						      bucket, BCH_DATA_sb, bucket_sectors);
		if (ret)
			return ret;
	}

	for (i = 0; i < ca->journal.nr; i++) {
		ret = bch2_trans_mark_metadata_bucket(trans, ca,
						      ca->journal.buckets[i],
						      BCH_DATA_journal, ca->mi.bucket_size);
		if (ret)
			return ret;
	}

	return 0;
}

int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca)
{
	int ret = bch2_trans_run(c, __bch2_trans_mark_dev_sb(trans, ca));

	bch_err_fn(c, ret);
	return ret;
}

int bch2_trans_mark_dev_sbs(struct bch_fs *c)
{
	for_each_online_member(c, ca) {
		int ret = bch2_trans_mark_dev_sb(c, ca);
		if (ret) {
			percpu_ref_put(&ca->ref);
			return ret;
		}
	}

	return 0;
}

/* Disk reservations: */

#define SECTORS_CACHE 1024

int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
				u64 sectors, int flags)
{
	struct bch_fs_pcpu *pcpu;
	u64 old, v, get;
	s64 sectors_available;
	int ret;

	percpu_down_read(&c->mark_lock);
	preempt_disable();
	pcpu = this_cpu_ptr(c->pcpu);

	if (sectors <= pcpu->sectors_available)
		goto out;

	v = atomic64_read(&c->sectors_available);
	do {
		old = v;
		get = min((u64) sectors + SECTORS_CACHE, old);

		if (get < sectors) {
			preempt_enable();
			goto recalculate;
		}
	} while ((v = atomic64_cmpxchg(&c->sectors_available,
				       old, old - get)) != old);

	pcpu->sectors_available += get;

out:
	pcpu->sectors_available -= sectors;
	this_cpu_add(*c->online_reserved, sectors);
	res->sectors += sectors;

	preempt_enable();
	percpu_up_read(&c->mark_lock);
	return 0;

recalculate:
	mutex_lock(&c->sectors_available_lock);

	percpu_u64_set(&c->pcpu->sectors_available, 0);
	sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);

	if (sectors <= sectors_available ||
	    (flags & BCH_DISK_RESERVATION_NOFAIL)) {
		atomic64_set(&c->sectors_available,
			     max_t(s64, 0, sectors_available - sectors));
		this_cpu_add(*c->online_reserved, sectors);
		res->sectors += sectors;
		ret = 0;
	} else {
		atomic64_set(&c->sectors_available, sectors_available);
		ret = -BCH_ERR_ENOSPC_disk_reservation;
	}

	mutex_unlock(&c->sectors_available_lock);
	percpu_up_read(&c->mark_lock);

	return ret;
}
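
/*
 * Example (a sketch; assumes bch2_disk_reservation_put() from buckets.h):
 * the fast path above services most reservations from the percpu
 * sectors_available cache, refilled SECTORS_CACHE sectors at a time from
 * the global atomic, so a typical caller looks like:
 *
 *	struct disk_reservation res = { 0 };
 *
 *	ret = __bch2_disk_reservation_add(c, &res, sectors, 0);
 *	if (ret)
 *		return ret;
 *	... do the write that consumes the reservation ...
 *	bch2_disk_reservation_put(c, &res);
 *
 * Only when both the percpu cache and the global counter run dry do we
 * fall back to recalculating free space under sectors_available_lock.
 */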

/* Startup/shutdown: */

static void bucket_gens_free_rcu(struct rcu_head *rcu)
{
	struct bucket_gens *buckets =
		container_of(rcu, struct bucket_gens, rcu);

	kvfree(buckets);
}

int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
{
	struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL;
	unsigned long *buckets_nouse = NULL;
	bool resize = ca->bucket_gens != NULL;
	int ret;

	if (!(bucket_gens = kvmalloc(sizeof(struct bucket_gens) + nbuckets,
				     GFP_KERNEL|__GFP_ZERO))) {
		ret = -BCH_ERR_ENOMEM_bucket_gens;
		goto err;
	}

	if ((c->opts.buckets_nouse &&
	     !(buckets_nouse = kvmalloc(BITS_TO_LONGS(nbuckets) *
					sizeof(unsigned long),
					GFP_KERNEL|__GFP_ZERO)))) {
		ret = -BCH_ERR_ENOMEM_buckets_nouse;
		goto err;
	}

	bucket_gens->first_bucket = ca->mi.first_bucket;
	bucket_gens->nbuckets = nbuckets;

	if (resize) {
		down_write(&c->gc_lock);
		down_write(&ca->bucket_lock);
		percpu_down_write(&c->mark_lock);
	}

	old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);

	if (resize) {
		size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets);

		memcpy(bucket_gens->b,
		       old_bucket_gens->b,
		       n);
		if (buckets_nouse)
			memcpy(buckets_nouse,
			       ca->buckets_nouse,
			       BITS_TO_LONGS(n) * sizeof(unsigned long));
	}

	rcu_assign_pointer(ca->bucket_gens, bucket_gens);
	bucket_gens = old_bucket_gens;

	swap(ca->buckets_nouse, buckets_nouse);

	nbuckets = ca->mi.nbuckets;

	if (resize) {
		percpu_up_write(&c->mark_lock);
		up_write(&ca->bucket_lock);
		up_write(&c->gc_lock);
	}

	ret = 0;
err:
	kvfree(buckets_nouse);
	if (bucket_gens)
		call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu);

	return ret;
}
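
/*
 * Illustrative note (not from the original source): bucket_gens follows
 * the classic RCU publish/retire pattern - rcu_assign_pointer() above
 * publishes the new array, and call_rcu() frees the old one once all
 * readers are done. A lockless reader might look something like:
 *
 *	rcu_read_lock();
 *	struct bucket_gens *gens = rcu_dereference(ca->bucket_gens);
 *	u8 gen = b < gens->nbuckets ? gens->b[b] : 0;
 *	rcu_read_unlock();
 */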

void bch2_dev_buckets_free(struct bch_dev *ca)
{
	kvfree(ca->buckets_nouse);
	kvfree(rcu_dereference_protected(ca->bucket_gens, 1));

	for (unsigned i = 0; i < ARRAY_SIZE(ca->usage); i++)
		free_percpu(ca->usage[i]);
	kfree(ca->usage_base);
}

int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
{
	ca->usage_base = kzalloc(sizeof(struct bch_dev_usage), GFP_KERNEL);
	if (!ca->usage_base)
		return -BCH_ERR_ENOMEM_usage_init;

	for (unsigned i = 0; i < ARRAY_SIZE(ca->usage); i++) {
		ca->usage[i] = alloc_percpu(struct bch_dev_usage);
		if (!ca->usage[i])
			return -BCH_ERR_ENOMEM_usage_init;
	}

	return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
}