/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Code for manipulating bucket marks for garbage collection.
 *
 * Copyright 2014 Datera, Inc.
 */

#ifndef _BUCKETS_H
#define _BUCKETS_H

#include "buckets_types.h"
#include "extents.h"
#include "sb-members.h"

static inline size_t sector_to_bucket(const struct bch_dev *ca, sector_t s)
{
	return div_u64(s, ca->mi.bucket_size);
}

static inline sector_t bucket_to_sector(const struct bch_dev *ca, size_t b)
{
	return ((sector_t) b) * ca->mi.bucket_size;
}

static inline sector_t bucket_remainder(const struct bch_dev *ca, sector_t s)
{
	u32 remainder;

	div_u64_rem(s, ca->mi.bucket_size, &remainder);
	return remainder;
}

static inline size_t sector_to_bucket_and_offset(const struct bch_dev *ca, sector_t s,
						 u32 *offset)
{
	return div_u64_rem(s, ca->mi.bucket_size, offset);
}
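
/*
 * Illustrative example (the bucket_size value is hypothetical): with a
 * bucket_size of 1024 sectors, sector 3000 falls in bucket 2 at offset 952:
 *
 *	u32 offset;
 *	size_t b = sector_to_bucket_and_offset(ca, 3000, &offset);
 *	// b == 2, offset == 952, and bucket_to_sector(ca, 2) == 2048
 */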

#define for_each_bucket(_b, _buckets)				\
	for (_b = (_buckets)->b + (_buckets)->first_bucket;	\
	     _b < (_buckets)->b + (_buckets)->nbuckets; _b++)

/*
 * Ugly hack alert:
 *
 * We need to cram a spinlock in a single byte, because that's what we have left
 * in struct bucket, and we care about the size of these - during fsck, we need
 * in memory state for every single bucket on every device.
 *
 * We used to do
 *	while (xchg(&b->lock, 1))
 *		cpu_relax();
 * but, it turns out not all architectures support xchg on a single byte.
 *
 * So now we use bit_spin_lock(), with fun games since we can't burn a whole
 * ulong for this - we just need to make sure the lock bit always ends up in the
 * first byte.
 */

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define BUCKET_LOCK_BITNR	0
#else
#define BUCKET_LOCK_BITNR	(BITS_PER_LONG - 1)
#endif

union ulong_byte_assert {
	ulong	ulong;
	u8	byte;
};
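
/*
 * The union above exists purely for the BUILD_BUG_ON() in bucket_unlock(): it
 * verifies at compile time that 1UL << BUCKET_LOCK_BITNR really sets a bit in
 * the first byte of the ulong. On little endian byte 0 holds bit 0; on 64-bit
 * big endian byte 0 holds the most significant bits, hence bit 63.
 */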

static inline void bucket_unlock(struct bucket *b)
{
	BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte);

	clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock);
	wake_up_bit((void *) &b->lock, BUCKET_LOCK_BITNR);
}

static inline void bucket_lock(struct bucket *b)
{
	wait_on_bit_lock((void *) &b->lock, BUCKET_LOCK_BITNR,
			 TASK_UNINTERRUPTIBLE);
}
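
/*
 * Sketch of the intended usage (hypothetical caller, not from this file):
 * the lock only protects the in-memory state in struct bucket, and should
 * only be held briefly:
 *
 *	bucket_lock(g);
 *	g->data_type = BCH_DATA_user;	// example field update
 *	bucket_unlock(g);
 */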

static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca)
{
	return rcu_dereference_check(ca->buckets_gc,
				     !ca->fs ||
				     percpu_rwsem_is_held(&ca->fs->mark_lock) ||
				     lockdep_is_held(&ca->fs->gc_lock) ||
				     lockdep_is_held(&ca->bucket_lock));
}

static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
{
	struct bucket_array *buckets = gc_bucket_array(ca);

	BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets);
	return buckets->b + b;
}

static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
{
	return rcu_dereference_check(ca->bucket_gens,
				     !ca->fs ||
				     percpu_rwsem_is_held(&ca->fs->mark_lock) ||
				     lockdep_is_held(&ca->fs->gc_lock) ||
				     lockdep_is_held(&ca->bucket_lock));
}

static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
{
	struct bucket_gens *gens = bucket_gens(ca);

	BUG_ON(b < gens->first_bucket || b >= gens->nbuckets);
	return gens->b + b;
}
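
/*
 * bucket_gen() returns a pointer into an RCU-protected array; a caller that
 * holds none of the locks checked by bucket_gens() must be inside an RCU
 * read-side critical section, e.g. (see ptr_stale() below):
 *
 *	rcu_read_lock();
 *	u8 gen = *bucket_gen(ca, b);
 *	rcu_read_unlock();
 */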

static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca,
				   const struct bch_extent_ptr *ptr)
{
	return sector_to_bucket(ca, ptr->offset);
}

static inline struct bpos PTR_BUCKET_POS(const struct bch_fs *c,
					 const struct bch_extent_ptr *ptr)
{
	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);

	return POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
}

static inline struct bpos PTR_BUCKET_POS_OFFSET(const struct bch_fs *c,
						const struct bch_extent_ptr *ptr,
						u32 *bucket_offset)
{
	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);

	return POS(ptr->dev, sector_to_bucket_and_offset(ca, ptr->offset, bucket_offset));
}

static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca,
					   const struct bch_extent_ptr *ptr)
{
	return gc_bucket(ca, PTR_BUCKET_NR(ca, ptr));
}

static inline enum bch_data_type ptr_data_type(const struct bkey *k,
					       const struct bch_extent_ptr *ptr)
{
	if (bkey_is_btree_ptr(k))
		return BCH_DATA_btree;

	return ptr->cached ? BCH_DATA_cached : BCH_DATA_user;
}

static inline s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p)
{
	EBUG_ON(sectors < 0);

	return crc_is_compressed(p.crc)
		? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size,
				   p.crc.uncompressed_size)
		: sectors;
}
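
/*
 * Worked example (hypothetical crc values): for an extent whose 128
 * uncompressed sectors were compressed down to 32, charging 64 live sectors
 * costs DIV_ROUND_UP_ULL(64 * 32, 128) == 16 sectors of disk space - i.e.
 * the live fraction of the compressed size.
 */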

static inline int gen_cmp(u8 a, u8 b)
{
	return (s8) (a - b);
}

static inline int gen_after(u8 a, u8 b)
{
	int r = gen_cmp(a, b);

	return r > 0 ? r : 0;
}
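
/*
 * Bucket generation numbers are 8 bits and wrap around; the cast to s8 in
 * gen_cmp() keeps the comparison correct as long as the two gens are within
 * 128 of each other. E.g. gen_cmp(1, 254) == (s8) 3 == 3, so gen 1 is
 * correctly seen as newer than gen 254.
 */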

/**
 * ptr_stale() - check if a pointer points into a bucket that has been
 * invalidated.
 * @ca:		device the pointer refers to
 * @ptr:	extent pointer to check
 *
 * Returns: the number of generations the bucket's gen is ahead of @ptr->gen,
 * or 0 if the pointer is not stale.
 */
static inline u8 ptr_stale(struct bch_dev *ca,
			   const struct bch_extent_ptr *ptr)
{
	u8 ret;

	rcu_read_lock();
	ret = gen_after(*bucket_gen(ca, PTR_BUCKET_NR(ca, ptr)), ptr->gen);
	rcu_read_unlock();

	return ret;
}

/* Device usage: */

void bch2_dev_usage_read_fast(struct bch_dev *, struct bch_dev_usage *);
static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
{
	struct bch_dev_usage ret;

	bch2_dev_usage_read_fast(ca, &ret);
	return ret;
}

void bch2_dev_usage_init(struct bch_dev *);
void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *);

static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark)
{
	s64 reserved = 0;

	switch (watermark) {
	case BCH_WATERMARK_NR:
		BUG();
	case BCH_WATERMARK_stripe:
		reserved += ca->mi.nbuckets >> 6;
		fallthrough;
	case BCH_WATERMARK_normal:
		reserved += ca->mi.nbuckets >> 6;
		fallthrough;
	case BCH_WATERMARK_copygc:
		reserved += ca->nr_btree_reserve;
		fallthrough;
	case BCH_WATERMARK_btree:
		reserved += ca->nr_btree_reserve;
		fallthrough;
	case BCH_WATERMARK_btree_copygc:
	case BCH_WATERMARK_reclaim:
	case BCH_WATERMARK_interior_updates:
		break;
	}

	return reserved;
}
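
/*
 * Because of the fallthroughs, each watermark includes the reserves of every
 * stricter watermark below it in the enum. Worked example with hypothetical
 * numbers - nbuckets == 1024, nr_btree_reserve == 8:
 *
 *	BCH_WATERMARK_stripe:	16 + 16 + 8 + 8 == 48 buckets
 *	BCH_WATERMARK_btree:	8 buckets
 *	BCH_WATERMARK_reclaim:	0 buckets
 */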

static inline u64 dev_buckets_free(struct bch_dev *ca,
				   struct bch_dev_usage usage,
				   enum bch_watermark watermark)
{
	return max_t(s64, 0,
		     usage.d[BCH_DATA_free].buckets -
		     ca->nr_open_buckets -
		     bch2_dev_buckets_reserved(ca, watermark));
}

static inline u64 __dev_buckets_available(struct bch_dev *ca,
					  struct bch_dev_usage usage,
					  enum bch_watermark watermark)
{
	return max_t(s64, 0,
		       usage.d[BCH_DATA_free].buckets
		     + usage.d[BCH_DATA_cached].buckets
		     + usage.d[BCH_DATA_need_gc_gens].buckets
		     + usage.d[BCH_DATA_need_discard].buckets
		     - ca->nr_open_buckets
		     - bch2_dev_buckets_reserved(ca, watermark));
}
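
/*
 * Note the asymmetry with dev_buckets_free() above: "available" also counts
 * cached, need_gc_gens and need_discard buckets, since those can be
 * invalidated or discarded and reused without moving any user data.
 */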

static inline u64 dev_buckets_available(struct bch_dev *ca,
					enum bch_watermark watermark)
{
	return __dev_buckets_available(ca, bch2_dev_usage_read(ca), watermark);
}

/* Filesystem usage: */

static inline unsigned __fs_usage_u64s(unsigned nr_replicas)
{
	return sizeof(struct bch_fs_usage) / sizeof(u64) + nr_replicas;
}

static inline unsigned fs_usage_u64s(struct bch_fs *c)
{
	return __fs_usage_u64s(READ_ONCE(c->replicas.nr));
}

static inline unsigned __fs_usage_online_u64s(unsigned nr_replicas)
{
	return sizeof(struct bch_fs_usage_online) / sizeof(u64) + nr_replicas;
}

static inline unsigned fs_usage_online_u64s(struct bch_fs *c)
{
	return __fs_usage_online_u64s(READ_ONCE(c->replicas.nr));
}

static inline unsigned dev_usage_u64s(void)
{
	return sizeof(struct bch_dev_usage) / sizeof(u64);
}

u64 bch2_fs_usage_read_one(struct bch_fs *, u64 *);

struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *);

void bch2_fs_usage_acc_to_base(struct bch_fs *, unsigned);

void bch2_fs_usage_to_text(struct printbuf *,
			   struct bch_fs *, struct bch_fs_usage_online *);

u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage_online *);

struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *);

void bch2_dev_usage_update(struct bch_fs *, struct bch_dev *,
			   const struct bch_alloc_v4 *,
			   const struct bch_alloc_v4 *, u64, bool);
void bch2_dev_usage_update_m(struct bch_fs *, struct bch_dev *,
			     struct bucket *, struct bucket *);

/* key/bucket marking: */

static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
						unsigned journal_seq,
						bool gc)
{
	percpu_rwsem_assert_held(&c->mark_lock);
	BUG_ON(!gc && !journal_seq);

	return this_cpu_ptr(gc
			    ? c->usage_gc
			    : c->usage[journal_seq & JOURNAL_BUF_MASK]);
}
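
/*
 * Rough usage sketch (hypothetical caller; assumes mark_lock is held, as
 * asserted above, and that preemption is disabled around the percpu access):
 *
 *	preempt_disable();
 *	struct bch_fs_usage *u = fs_usage_ptr(c, journal_seq, false);
 *	// ... accumulate usage deltas into *u ...
 *	preempt_enable();
 *
 * With gc == false, journal_seq picks which of the percpu journal-buffer
 * usage arrays to accumulate into.
 */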

int bch2_update_replicas(struct bch_fs *, struct bkey_s_c,
			 struct bch_replicas_entry_v1 *, s64,
			 unsigned, bool);
int bch2_update_replicas_list(struct btree_trans *,
			      struct bch_replicas_entry_v1 *, s64);
int bch2_update_cached_sectors_list(struct btree_trans *, unsigned, s64);
int bch2_replicas_deltas_realloc(struct btree_trans *, unsigned);

void bch2_fs_usage_initialize(struct bch_fs *);

int bch2_check_bucket_ref(struct btree_trans *, struct bkey_s_c,
			  const struct bch_extent_ptr *,
			  s64, enum bch_data_type, u8, u8, u32);

int bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
			      size_t, enum bch_data_type, unsigned,
			      struct gc_pos, unsigned);

int bch2_trigger_extent(struct btree_trans *, enum btree_id, unsigned,
			struct bkey_s_c, struct bkey_s, unsigned);
int bch2_trigger_reservation(struct btree_trans *, enum btree_id, unsigned,
			     struct bkey_s_c, struct bkey_s, unsigned);

#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\
({												\
	int ret = 0;										\
												\
	if (_old.k->type)									\
		ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT);	\
	if (!ret && _new.k->type)								\
		ret = _fn(_trans, _btree_id, _level, _new.s_c, _flags & ~BTREE_TRIGGER_OVERWRITE);\
	ret;											\
})
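
/*
 * Sketch of how a trigger typically uses the macro above (hypothetical
 * __trigger_foo(), following the shape of bch2_trigger_extent()): the per-key
 * function runs once for the overwritten key with BTREE_TRIGGER_INSERT masked
 * off, then once for the inserted key with BTREE_TRIGGER_OVERWRITE masked off:
 *
 *	return trigger_run_overwrite_then_insert(__trigger_foo, trans,
 *				btree_id, level, old, new, flags);
 */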

void bch2_trans_account_disk_usage_change(struct btree_trans *);

void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *);
int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);

int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,
				    size_t, enum bch_data_type, unsigned);
int bch2_trans_mark_dev_sb(struct bch_fs *, struct bch_dev *);
int bch2_trans_mark_dev_sbs(struct bch_fs *);

static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b)
{
	struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
	u64 b_offset	= bucket_to_sector(ca, b);
	u64 b_end	= bucket_to_sector(ca, b + 1);
	unsigned i;

	if (!b)
		return true;

	for (i = 0; i < layout->nr_superblocks; i++) {
		u64 offset = le64_to_cpu(layout->sb_offset[i]);
		u64 end = offset + (1 << layout->sb_max_size_bits);

		if (!(offset >= b_end || end <= b_offset))
			return true;
	}

	return false;
}
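
/*
 * The test above is a standard half-open interval overlap check: bucket b
 * spans [b_offset, b_end) and superblock i spans [offset, end); they overlap
 * iff neither ends before the other begins. Bucket 0 is always reserved,
 * since it holds the superblock layout itself.
 */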

static inline const char *bch2_data_type_str(enum bch_data_type type)
{
	return type < BCH_DATA_NR
		? __bch2_data_types[type]
		: "(invalid data type)";
}

/* disk reservations: */

static inline void bch2_disk_reservation_put(struct bch_fs *c,
					     struct disk_reservation *res)
{
	if (res->sectors) {
		this_cpu_sub(*c->online_reserved, res->sectors);
		res->sectors = 0;
	}
}

#define BCH_DISK_RESERVATION_NOFAIL		(1 << 0)

int __bch2_disk_reservation_add(struct bch_fs *,
				struct disk_reservation *,
				u64, int);

static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
					    u64 sectors, int flags)
{
#ifdef __KERNEL__
	u64 old, new;

	do {
		old = this_cpu_read(c->pcpu->sectors_available);
		if (sectors > old)
			return __bch2_disk_reservation_add(c, res, sectors, flags);

		new = old - sectors;
	} while (this_cpu_cmpxchg(c->pcpu->sectors_available, old, new) != old);

	this_cpu_add(*c->online_reserved, sectors);
	res->sectors += sectors;
	return 0;
#else
	return __bch2_disk_reservation_add(c, res, sectors, flags);
#endif
}
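
/*
 * Fast path note: in the kernel, the loop above first tries to satisfy the
 * reservation from this CPU's percpu sectors_available cache via cmpxchg;
 * only when that cache can't cover the request does it fall back to
 * __bch2_disk_reservation_add(), which draws from the global counters.
 */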

static inline struct disk_reservation
bch2_disk_reservation_init(struct bch_fs *c, unsigned nr_replicas)
{
	return (struct disk_reservation) {
		.sectors	= 0,
#if 0
		/* not used yet: */
		.gen		= c->capacity_gen,
#endif
		.nr_replicas	= nr_replicas,
	};
}

static inline int bch2_disk_reservation_get(struct bch_fs *c,
					    struct disk_reservation *res,
					    u64 sectors, unsigned nr_replicas,
					    int flags)
{
	*res = bch2_disk_reservation_init(c, nr_replicas);

	return bch2_disk_reservation_add(c, res, sectors * nr_replicas, flags);
}
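
/*
 * Usage sketch (hypothetical caller): take a reservation before writing,
 * release it once the write has been accounted:
 *
 *	struct disk_reservation res;
 *
 *	if (!bch2_disk_reservation_get(c, &res, sectors, nr_replicas, 0)) {
 *		// ... do the write covered by the reservation ...
 *		bch2_disk_reservation_put(c, &res);
 *	}
 */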

#define RESERVE_FACTOR	6

static inline u64 avail_factor(u64 r)
{
	return div_u64(r << RESERVE_FACTOR, (1 << RESERVE_FACTOR) + 1);
}
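
/*
 * avail_factor() scales capacity by 2^RESERVE_FACTOR / (2^RESERVE_FACTOR + 1)
 * = 64/65, holding back roughly 1.5% as slack: e.g. avail_factor(65) == 64.
 */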

int bch2_dev_buckets_resize(struct bch_fs *, struct bch_dev *, u64);
void bch2_dev_buckets_free(struct bch_dev *);
int bch2_dev_buckets_alloc(struct bch_fs *, struct bch_dev *);

#endif /* _BUCKETS_H */