/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_H
#define _BCACHEFS_H

/*
 * SOME HIGH LEVEL CODE DOCUMENTATION:
 *
 * Bcache mostly works with cache sets, cache devices, and backing devices.
 *
 * Support for multiple cache devices hasn't quite been finished off yet, but
 * it's about 95% plumbed through. A cache set and its cache devices are sort of
 * like an md raid array and its component devices. Most of the code doesn't care
 * about individual cache devices; the main abstraction is the cache set.
 *
 * Support for multiple cache devices is intended to give us the ability to
 * mirror dirty cached data and metadata, without mirroring clean cached data.
 *
 * Backing devices are different, in that they have a lifetime independent of a
 * cache set. When you register a newly formatted backing device it'll come up
 * in passthrough mode, and then you can attach and detach a backing device from
 * a cache set at runtime - while it's mounted and in use. Detaching implicitly
 * invalidates any cached data for that backing device.
 *
 * A cache set can have multiple (many) backing devices attached to it.
 *
 * There's also flash only volumes - this is the reason for the distinction
 * between struct cached_dev and struct bcache_device. A flash only volume
 * works much like a bcache device that has a backing device, except the
 * "cached" data is always dirty. The end result is that we get thin
 * provisioning with very little additional code.
 *
 * Flash only volumes work but they're not production ready because the moving
 * garbage collector needs more work. More on that later.
 *
 * BUCKETS/ALLOCATION:
 *
 * Bcache is primarily designed for caching, which means that in normal
 * operation all of our available space will be allocated. Thus, we need an
 * efficient way of deleting things from the cache so we can write new things to
 * it.
 *
 * To do this, we first divide the cache device up into buckets. A bucket is the
 * unit of allocation; they're typically around 1 mb - anywhere from 128k to 2M+
 * works efficiently.
 *
 * Each bucket has a 16 bit priority, and an 8 bit generation associated with
 * it. The gens and priorities for all the buckets are stored contiguously and
 * packed on disk (in a linked list of buckets - aside from the superblock, all
 * of bcache's metadata is stored in buckets).
 *
 * The priority is used to implement an LRU. We reset a bucket's priority when
 * we allocate it or on a cache hit, and every so often we decrement the priority
 * of each bucket. It could be used to implement something more sophisticated,
 * if anyone ever gets around to it.
 *
 * The generation is used for invalidating buckets. Each pointer also has an 8
 * bit generation embedded in it; for a pointer to be considered valid, its gen
 * must match the gen of the bucket it points into. Thus, to reuse a bucket all
 * we have to do is increment its gen (and write its new gen to disk; we batch
 * this up).
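 *
 * As a rough sketch (hypothetical field and function names, not the actual
 * bcachefs types), the pointer validity check amounts to comparing two
 * generation numbers:
 *
 *	struct example_bucket { u8 gen; };		// per-bucket state
 *	struct example_ptr    { u64 offset; u8 gen; };	// gen copied into the pointer
 *
 *	static bool example_ptr_valid(const struct example_bucket *b,
 *				      const struct example_ptr *p)
 *	{
 *		return p->gen == b->gen;	// stale pointers simply fail this test
 *	}
 *
 * Invalidating everything that points into a bucket is then just b->gen++,
 * plus (batched) writing of the new gen to disk.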
 *
 * Bcache is entirely COW - we never write twice to a bucket, even buckets that
 * contain metadata (including btree nodes).
 *
 * THE BTREE:
 *
 * Bcache is in large part designed around the btree.
 *
 * At a high level, the btree is just an index of key -> ptr tuples.
 *
 * Keys represent extents, and thus have a size field. Keys also have a variable
 * number of pointers attached to them (potentially zero, which is handy for
 * invalidating the cache).
 *
 * The key itself is an inode:offset pair. The inode number corresponds to a
 * backing device or a flash only volume. The offset is the ending offset of the
 * extent within the inode - not the starting offset; this makes lookups
 * slightly more convenient.
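 *
 * For example (purely illustrative numbers): an extent covering sectors
 * [100, 108) of inode 5 is indexed under the key 5:108 with size 8, so its
 * start is offset - size = 100. A lookup for inode 5, sector 100 searches for
 * the first key with offset > 100; that key is 5:108, and since its start is
 * <= 100 it covers the sector we asked about.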
 *
 * Pointers contain the cache device id, the offset on that device, and an 8 bit
 * generation number. More on the gen later.
 *
 * Index lookups are not fully abstracted - cache lookups in particular are
 * still somewhat mixed in with the btree code, but things are headed in that
 * direction.
 *
 * Updates are fairly well abstracted, though. There are two different ways of
 * updating the btree; insert and replace.
 *
 * BTREE_INSERT will just take a list of keys and insert them into the btree -
 * overwriting (possibly only partially) any extents they overlap with. This is
 * used to update the index after a write.
 *
 * BTREE_REPLACE is really cmpxchg(); it inserts a key into the btree iff it is
 * overwriting a key that matches another given key. This is used for inserting
 * data into the cache after a cache miss, and for background writeback, and for
 * the moving garbage collector.
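 *
 * In rough pseudo-C (hypothetical helper names, not the real update
 * interface), the two operations behave like:
 *
 *	// BTREE_INSERT: unconditional; new_key overwrites whatever it overlaps
 *	example_btree_insert(b, new_key);
 *
 *	// BTREE_REPLACE: conditional, like cmpxchg() - only apply new_key if
 *	// old_key is still what the index contains at that position
 *	if (example_btree_contains(b, old_key))
 *		example_btree_insert(b, new_key);
 *	// else: we raced with a foreground write, so this update is dropped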
 *
 * There is no "delete" operation; deleting things from the index is
 * accomplished either by invalidating pointers (by incrementing a bucket's
 * gen) or by inserting a key with 0 pointers - which will overwrite anything
 * previously present at that location in the index.
 *
 * This means that there are always stale/invalid keys in the btree. They're
 * filtered out by the code that iterates through a btree node, and removed when
 * a btree node is rewritten.
 *
 * BTREE NODES:
 *
 * Our unit of allocation is a bucket, and we can't arbitrarily allocate and
 * free smaller than a bucket - so, that's how big our btree nodes are.
 *
 * (If buckets are really big we'll only use part of the bucket for a btree node
 * - no less than 1/4th - but a bucket still contains no more than a single
 * btree node. I'd actually like to change this, but for now we rely on the
 * bucket's gen for deleting btree nodes when we rewrite/split a node.)
 *
 * Anyways, btree nodes are big - big enough to be inefficient with a textbook
 * btree implementation.
 *
 * The way this is solved is that btree nodes are internally log structured; we
 * can append new keys to an existing btree node without rewriting it. This
 * means each set of keys we write is sorted, but the node is not.
 *
 * We maintain this log structure in memory - keeping 1Mb of keys sorted would
 * be expensive, and we have to distinguish between the keys we have written and
 * the keys we haven't. So to do a lookup in a btree node, we have to search
 * each sorted set. But we do merge written sets together lazily, so the cost of
 * these extra searches is quite low (normally most of the keys in a btree node
 * will be in one big set, and then there'll be one or two sets that are much
 * smaller).
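 *
 * A lookup within a node is therefore conceptually (a minimal sketch with
 * made-up names, not the actual bset code):
 *
 *	// Binary search each sorted set and keep the best candidate; most keys
 *	// live in one big set, so the extra searches are over tiny sets.
 *	struct bkey *example_node_lookup(struct example_bset *sets, unsigned nr_sets,
 *					 struct bpos search)
 *	{
 *		struct bkey *best = NULL;
 *
 *		for (unsigned i = 0; i < nr_sets; i++) {
 *			struct bkey *k = example_bset_search(&sets[i], search);
 *			if (k && (!best || example_key_cmp(k, best) < 0))
 *				best = k;
 *		}
 *		return best;
 *	}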
 *
 * This log structure makes bcache's btree more of a hybrid between a
 * conventional btree and a compacting data structure, with some of the
 * advantages of both.
 *
 * GARBAGE COLLECTION:
 *
 * We can't just invalidate any bucket - it might contain dirty data or
 * metadata. If it once contained dirty data, other writes might overwrite it
 * later, leaving no valid pointers into that bucket in the index.
 *
 * Thus, the primary purpose of garbage collection is to find buckets to reuse.
 * It also counts how much valid data each bucket currently contains, so that
 * allocation can reuse buckets sooner when they've been mostly overwritten.
 *
 * It also does some things that are really internal to the btree
 * implementation. If a btree node contains pointers that are stale by more than
 * some threshold, it rewrites the btree node to avoid the bucket's generation
 * wrapping around. It also merges adjacent btree nodes if they're empty enough.
 *
 * THE JOURNAL:
 *
 * Bcache's journal is not necessary for consistency; we always strictly
 * order metadata writes so that the btree and everything else is consistent on
 * disk in the event of an unclean shutdown, and in fact bcache had writeback
 * caching (with recovery from unclean shutdown) before journalling was
 * implemented.
 *
 * Rather, the journal is purely a performance optimization; we can't complete a
 * write until we've updated the index on disk, otherwise the cache would be
 * inconsistent in the event of an unclean shutdown. This means that without the
 * journal, on random write workloads we constantly have to update all the leaf
 * nodes in the btree, and those writes will be mostly empty (appending at most
 * a few keys each) - highly inefficient in terms of amount of metadata writes,
 * and it puts more strain on the various btree resorting/compacting code.
 *
 * The journal is just a log of keys we've inserted; on startup we just reinsert
 * all the keys in the open journal entries. That means that when we're updating
 * a node in the btree, we can wait until a 4k block of keys fills up before
 * writing them out.
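 *
 * Journal replay at startup is thus conceptually just (hypothetical names,
 * not the real replay code):
 *
 *	// Every key logged in a still-open journal entry gets re-inserted;
 *	// re-inserting a key that already reached the btree overwrites it
 *	// with identical data, so replay is safe to repeat.
 *	for_each_open_journal_entry(j, entry)
 *		for_each_journal_key(entry, k)
 *			example_btree_insert(b, k);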
 *
 * For simplicity, we only journal updates to leaf nodes; updates to parent
 * nodes are rare enough (since our leaf nodes are huge) that it wasn't worth
 * the complexity to deal with journalling them (in particular, journal replay)
 * - updates to non leaf nodes just happen synchronously (see btree_split()).
 */

#undef pr_fmt
#ifdef __KERNEL__
#define pr_fmt(fmt) "bcachefs: %s() " fmt "\n", __func__
#else
#define pr_fmt(fmt) "%s() " fmt "\n", __func__
#endif

#include <linux/backing-dev-defs.h>
#include <linux/bug.h>
#include <linux/bio.h>
#include <linux/closure.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/math64.h>
#include <linux/mutex.h>
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
#include <linux/refcount.h>
#include <linux/rhashtable.h>
#include <linux/rwsem.h>
#include <linux/semaphore.h>
#include <linux/seqlock.h>
#include <linux/shrinker.h>
#include <linux/srcu.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/zstd.h>

#include "bcachefs_format.h"
#include "errcode.h"
#include "fifo.h"
#include "nocow_locking_types.h"
#include "opts.h"
#include "recovery_passes_types.h"
#include "sb-errors_types.h"
#include "seqmutex.h"
#include "time_stats.h"
#include "util.h"

#ifdef CONFIG_BCACHEFS_DEBUG
#define BCH_WRITE_REF_DEBUG
#endif

#ifndef dynamic_fault
#define dynamic_fault(...) 0
#endif

#define race_fault(...) dynamic_fault("bcachefs:race")

#define count_event(_c, _name) this_cpu_inc((_c)->counters[BCH_COUNTER_##_name])

#define trace_and_count(_c, _name, ...) \
do { \
	count_event(_c, _name); \
	trace_##_name(__VA_ARGS__); \
} while (0)

#define bch2_fs_init_fault(name) \
	dynamic_fault("bcachefs:bch_fs_init:" name)
#define bch2_meta_read_fault(name) \
	dynamic_fault("bcachefs:meta:read:" name)
#define bch2_meta_write_fault(name) \
	dynamic_fault("bcachefs:meta:write:" name)

#ifdef __KERNEL__
#define BCACHEFS_LOG_PREFIX
#endif

#ifdef BCACHEFS_LOG_PREFIX

#define bch2_log_msg(_c, fmt) "bcachefs (%s): " fmt, ((_c)->name)
#define bch2_fmt_dev(_ca, fmt) "bcachefs (%s): " fmt "\n", ((_ca)->name)
#define bch2_fmt_dev_offset(_ca, _offset, fmt) "bcachefs (%s sector %llu): " fmt "\n", ((_ca)->name), (_offset)
#define bch2_fmt_inum(_c, _inum, fmt) "bcachefs (%s inum %llu): " fmt "\n", ((_c)->name), (_inum)
#define bch2_fmt_inum_offset(_c, _inum, _offset, fmt) \
	"bcachefs (%s inum %llu offset %llu): " fmt "\n", ((_c)->name), (_inum), (_offset)

#else

#define bch2_log_msg(_c, fmt) fmt
#define bch2_fmt_dev(_ca, fmt) "%s: " fmt "\n", ((_ca)->name)
#define bch2_fmt_dev_offset(_ca, _offset, fmt) "%s sector %llu: " fmt "\n", ((_ca)->name), (_offset)
#define bch2_fmt_inum(_c, _inum, fmt) "inum %llu: " fmt "\n", (_inum)
#define bch2_fmt_inum_offset(_c, _inum, _offset, fmt) \
	"inum %llu offset %llu: " fmt "\n", (_inum), (_offset)

#endif

#define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n")

__printf(2, 3)
void bch2_print_opts(struct bch_opts *, const char *, ...);

__printf(2, 3)
void __bch2_print(struct bch_fs *c, const char *fmt, ...);

#define maybe_dev_to_fs(_c) _Generic((_c), \
	struct bch_dev *: ((struct bch_dev *) (_c))->fs, \
	struct bch_fs *: (_c))

#define bch2_print(_c, ...) __bch2_print(maybe_dev_to_fs(_c), __VA_ARGS__)

#define bch2_print_ratelimited(_c, ...) \
do { \
	static DEFINE_RATELIMIT_STATE(_rs, \
				      DEFAULT_RATELIMIT_INTERVAL, \
				      DEFAULT_RATELIMIT_BURST); \
	\
	if (__ratelimit(&_rs)) \
		bch2_print(_c, __VA_ARGS__); \
} while (0)

#define bch_info(c, fmt, ...) \
	bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_notice(c, fmt, ...) \
	bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_warn(c, fmt, ...) \
	bch2_print(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_warn_ratelimited(c, fmt, ...) \
	bch2_print_ratelimited(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)

#define bch_err(c, fmt, ...) \
	bch2_print(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_err_dev(ca, fmt, ...) \
	bch2_print(c, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
#define bch_err_dev_offset(ca, _offset, fmt, ...) \
	bch2_print(c, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
#define bch_err_inum(c, _inum, fmt, ...) \
	bch2_print(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
#define bch_err_inum_offset(c, _inum, _offset, fmt, ...) \
	bch2_print(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)

#define bch_err_ratelimited(c, fmt, ...) \
	bch2_print_ratelimited(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_err_dev_ratelimited(ca, fmt, ...) \
	bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
#define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...) \
	bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
#define bch_err_inum_ratelimited(c, _inum, fmt, ...) \
	bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
#define bch_err_inum_offset_ratelimited(c, _inum, _offset, fmt, ...) \
	bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)

static inline bool should_print_err(int err)
{
	return err && !bch2_err_matches(err, BCH_ERR_transaction_restart);
}

#define bch_err_fn(_c, _ret) \
do { \
	if (should_print_err(_ret)) \
		bch_err(_c, "%s(): error %s", __func__, bch2_err_str(_ret)); \
} while (0)

#define bch_err_fn_ratelimited(_c, _ret) \
do { \
	if (should_print_err(_ret)) \
		bch_err_ratelimited(_c, "%s(): error %s", __func__, bch2_err_str(_ret)); \
} while (0)

#define bch_err_msg(_c, _ret, _msg, ...) \
do { \
	if (should_print_err(_ret)) \
		bch_err(_c, "%s(): error " _msg " %s", __func__, \
			##__VA_ARGS__, bch2_err_str(_ret)); \
} while (0)
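
/*
 * Typical usage of the error helpers above - a hedged sketch; the function and
 * callee below are made up for illustration, only bch_err_fn() and the
 * behaviour it wraps are real:
 *
 *	int example_do_thing(struct bch_fs *c)
 *	{
 *		int ret = example_sub_operation(c);	// hypothetical callee
 *
 *		// On failure this logs "bcachefs (<fs name>): example_do_thing():
 *		// error <error string>"; transaction restarts are filtered out
 *		// by should_print_err().
 *		bch_err_fn(c, ret);
 *		return ret;
 *	}
 */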

#define bch_verbose(c, fmt, ...) \
do { \
	if ((c)->opts.verbose) \
		bch_info(c, fmt, ##__VA_ARGS__); \
} while (0)

#define pr_verbose_init(opts, fmt, ...) \
do { \
	if (opt_get(opts, verbose)) \
		pr_info(fmt, ##__VA_ARGS__); \
} while (0)

/* Parameters that are useful for debugging, but should always be compiled in: */
#define BCH_DEBUG_PARAMS_ALWAYS() \
	BCH_DEBUG_PARAM(key_merging_disabled, \
		"Disables merging of extents") \
	BCH_DEBUG_PARAM(btree_gc_always_rewrite, \
		"Causes mark and sweep to compact and rewrite every " \
		"btree node it traverses") \
	BCH_DEBUG_PARAM(btree_gc_rewrite_disabled, \
		"Disables rewriting of btree nodes during mark and sweep") \
	BCH_DEBUG_PARAM(btree_shrinker_disabled, \
		"Disables the shrinker callback for the btree node cache") \
	BCH_DEBUG_PARAM(verify_btree_ondisk, \
		"Reread btree nodes at various points to verify the " \
		"mergesort in the read path against modifications " \
		"done in memory") \
	BCH_DEBUG_PARAM(verify_all_btree_replicas, \
		"When reading btree nodes, read all replicas and " \
		"compare them") \
	BCH_DEBUG_PARAM(backpointers_no_use_write_buffer, \
		"Don't use the write buffer for backpointers, enabling " \
		"extra runtime checks")

/* Parameters that should only be compiled in debug mode: */
#define BCH_DEBUG_PARAMS_DEBUG() \
	BCH_DEBUG_PARAM(expensive_debug_checks, \
		"Enables various runtime debugging checks that " \
		"significantly affect performance") \
	BCH_DEBUG_PARAM(debug_check_iterators, \
		"Enables extra verification for btree iterators") \
	BCH_DEBUG_PARAM(debug_check_btree_accounting, \
		"Verify btree accounting for keys within a node") \
	BCH_DEBUG_PARAM(journal_seq_verify, \
		"Store the journal sequence number in the version " \
		"number of every btree key, and verify that btree " \
		"update ordering is preserved during recovery") \
	BCH_DEBUG_PARAM(inject_invalid_keys, \
		"Store the journal sequence number in the version " \
		"number of every btree key, and verify that btree " \
		"update ordering is preserved during recovery") \
	BCH_DEBUG_PARAM(test_alloc_startup, \
		"Force allocator startup to use the slowpath where it " \
		"can't find enough free buckets without invalidating " \
		"cached data") \
	BCH_DEBUG_PARAM(force_reconstruct_read, \
		"Force reads to use the reconstruct path, when reading " \
		"from erasure coded extents") \
	BCH_DEBUG_PARAM(test_restart_gc, \
		"Test restarting mark and sweep gc when bucket gens change")

#define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG()

#ifdef CONFIG_BCACHEFS_DEBUG
#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALL()
#else
#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALWAYS()
#endif

#define BCH_DEBUG_PARAM(name, description) extern bool bch2_##name;
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM

#ifndef CONFIG_BCACHEFS_DEBUG
#define BCH_DEBUG_PARAM(name, description) static const __maybe_unused bool bch2_##name;
BCH_DEBUG_PARAMS_DEBUG()
#undef BCH_DEBUG_PARAM
#endif

#define BCH_TIME_STATS() \
	x(btree_node_mem_alloc) \
	x(btree_node_split) \
	x(btree_node_compact) \
	x(btree_node_merge) \
	x(btree_node_sort) \
	x(btree_node_read) \
	x(btree_node_read_done) \
	x(btree_interior_update_foreground) \
	x(btree_interior_update_total) \
	x(btree_gc) \
	x(data_write) \
	x(data_read) \
	x(data_promote) \
	x(journal_flush_write) \
	x(journal_noflush_write) \
	x(journal_flush_seq) \
	x(blocked_journal_low_on_space) \
	x(blocked_journal_low_on_pin) \
	x(blocked_journal_max_in_flight) \
	x(blocked_allocate) \
	x(blocked_allocate_open_bucket) \
	x(blocked_write_buffer_full) \
	x(nocow_lock_contended)

enum bch_time_stats {
#define x(name) BCH_TIME_##name,
	BCH_TIME_STATS()
#undef x
	BCH_TIME_STAT_NR
};
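
/*
 * The BCH_TIME_STATS()/x() lists above are x-macros: each user defines x() to
 * emit what it needs and then expands the list. The enum above, for instance,
 * expands (roughly) to:
 *
 *	enum bch_time_stats {
 *		BCH_TIME_btree_node_mem_alloc,
 *		BCH_TIME_btree_node_split,
 *		...
 *		BCH_TIME_nocow_lock_contended,
 *		BCH_TIME_STAT_NR
 *	};
 *
 * so the same list can also drive name tables, sysfs, etc. without being
 * duplicated.
 */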

#include "alloc_types.h"
#include "btree_types.h"
#include "btree_node_scan_types.h"
#include "btree_write_buffer_types.h"
#include "buckets_types.h"
#include "buckets_waiting_for_journal_types.h"
#include "clock_types.h"
#include "disk_groups_types.h"
#include "ec_types.h"
#include "journal_types.h"
#include "keylist_types.h"
#include "quota_types.h"
#include "rebalance_types.h"
#include "replicas_types.h"
#include "subvolume_types.h"
#include "super_types.h"
#include "thread_with_file_types.h"

/* Number of nodes btree coalesce will try to coalesce at once */
#define GC_MERGE_NODES 4U

/* Maximum number of nodes we might need to allocate atomically: */
#define BTREE_RESERVE_MAX (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1))

/* Size of the freelist we allocate btree nodes from: */
#define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 4)

#define BTREE_NODE_OPEN_BUCKET_RESERVE (BTREE_RESERVE_MAX * BCH_REPLICAS_MAX)

struct btree;

enum gc_phase {
	GC_PHASE_NOT_RUNNING,
	GC_PHASE_START,
	GC_PHASE_SB,

	GC_PHASE_BTREE_stripes,
	GC_PHASE_BTREE_extents,
	GC_PHASE_BTREE_inodes,
	GC_PHASE_BTREE_dirents,
	GC_PHASE_BTREE_xattrs,
	GC_PHASE_BTREE_alloc,
	GC_PHASE_BTREE_quotas,
	GC_PHASE_BTREE_reflink,
	GC_PHASE_BTREE_subvolumes,
	GC_PHASE_BTREE_snapshots,
	GC_PHASE_BTREE_lru,
	GC_PHASE_BTREE_freespace,
	GC_PHASE_BTREE_need_discard,
	GC_PHASE_BTREE_backpointers,
	GC_PHASE_BTREE_bucket_gens,
	GC_PHASE_BTREE_snapshot_trees,
	GC_PHASE_BTREE_deleted_inodes,
	GC_PHASE_BTREE_logged_ops,
	GC_PHASE_BTREE_rebalance_work,
	GC_PHASE_BTREE_subvolume_children,

	GC_PHASE_PENDING_DELETE,
};

struct gc_pos {
	enum gc_phase phase;
	struct bpos pos;
	unsigned level;
};

struct reflink_gc {
	u64 offset;
	u32 size;
	u32 refcount;
};

typedef GENRADIX(struct reflink_gc) reflink_gc_table;

struct io_count {
	u64 sectors[2][BCH_DATA_NR];
};

struct bch_dev {
	struct kobject kobj;
	struct percpu_ref ref;
	struct completion ref_completion;
	struct percpu_ref io_ref;
	struct completion io_ref_completion;

	struct bch_fs *fs;

	u8 dev_idx;
	/*
	 * Cached version of this device's member info from superblock
	 * Committed by bch2_write_super() -> bch_fs_mi_update()
	 */
	struct bch_member_cpu mi;
	atomic64_t errors[BCH_MEMBER_ERROR_NR];

	__uuid_t uuid;
	char name[BDEVNAME_SIZE];

	struct bch_sb_handle disk_sb;
	struct bch_sb *sb_read_scratch;
	int sb_write_error;
	dev_t dev;
	atomic_t flush_seq;

	struct bch_devs_mask self;

	/* biosets used in cloned bios for writing multiple replicas */
	struct bio_set replica_set;

	/*
	 * Buckets:
	 * Per-bucket arrays are protected by c->mark_lock, bucket_lock and
	 * gc_lock, for device resize - holding any is sufficient for access:
	 * Or rcu_read_lock(), but only for ptr_stale():
	 */
	struct bucket_array __rcu *buckets_gc;
	struct bucket_gens __rcu *bucket_gens;
	u8 *oldest_gen;
	unsigned long *buckets_nouse;
	struct rw_semaphore bucket_lock;

	struct bch_dev_usage *usage_base;
	struct bch_dev_usage __percpu *usage[JOURNAL_BUF_NR];
	struct bch_dev_usage __percpu *usage_gc;

	/* Allocator: */
	u64 new_fs_bucket_idx;
	u64 alloc_cursor;

	unsigned nr_open_buckets;
	unsigned nr_btree_reserve;

	size_t inc_gen_needs_gc;
	size_t inc_gen_really_needs_gc;
	size_t buckets_waiting_on_journal;

	atomic64_t rebalance_work;

	struct journal_device journal;
	u64 prev_journal_sector;

	struct work_struct io_error_work;

	/* The rest of this all shows up in sysfs */
	atomic64_t cur_latency[2];
	struct bch2_time_stats_quantiles io_latency[2];

#define CONGESTED_MAX 1024
	atomic_t congested;
	u64 congested_last;

	struct io_count __percpu *io_done;
};

/*
 * initial_gc_unfixed
 * error
 * topology error
 */

#define BCH_FS_FLAGS() \
	x(new_fs) \
	x(started) \
	x(may_go_rw) \
	x(rw) \
	x(was_rw) \
	x(stopping) \
	x(emergency_ro) \
	x(going_ro) \
	x(write_disable_complete) \
	x(clean_shutdown) \
	x(fsck_running) \
	x(initial_gc_unfixed) \
	x(need_another_gc) \
	x(need_delete_dead_snapshots) \
	x(error) \
	x(topology_error) \
	x(errors_fixed) \
	x(errors_not_fixed)

enum bch_fs_flags {
#define x(n) BCH_FS_##n,
	BCH_FS_FLAGS()
#undef x
};

struct btree_debug {
	unsigned id;
};

#define BCH_TRANSACTIONS_NR 128

struct btree_transaction_stats {
	struct bch2_time_stats duration;
	struct bch2_time_stats lock_hold_times;
	struct mutex lock;
	unsigned nr_max_paths;
	unsigned journal_entries_size;
	unsigned max_mem;
	char *max_paths_text;
};

struct bch_fs_pcpu {
	u64 sectors_available;
};

struct journal_seq_blacklist_table {
	size_t nr;
	struct journal_seq_blacklist_table_entry {
		u64 start;
		u64 end;
		bool dirty;
	} entries[];
};

struct journal_keys {
	/* must match layout in darray_types.h */
	size_t nr, size;
	struct journal_key {
		u64 journal_seq;
		u32 journal_offset;
		enum btree_id btree_id:8;
		unsigned level:8;
		bool allocated;
		bool overwritten;
		struct bkey_i *k;
	} *data;
	/*
	 * Gap buffer: instead of all the empty space in the array being at the
	 * end of the buffer - from @nr to @size - the empty space is at @gap.
	 * This means that sequential insertions are O(n) instead of O(n^2).
	 */
	size_t gap;
	atomic_t ref;
	bool initial_ref_held;
};
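
/*
 * Illustration of the gap buffer layout described above - a minimal sketch
 * with hypothetical helper names, not the actual journal_keys code:
 *
 *	// data[0..gap) and data[gap + (size - nr) .. size) hold live entries;
 *	// the unused slots sit at @gap. Inserting at the gap is O(1); moving
 *	// the gap to a new position only copies the entries in between, so a
 *	// run of in-order insertions stays O(n) overall.
 *	static void example_insert(struct journal_keys *keys, size_t pos,
 *				   struct journal_key new)
 *	{
 *		example_move_gap(keys, pos);		// memmove() live entries as needed
 *		BUG_ON(keys->nr == keys->size);		// caller must have reserved space
 *		keys->data[keys->gap++] = new;
 *		keys->nr++;
 *	}
 */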

struct btree_trans_buf {
	struct btree_trans *trans;
};

#define REPLICAS_DELTA_LIST_MAX (1U << 16)

#define BCACHEFS_ROOT_SUBVOL_INUM \
	((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })

#define BCH_WRITE_REFS() \
	x(trans) \
	x(write) \
	x(promote) \
	x(node_rewrite) \
	x(stripe_create) \
	x(stripe_delete) \
	x(reflink) \
	x(fallocate) \
	x(fsync) \
	x(dio_write) \
	x(discard) \
	x(discard_fast) \
	x(invalidate) \
	x(delete_dead_snapshots) \
	x(snapshot_delete_pagecache) \
	x(sysfs) \
	x(btree_write_buffer)

enum bch_write_ref {
#define x(n) BCH_WRITE_REF_##n,
	BCH_WRITE_REFS()
#undef x
	BCH_WRITE_REF_NR,
};

struct bch_fs {
	struct closure cl;

	struct list_head list;
	struct kobject kobj;
	struct kobject counters_kobj;
	struct kobject internal;
	struct kobject opts_dir;
	struct kobject time_stats;
	unsigned long flags;

	int minor;
	struct device *chardev;
	struct super_block *vfs_sb;
	dev_t dev;
	char name[40];
	struct stdio_redirect *stdio;
	struct task_struct *stdio_filter;

	/* ro/rw, add/remove/resize devices: */
	struct rw_semaphore state_lock;

	/* Counts outstanding writes, for clean transition to read-only */
#ifdef BCH_WRITE_REF_DEBUG
	atomic_long_t writes[BCH_WRITE_REF_NR];
#else
	struct percpu_ref writes;
#endif
	/*
	 * Analogous to c->writes, for asynchronous ops that don't necessarily
	 * need fs to be read-write
	 */
	refcount_t ro_ref;
	wait_queue_head_t ro_ref_wait;

	struct work_struct read_only_work;

	struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX];

	struct bch_replicas_cpu replicas;
	struct bch_replicas_cpu replicas_gc;
	struct mutex replicas_gc_lock;
	mempool_t replicas_delta_pool;

	struct journal_entry_res btree_root_journal_res;
	struct journal_entry_res replicas_journal_res;
	struct journal_entry_res clock_journal_res;
	struct journal_entry_res dev_usage_journal_res;

	struct bch_disk_groups_cpu __rcu *disk_groups;

	struct bch_opts opts;

	/* Updated by bch2_sb_update():*/
	struct {
		__uuid_t uuid;
		__uuid_t user_uuid;

		u16 version;
		u16 version_min;
		u16 version_upgrade_complete;

		u8 nr_devices;
		u8 clean;

		u8 encryption_type;

		u64 time_base_lo;
		u32 time_base_hi;
		unsigned time_units_per_sec;
		unsigned nsec_per_time_unit;
		u64 features;
		u64 compat;
		unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
		u64 btrees_lost_data;
	} sb;


	struct bch_sb_handle disk_sb;

	unsigned short block_bits; /* ilog2(block_size) */

	u16 btree_foreground_merge_threshold;

	struct closure sb_write;
	struct mutex sb_lock;

	/* snapshot.c: */
	struct snapshot_table __rcu *snapshots;
	struct mutex snapshot_table_lock;
	struct rw_semaphore snapshot_create_lock;

	struct work_struct snapshot_delete_work;
	struct work_struct snapshot_wait_for_pagecache_and_delete_work;
	snapshot_id_list snapshots_unlinked;
	struct mutex snapshots_unlinked_lock;

	/* BTREE CACHE */
	struct bio_set btree_bio;
	struct workqueue_struct *io_complete_wq;

	struct btree_root btree_roots_known[BTREE_ID_NR];
	DARRAY(struct btree_root) btree_roots_extra;
	struct mutex btree_root_lock;

	struct btree_cache btree_cache;

	/*
	 * Cache of allocated btree nodes - if we allocate a btree node and
	 * don't use it, if we free it that space can't be reused until going
	 * _all_ the way through the allocator (which exposes us to a livelock
	 * when allocating btree reserves fail halfway through) - instead, we
	 * can stick them here:
	 */
	struct btree_alloc btree_reserve_cache[BTREE_NODE_RESERVE * 2];
	unsigned btree_reserve_cache_nr;
	struct mutex btree_reserve_cache_lock;

	mempool_t btree_interior_update_pool;
	struct list_head btree_interior_update_list;
	struct list_head btree_interior_updates_unwritten;
	struct mutex btree_interior_update_lock;
	struct closure_waitlist btree_interior_update_wait;

	struct workqueue_struct *btree_interior_update_worker;
	struct work_struct btree_interior_update_work;

	struct workqueue_struct *btree_node_rewrite_worker;

	struct list_head pending_node_rewrites;
	struct mutex pending_node_rewrites_lock;

	/* btree_io.c: */
	spinlock_t btree_write_error_lock;
	struct btree_write_stats {
		atomic64_t nr;
		atomic64_t bytes;
	} btree_write_stats[BTREE_WRITE_TYPE_NR];

	/* btree_iter.c: */
	struct seqmutex btree_trans_lock;
	struct list_head btree_trans_list;
	mempool_t btree_trans_pool;
	mempool_t btree_trans_mem_pool;
	struct btree_trans_buf __percpu *btree_trans_bufs;

	struct srcu_struct btree_trans_barrier;
	bool btree_trans_barrier_initialized;

	struct btree_key_cache btree_key_cache;
	unsigned btree_key_cache_btrees;

	struct btree_write_buffer btree_write_buffer;

	struct workqueue_struct *btree_update_wq;
	struct workqueue_struct *btree_io_complete_wq;
	/* copygc needs its own workqueue for index updates.. */
	struct workqueue_struct *copygc_wq;
	/*
	 * Use a dedicated wq for write ref holder tasks. Required to avoid
	 * dependency problems with other wq tasks that can block on ref
	 * draining, such as read-only transition.
	 */
	struct workqueue_struct *write_ref_wq;

	/* ALLOCATION */
	struct bch_devs_mask rw_devs[BCH_DATA_NR];

	u64 capacity; /* sectors */

	/*
	 * When capacity _decreases_ (due to a disk being removed), we
	 * increment capacity_gen - this invalidates outstanding reservations
	 * and forces them to be revalidated
	 */
	u32 capacity_gen;
	unsigned bucket_size_max;

	atomic64_t sectors_available;
	struct mutex sectors_available_lock;

	struct bch_fs_pcpu __percpu *pcpu;

	struct percpu_rw_semaphore mark_lock;

	seqcount_t usage_lock;
	struct bch_fs_usage *usage_base;
	struct bch_fs_usage __percpu *usage[JOURNAL_BUF_NR];
	struct bch_fs_usage __percpu *usage_gc;
	u64 __percpu *online_reserved;

	/* single element mempool: */
	struct mutex usage_scratch_lock;
	struct bch_fs_usage_online *usage_scratch;

	struct io_clock io_clock[2];

	/* JOURNAL SEQ BLACKLIST */
	struct journal_seq_blacklist_table *
		journal_seq_blacklist_table;
	struct work_struct journal_seq_blacklist_gc_work;

	/* ALLOCATOR */
	spinlock_t freelist_lock;
	struct closure_waitlist freelist_wait;

	open_bucket_idx_t open_buckets_freelist;
	open_bucket_idx_t open_buckets_nr_free;
	struct closure_waitlist open_buckets_wait;
	struct open_bucket open_buckets[OPEN_BUCKETS_COUNT];
	open_bucket_idx_t open_buckets_hash[OPEN_BUCKETS_COUNT];

	open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT];
	open_bucket_idx_t open_buckets_partial_nr;

	struct write_point btree_write_point;
	struct write_point rebalance_write_point;

	struct write_point write_points[WRITE_POINT_MAX];
	struct hlist_head write_points_hash[WRITE_POINT_HASH_NR];
	struct mutex write_points_hash_lock;
	unsigned write_points_nr;

	struct buckets_waiting_for_journal buckets_waiting_for_journal;
	struct work_struct invalidate_work;
	struct work_struct discard_work;
	struct mutex discard_buckets_in_flight_lock;
	DARRAY(struct bpos) discard_buckets_in_flight;
	struct work_struct discard_fast_work;

	/* GARBAGE COLLECTION */
	struct task_struct *gc_thread;
	atomic_t kick_gc;
	unsigned long gc_count;

	enum btree_id gc_gens_btree;
	struct bpos gc_gens_pos;

	/*
	 * Tracks GC's progress - everything in the range [ZERO_KEY..gc_cur_pos]
	 * has been marked by GC.
	 *
	 * gc_cur_phase is a superset of btree_ids (BTREE_ID_extents etc.)
	 *
	 * Protected by gc_pos_lock. Only written to by GC thread, so GC thread
	 * can read without a lock.
	 */
	seqcount_t gc_pos_lock;
	struct gc_pos gc_pos;

	/*
	 * The allocation code needs gc_mark in struct bucket to be correct, but
	 * it's not while a gc is in progress.
	 */
	struct rw_semaphore gc_lock;
	struct mutex gc_gens_lock;

	/* IO PATH */
	struct semaphore io_in_flight;
	struct bio_set bio_read;
	struct bio_set bio_read_split;
	struct bio_set bio_write;
	struct mutex bio_bounce_pages_lock;
	mempool_t bio_bounce_pages;
	struct bucket_nocow_lock_table
		nocow_locks;
	struct rhashtable promote_table;

	mempool_t compression_bounce[2];
	mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR];
	mempool_t decompress_workspace;
	size_t zstd_workspace_size;

	struct crypto_shash *sha256;
	struct crypto_sync_skcipher *chacha20;
	struct crypto_shash *poly1305;

	atomic64_t key_version;

	mempool_t large_bkey_pool;

	/* MOVE.C */
	struct list_head moving_context_list;
	struct mutex moving_context_lock;

	/* REBALANCE */
	struct bch_fs_rebalance rebalance;

	/* COPYGC */
	struct task_struct *copygc_thread;
	struct write_point copygc_write_point;
	s64 copygc_wait_at;
	s64 copygc_wait;
	bool copygc_running;
	wait_queue_head_t copygc_running_wq;

	/* STRIPES: */
	GENRADIX(struct stripe) stripes;
	GENRADIX(struct gc_stripe) gc_stripes;

	struct hlist_head ec_stripes_new[32];
	spinlock_t ec_stripes_new_lock;

	ec_stripes_heap ec_stripes_heap;
	struct mutex ec_stripes_heap_lock;

	/* ERASURE CODING */
	struct list_head ec_stripe_head_list;
	struct mutex ec_stripe_head_lock;

	struct list_head ec_stripe_new_list;
	struct mutex ec_stripe_new_lock;
	wait_queue_head_t ec_stripe_new_wait;

	struct work_struct ec_stripe_create_work;
	u64 ec_stripe_hint;

	struct work_struct ec_stripe_delete_work;

	struct bio_set ec_bioset;

	/* REFLINK */
	reflink_gc_table reflink_gc_table;
	size_t reflink_gc_nr;

	/* fs.c */
	struct list_head vfs_inodes_list;
	struct mutex vfs_inodes_lock;

	/* VFS IO PATH - fs-io.c */
	struct bio_set writepage_bioset;
	struct bio_set dio_write_bioset;
	struct bio_set dio_read_bioset;
	struct bio_set nocow_flush_bioset;

	/* QUOTAS */
	struct bch_memquota_type quotas[QTYP_NR];

	/* RECOVERY */
	u64 journal_replay_seq_start;
	u64 journal_replay_seq_end;
	/*
	 * Two different uses:
	 * "Has this fsck pass?" - i.e. should this type of error be an
	 * emergency read-only
	 * And, in certain situations fsck will rewind to an earlier pass: used
	 * for signaling to the toplevel code which pass we want to run now.
	 */
	enum bch_recovery_pass curr_recovery_pass;
	/* bitmap of explicitly enabled recovery passes: */
	u64 recovery_passes_explicit;
	/* bitmask of recovery passes that we actually ran */
	u64 recovery_passes_complete;
	/* never rewinds version of curr_recovery_pass */
	enum bch_recovery_pass recovery_pass_done;
	struct semaphore online_fsck_mutex;

	/* DEBUG JUNK */
	struct dentry *fs_debug_dir;
	struct dentry *btree_debug_dir;
	struct btree_debug btree_debug[BTREE_ID_NR];
	struct btree *verify_data;
	struct btree_node *verify_ondisk;
	struct mutex verify_lock;

	u64 *unused_inode_hints;
	unsigned inode_shard_bits;

	/*
	 * A btree node on disk could have too many bsets for an iterator to fit
	 * on the stack - have to dynamically allocate them
	 */
	mempool_t fill_iter;

	mempool_t btree_bounce_pool;

	struct journal journal;
	GENRADIX(struct journal_replay *) journal_entries;
	u64 journal_entries_base_seq;
	struct journal_keys journal_keys;
	struct list_head journal_iters;

	struct find_btree_nodes found_btree_nodes;

	u64 last_bucket_seq_cleanup;

	u64 counters_on_mount[BCH_COUNTER_NR];
	u64 __percpu *counters;

	unsigned btree_gc_periodic:1;
	unsigned copy_gc_enabled:1;
	bool promote_whole_extents;

	struct bch2_time_stats times[BCH_TIME_STAT_NR];

	struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];

	/* ERRORS */
	struct list_head fsck_error_msgs;
	struct mutex fsck_error_msgs_lock;
	bool fsck_alloc_msgs_err;

	bch_sb_errors_cpu fsck_error_counts;
	struct mutex fsck_error_counts_lock;
};

extern struct wait_queue_head bch2_read_only_wait;

static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref)
{
#ifdef BCH_WRITE_REF_DEBUG
	atomic_long_inc(&c->writes[ref]);
#else
	percpu_ref_get(&c->writes);
#endif
}

static inline bool __bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
{
#ifdef BCH_WRITE_REF_DEBUG
	return !test_bit(BCH_FS_going_ro, &c->flags) &&
		atomic_long_inc_not_zero(&c->writes[ref]);
#else
	return percpu_ref_tryget(&c->writes);
#endif
}

static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
{
#ifdef BCH_WRITE_REF_DEBUG
	return !test_bit(BCH_FS_going_ro, &c->flags) &&
		atomic_long_inc_not_zero(&c->writes[ref]);
#else
	return percpu_ref_tryget_live(&c->writes);
#endif
}

static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref)
{
#ifdef BCH_WRITE_REF_DEBUG
	long v = atomic_long_dec_return(&c->writes[ref]);

	BUG_ON(v < 0);
	if (v)
		return;
	for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
		if (atomic_long_read(&c->writes[i]))
			return;

	set_bit(BCH_FS_write_disable_complete, &c->flags);
	wake_up(&bch2_read_only_wait);
#else
	percpu_ref_put(&c->writes);
#endif
}
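
/*
 * Typical pattern for the write-ref helpers above - a hedged sketch; the
 * surrounding function is made up for illustration:
 *
 *	static int example_fs_op(struct bch_fs *c)
 *	{
 *		// Fails once the filesystem has started going read-only:
 *		if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_write))
 *			return -EROFS;
 *
 *		// ... do work that requires the fs to stay read-write ...
 *
 *		// The matching put; with BCH_WRITE_REF_DEBUG the last put of
 *		// the last ref type sets BCH_FS_write_disable_complete and
 *		// wakes bch2_read_only_wait.
 *		bch2_write_ref_put(c, BCH_WRITE_REF_write);
 *		return 0;
 *	}
 */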

static inline bool bch2_ro_ref_tryget(struct bch_fs *c)
{
	if (test_bit(BCH_FS_stopping, &c->flags))
		return false;

	return refcount_inc_not_zero(&c->ro_ref);
}

static inline void bch2_ro_ref_put(struct bch_fs *c)
{
	if (refcount_dec_and_test(&c->ro_ref))
		wake_up(&c->ro_ref_wait);
}

static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
{
#ifndef NO_BCACHEFS_FS
	if (c->vfs_sb)
		c->vfs_sb->s_bdi->ra_pages = ra_pages;
#endif
}

static inline unsigned bucket_bytes(const struct bch_dev *ca)
{
	return ca->mi.bucket_size << 9;
}

static inline unsigned block_bytes(const struct bch_fs *c)
{
	return c->opts.block_size;
}

static inline unsigned block_sectors(const struct bch_fs *c)
{
	return c->opts.block_size >> 9;
}

static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree)
{
	return c->btree_key_cache_btrees & (1U << btree);
}

static inline struct timespec64 bch2_time_to_timespec(const struct bch_fs *c, s64 time)
{
	struct timespec64 t;
	s32 rem;

	time += c->sb.time_base_lo;

	t.tv_sec = div_s64_rem(time, c->sb.time_units_per_sec, &rem);
	t.tv_nsec = rem * c->sb.nsec_per_time_unit;
	return t;
}

static inline s64 timespec_to_bch2_time(const struct bch_fs *c, struct timespec64 ts)
{
	return (ts.tv_sec * c->sb.time_units_per_sec +
		(int) ts.tv_nsec / c->sb.nsec_per_time_unit) - c->sb.time_base_lo;
}

static inline s64 bch2_current_time(const struct bch_fs *c)
{
	struct timespec64 now;

	ktime_get_coarse_real_ts64(&now);
	return timespec_to_bch2_time(c, now);
}
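
/*
 * Worked example for the conversions above (illustrative numbers only): with
 * time_units_per_sec = 1000000000 and nsec_per_time_unit = 1, an on-disk
 * timestamp is just nanoseconds relative to time_base_lo. If time_base_lo is
 * 1600000000000000000, the on-disk value 5000000000 converts back to
 * tv_sec = 1600000005, tv_nsec = 0 - and timespec_to_bch2_time() of that
 * timespec returns 5000000000 again.
 */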

static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev)
{
	return dev < c->sb.nr_devices && c->devs[dev];
}

static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c)
{
	struct stdio_redirect *stdio = c->stdio;

	if (c->stdio_filter && c->stdio_filter != current)
		stdio = NULL;
	return stdio;
}

static inline unsigned metadata_replicas_required(struct bch_fs *c)
{
	return min(c->opts.metadata_replicas,
		   c->opts.metadata_replicas_required);
}

static inline unsigned data_replicas_required(struct bch_fs *c)
{
	return min(c->opts.data_replicas,
		   c->opts.data_replicas_required);
}

#define BKEY_PADDED_ONSTACK(key, pad) \
	struct { struct bkey_i key; __u64 key ## _pad[pad]; }
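
/*
 * BKEY_PADDED_ONSTACK declares an anonymous struct whose first member is a
 * bkey_i followed by @pad u64s of scratch space, so a key with a larger
 * variable-length value can be built on the stack. A hedged usage sketch
 * (the surrounding code is made up for illustration):
 *
 *	BKEY_PADDED_ONSTACK(k, 32) tmp;
 *
 *	bkey_init(&tmp.k.k);		// tmp.k is the struct bkey_i
 *	// ... fill in tmp.k, using up to 32 extra u64s of value space ...
 */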

#endif /* _BCACHEFS_H */
