fs.h source code [linux/fs/btrfs/fs.h]

/* SPDX-License-Identifier: GPL-2.0 */
2
3	#ifndef BTRFS_FS_H
4	#define BTRFS_FS_H
5
6	#include <linux/blkdev.h>
7	#include <linux/fs.h>
8	#include <linux/btrfs_tree.h>
9	#include <linux/sizes.h>
10	#include "extent-io-tree.h"
11	#include "extent_map.h"
12	#include "async-thread.h"
13	#include "block-rsv.h"
14
15	#define BTRFS_MAX_EXTENT_SIZE SZ_128M
16
17	#define BTRFS_OLDEST_GENERATION 0ULL
18
19	#define BTRFS_EMPTY_DIR_SIZE 0
20
21	#define BTRFS_DIRTY_METADATA_THRESH SZ_32M
22
23	#define BTRFS_SUPER_INFO_OFFSET SZ_64K
24	#define BTRFS_SUPER_INFO_SIZE 4096
25	static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
26
27	/*
28	* Number of metadata items necessary for an unlink operation:
29	*
30	* 1 for the possible orphan item
31	* 1 for the dir item
32	* 1 for the dir index
33	* 1 for the inode ref
34	* 1 for the inode
35	* 1 for the parent inode
36	*/
37	#define BTRFS_UNLINK_METADATA_UNITS 6
38
39	/*
40	* The reserved space at the beginning of each device. It covers the primary
41	* super block and leaves space for potential use by other tools like
42	* bootloaders or to lower potential damage of accidental overwrite.
43	*/
44	#define BTRFS_DEVICE_RANGE_RESERVED (SZ_1M)
/*
 * Runtime (in-memory) states of filesystem.
 *
 * These are bit numbers; they are tested with test_bit() against
 * btrfs_fs_info::fs_state.
 */
enum {
	/*
	 * Filesystem is being remounted, allow to skip some operations, like
	 * defrag
	 */
	BTRFS_FS_STATE_REMOUNTING,
	/* Filesystem in RO mode */
	BTRFS_FS_STATE_RO,
	/* Track if a transaction abort has been reported on this filesystem */
	BTRFS_FS_STATE_TRANS_ABORTED,
	/*
	 * Bio operations should be blocked on this filesystem because a source
	 * or target device is being destroyed as part of a device replace
	 */
	BTRFS_FS_STATE_DEV_REPLACING,
	/* The btrfs_fs_info created for self-tests */
	BTRFS_FS_STATE_DUMMY_FS_INFO,

	BTRFS_FS_STATE_NO_CSUMS,

	/* Indicates there was an error cleaning up a log tree. */
	BTRFS_FS_STATE_LOG_CLEANUP_ERROR,

	/* Number of states above; must stay last. */
	BTRFS_FS_STATE_COUNT
};
73
/*
 * Bit numbers for btrfs_fs_info::flags, tested and modified with the atomic
 * bit helpers (test_bit() and friends).
 */
enum {
	BTRFS_FS_CLOSING_START,
	BTRFS_FS_CLOSING_DONE,
	BTRFS_FS_LOG_RECOVERING,
	BTRFS_FS_OPEN,
	BTRFS_FS_QUOTA_ENABLED,
	BTRFS_FS_UPDATE_UUID_TREE_GEN,
	BTRFS_FS_CREATING_FREE_SPACE_TREE,
	BTRFS_FS_BTREE_ERR,
	BTRFS_FS_LOG1_ERR,
	BTRFS_FS_LOG2_ERR,
	BTRFS_FS_QUOTA_OVERRIDE,
	/* Used to record internally whether fs has been frozen */
	BTRFS_FS_FROZEN,
	/*
	 * Indicate that balance has been set up from the ioctl and is in the
	 * main phase. The fs_info::balance_ctl is initialized.
	 */
	BTRFS_FS_BALANCE_RUNNING,

	/*
	 * Indicate that relocation of a chunk has started, it's set per chunk
	 * and is toggled between chunks.
	 */
	BTRFS_FS_RELOC_RUNNING,

	/* Indicate that the cleaner thread is awake and doing something. */
	BTRFS_FS_CLEANER_RUNNING,

	/*
	 * The checksumming has an optimized version and is considered fast,
	 * so we don't need to offload checksums to workqueues.
	 */
	BTRFS_FS_CSUM_IMPL_FAST,

	/* Indicate that the discard workqueue can service discards. */
	BTRFS_FS_DISCARD_RUNNING,

	/* Indicate that we need to cleanup space cache v1 */
	BTRFS_FS_CLEANUP_SPACE_CACHE_V1,

	/* Indicate that we can't trust the free space tree for caching yet */
	BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED,

	/* Indicate whether there are any tree modification log users */
	BTRFS_FS_TREE_MOD_LOG_USERS,

	/* Indicate that we want the transaction kthread to commit right now. */
	BTRFS_FS_COMMIT_TRANS,

	/* Indicate we have half completed snapshot deletions pending. */
	BTRFS_FS_UNFINISHED_DROPS,

	/* Indicate we have to finish a zone to do next allocation. */
	BTRFS_FS_NEED_ZONE_FINISH,

	/* Indicate that we want to commit the transaction. */
	BTRFS_FS_NEED_TRANS_COMMIT,

	/* This is set when active zone tracking is needed. */
	BTRFS_FS_ACTIVE_ZONE_TRACKING,

	/*
	 * Indicate if we have some features changed, this is mostly for
	 * cleaner thread to update the sysfs interface.
	 */
	BTRFS_FS_FEATURE_CHANGED,

	/*
	 * Indicate that we have found a tree block which is only aligned to
	 * sectorsize, but not to nodesize.  This should be rare nowadays.
	 */
	BTRFS_FS_UNALIGNED_TREE_BLOCK,

#if BITS_PER_LONG == 32
	/* Indicate if we have error/warn message printed on 32bit systems */
	BTRFS_FS_32BIT_ERROR,
	BTRFS_FS_32BIT_WARN,
#endif
};
154
/*
 * Flags for mount options.
 *
 * Each option is a single bit; the set is kept in btrfs_fs_info::mount_opt
 * and queried with btrfs_test_opt().
 *
 * Note: don't forget to add new options to btrfs_show_options()
 */
enum {
	BTRFS_MOUNT_NODATASUM			= (1UL << 0),
	BTRFS_MOUNT_NODATACOW			= (1UL << 1),
	BTRFS_MOUNT_NOBARRIER			= (1UL << 2),
	BTRFS_MOUNT_SSD				= (1UL << 3),
	BTRFS_MOUNT_DEGRADED			= (1UL << 4),
	BTRFS_MOUNT_COMPRESS			= (1UL << 5),
	BTRFS_MOUNT_NOTREELOG			= (1UL << 6),
	BTRFS_MOUNT_FLUSHONCOMMIT		= (1UL << 7),
	BTRFS_MOUNT_SSD_SPREAD			= (1UL << 8),
	BTRFS_MOUNT_NOSSD			= (1UL << 9),
	BTRFS_MOUNT_DISCARD_SYNC		= (1UL << 10),
	BTRFS_MOUNT_FORCE_COMPRESS		= (1UL << 11),
	BTRFS_MOUNT_SPACE_CACHE			= (1UL << 12),
	BTRFS_MOUNT_CLEAR_CACHE			= (1UL << 13),
	BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED	= (1UL << 14),
	BTRFS_MOUNT_ENOSPC_DEBUG		= (1UL << 15),
	BTRFS_MOUNT_AUTO_DEFRAG			= (1UL << 16),
	BTRFS_MOUNT_USEBACKUPROOT		= (1UL << 17),
	BTRFS_MOUNT_SKIP_BALANCE		= (1UL << 18),
	BTRFS_MOUNT_PANIC_ON_FATAL_ERROR	= (1UL << 19),
	BTRFS_MOUNT_RESCAN_UUID_TREE		= (1UL << 20),
	BTRFS_MOUNT_FRAGMENT_DATA		= (1UL << 21),
	BTRFS_MOUNT_FRAGMENT_METADATA		= (1UL << 22),
	BTRFS_MOUNT_FREE_SPACE_TREE		= (1UL << 23),
	BTRFS_MOUNT_NOLOGREPLAY			= (1UL << 24),
	BTRFS_MOUNT_REF_VERIFY			= (1UL << 25),
	BTRFS_MOUNT_DISCARD_ASYNC		= (1UL << 26),
	BTRFS_MOUNT_IGNOREBADROOTS		= (1UL << 27),
	BTRFS_MOUNT_IGNOREDATACSUMS		= (1UL << 28),
	BTRFS_MOUNT_NODISCARD			= (1UL << 29),
};
192
/*
 * Compat flags that we support.  If any incompat flags are set other than the
 * ones specified below then we will fail to mount
 */
#define BTRFS_FEATURE_COMPAT_SUPP		0ULL
#define BTRFS_FEATURE_COMPAT_SAFE_SET		0ULL
#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR		0ULL

#define BTRFS_FEATURE_COMPAT_RO_SUPP			\
	(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |	\
	 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \
	 BTRFS_FEATURE_COMPAT_RO_VERITY |		\
	 BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE)

#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET	0ULL
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR	0ULL

/* Incompat features that are supported regardless of CONFIG_BTRFS_DEBUG. */
#define BTRFS_FEATURE_INCOMPAT_SUPP_STABLE		\
	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |		\
	 BTRFS_FEATURE_INCOMPAT_BIG_METADATA |		\
	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |		\
	 BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD |		\
	 BTRFS_FEATURE_INCOMPAT_RAID56 |		\
	 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |		\
	 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |	\
	 BTRFS_FEATURE_INCOMPAT_NO_HOLES |		\
	 BTRFS_FEATURE_INCOMPAT_METADATA_UUID |		\
	 BTRFS_FEATURE_INCOMPAT_RAID1C34 |		\
	 BTRFS_FEATURE_INCOMPAT_ZONED |			\
	 BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA)

#ifdef CONFIG_BTRFS_DEBUG
	/*
	 * Features under development, like Extent tree v2 support, are enabled
	 * only under CONFIG_BTRFS_DEBUG.
	 */
#define BTRFS_FEATURE_INCOMPAT_SUPP		\
	(BTRFS_FEATURE_INCOMPAT_SUPP_STABLE |	\
	 BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE | \
	 BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2)

#else

#define BTRFS_FEATURE_INCOMPAT_SUPP		\
	(BTRFS_FEATURE_INCOMPAT_SUPP_STABLE)

#endif

#define BTRFS_FEATURE_INCOMPAT_SAFE_SET			\
	(BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR		0ULL

#define BTRFS_DEFAULT_COMMIT_INTERVAL	(30)
#define BTRFS_DEFAULT_MAX_INLINE	(2048)
249
250	struct btrfs_dev_replace {
251	/ See #define above /
252	u64 replace_state;
253	/ Seconds since 1-Jan-1970 /
254	time64_t time_started;
255	/ Seconds since 1-Jan-1970 /
256	time64_t time_stopped;
257	atomic64_t num_write_errors;
258	atomic64_t num_uncorrectable_read_errors;
259
260	u64 cursor_left;
261	u64 committed_cursor_left;
262	u64 cursor_left_last_write_of_item;
263	u64 cursor_right;
264
265	/ See #define above /
266	u64 cont_reading_from_srcdev_mode;
267
268	int is_valid;
269	int item_needs_writeback;
270	struct btrfs_device *srcdev;
271	struct btrfs_device *tgtdev;
272
273	struct mutex lock_finishing_cancel_unmount;
274	struct rw_semaphore rwsem;
275
276	struct btrfs_scrub_progress scrub_progress;
277
278	struct percpu_counter bio_counter;
279	wait_queue_head_t replace_wait;
280	};
281
282	/*
283	* Free clusters are used to claim free space in relatively large chunks,
284	* allowing us to do less seeky writes. They are used for all metadata
285	* allocations. In ssd_spread mode they are also used for data allocations.
286	*/
287	struct btrfs_free_cluster {
288	spinlock_t lock;
289	spinlock_t refill_lock;
290	struct rb_root root;
291
292	/ Largest extent in this cluster /
293	u64 max_size;
294
295	/ First extent starting offset /
296	u64 window_start;
297
298	/ We did a full search and couldn't create a cluster /
299	bool fragmented;
300
301	struct btrfs_block_group *block_group;
302	/*
303	* When a cluster is allocated from a block group, we put the cluster
304	* onto a list in the block group so that it can be freed before the
305	* block group is freed.
306	*/
307	struct list_head block_group_list;
308	};
309
310	/ Discard control. /
311	/*
312	* Async discard uses multiple lists to differentiate the discard filter
313	* parameters. Index 0 is for completely free block groups where we need to
314	* ensure the entire block group is trimmed without being lossy. Indices
315	* afterwards represent monotonically decreasing discard filter sizes to
316	* prioritize what should be discarded next.
317	*/
318	#define BTRFS_NR_DISCARD_LISTS 3
319	#define BTRFS_DISCARD_INDEX_UNUSED 0
320	#define BTRFS_DISCARD_INDEX_START 1
321
322	struct btrfs_discard_ctl {
323	struct workqueue_struct *discard_workers;
324	struct delayed_work work;
325	spinlock_t lock;
326	struct btrfs_block_group *block_group;
327	struct list_head discard_list[BTRFS_NR_DISCARD_LISTS];
328	u64 prev_discard;
329	u64 prev_discard_time;
330	atomic_t discardable_extents;
331	atomic64_t discardable_bytes;
332	u64 max_discard_size;
333	u64 delay_ms;
334	u32 iops_limit;
335	u32 kbps_limit;
336	u64 discard_extent_bytes;
337	u64 discard_bitmap_bytes;
338	atomic64_t discard_bytes_saved;
339	};
340
/*
 * Operations that exclude each other: device replace, resize, device
 * add/remove and balance.  The one currently running is recorded in
 * btrfs_fs_info::exclusive_operation.
 */
enum btrfs_exclusive_operation {
	BTRFS_EXCLOP_NONE = 0,
	BTRFS_EXCLOP_BALANCE_PAUSED,
	BTRFS_EXCLOP_BALANCE,
	BTRFS_EXCLOP_DEV_ADD,
	BTRFS_EXCLOP_DEV_REMOVE,
	BTRFS_EXCLOP_DEV_REPLACE,
	BTRFS_EXCLOP_RESIZE,
	BTRFS_EXCLOP_SWAP_ACTIVATE,
};
354
355	/ Store data about transaction commits, exported via sysfs. /
356	struct btrfs_commit_stats {
357	/ Total number of commits /
358	u64 commit_count;
359	/ The maximum commit duration so far in ns /
360	u64 max_commit_dur;
361	/ The last commit duration in ns /
362	u64 last_commit_dur;
363	/ The total commit duration in ns /
364	u64 total_commit_dur;
365	};
366
367	struct btrfs_fs_info {
368	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
369	unsigned long flags;
370	struct btrfs_root *tree_root;
371	struct btrfs_root *chunk_root;
372	struct btrfs_root *dev_root;
373	struct btrfs_root *fs_root;
374	struct btrfs_root *quota_root;
375	struct btrfs_root *uuid_root;
376	struct btrfs_root *data_reloc_root;
377	struct btrfs_root *block_group_root;
378	struct btrfs_root *stripe_root;
379
380	/ The log root tree is a directory of all the other log roots /
381	struct btrfs_root *log_root_tree;
382
383	/ The tree that holds the global roots (csum, extent, etc) /
384	rwlock_t global_root_lock;
385	struct rb_root global_root_tree;
386
387	spinlock_t fs_roots_radix_lock;
388	struct radix_tree_root fs_roots_radix;
389
390	/ Block group cache stuff /
391	rwlock_t block_group_cache_lock;
392	struct rb_root_cached block_group_cache_tree;
393
394	/ Keep track of unallocated space /
395	atomic64_t free_chunk_space;
396
397	/ Track ranges which are used by log trees blocks/logged data extents /
398	struct extent_io_tree excluded_extents;
399
400	/ logical->physical extent mapping /
401	struct extent_map_tree mapping_tree;
402
403	/*
404	* Block reservation for extent, checksum, root tree and delayed dir
405	* index item.
406	*/
407	struct btrfs_block_rsv global_block_rsv;
408	/ Block reservation for metadata operations /
409	struct btrfs_block_rsv trans_block_rsv;
410	/ Block reservation for chunk tree /
411	struct btrfs_block_rsv chunk_block_rsv;
412	/ Block reservation for delayed operations /
413	struct btrfs_block_rsv delayed_block_rsv;
414	/ Block reservation for delayed refs /
415	struct btrfs_block_rsv delayed_refs_rsv;
416
417	struct btrfs_block_rsv empty_block_rsv;
418
419	/*
420	* Updated while holding the lock 'trans_lock'. Due to the life cycle of
421	* a transaction, it can be directly read while holding a transaction
422	* handle, everywhere else must be read with btrfs_get_fs_generation().
423	* Should always be updated using btrfs_set_fs_generation().
424	*/
425	u64 generation;
426	/*
427	* Always use btrfs_get_last_trans_committed() and
428	* btrfs_set_last_trans_committed() to read and update this field.
429	*/
430	u64 last_trans_committed;
431	/*
432	* Generation of the last transaction used for block group relocation
433	* since the filesystem was last mounted (or 0 if none happened yet).
434	* Must be written and read while holding btrfs_fs_info::commit_root_sem.
435	*/
436	u64 last_reloc_trans;
437
438	/*
439	* This is updated to the current trans every time a full commit is
440	* required instead of the faster short fsync log commits
441	*/
442	u64 last_trans_log_full_commit;
443	unsigned long mount_opt;
444
445	unsigned long compress_type:`4`;
446	unsigned int compress_level;
447	u32 commit_interval;
448	/*
449	* It is a suggestive number, the read side is safe even it gets a
450	* wrong number because we will write out the data into a regular
451	* extent. The write side(mount/remount) is under ->s_umount lock,
452	* so it is also safe.
453	*/
454	u64 max_inline;
455
456	struct btrfs_transaction *running_transaction;
457	wait_queue_head_t transaction_throttle;
458	wait_queue_head_t transaction_wait;
459	wait_queue_head_t transaction_blocked_wait;
460	wait_queue_head_t async_submit_wait;
461
462	/*
463	* Used to protect the incompat_flags, compat_flags, compat_ro_flags
464	* when they are updated.
465	*
466	* Because we do not clear the flags for ever, so we needn't use
467	* the lock on the read side.
468	*
469	* We also needn't use the lock when we mount the fs, because
470	* there is no other task which will update the flag.
471	*/
472	spinlock_t super_lock;
473	struct btrfs_super_block *super_copy;
474	struct btrfs_super_block *super_for_commit;
475	struct super_block *sb;
476	struct inode *btree_inode;
477	struct mutex tree_log_mutex;
478	struct mutex transaction_kthread_mutex;
479	struct mutex cleaner_mutex;
480	struct mutex chunk_mutex;
481
482	/*
483	* This is taken to make sure we don't set block groups ro after the
484	* free space cache has been allocated on them.
485	*/
486	struct mutex ro_block_group_mutex;
487
488	/*
489	* This is used during read/modify/write to make sure no two ios are
490	* trying to mod the same stripe at the same time.
491	*/
492	struct btrfs_stripe_hash_table *stripe_hash_table;
493
494	/*
495	* This protects the ordered operations list only while we are
496	* processing all of the entries on it. This way we make sure the
497	* commit code doesn't find the list temporarily empty because another
498	* function happens to be doing non-waiting preflush before jumping
499	* into the main commit.
500	*/
501	struct mutex ordered_operations_mutex;
502
503	struct rw_semaphore commit_root_sem;
504
505	struct rw_semaphore cleanup_work_sem;
506
507	struct rw_semaphore subvol_sem;
508
509	spinlock_t trans_lock;
510	/*
511	* The reloc mutex goes with the trans lock, it is taken during commit
512	* to protect us from the relocation code.
513	*/
514	struct mutex reloc_mutex;
515
516	struct list_head trans_list;
517	struct list_head dead_roots;
518	struct list_head caching_block_groups;
519
520	spinlock_t delayed_iput_lock;
521	struct list_head delayed_iputs;
522	atomic_t nr_delayed_iputs;
523	wait_queue_head_t delayed_iputs_wait;
524
525	atomic64_t tree_mod_seq;
526
527	/ This protects tree_mod_log and tree_mod_seq_list /
528	rwlock_t tree_mod_log_lock;
529	struct rb_root tree_mod_log;
530	struct list_head tree_mod_seq_list;
531
532	atomic_t async_delalloc_pages;
533
534	/ This is used to protect the following list -- ordered_roots. /
535	spinlock_t ordered_root_lock;
536
537	/*
538	* All fs/file tree roots in which there are data=ordered extents
539	* pending writeback are added into this list.
540	*
541	* These can span multiple transactions and basically include every
542	* dirty data page that isn't from nodatacow.
543	*/
544	struct list_head ordered_roots;
545
546	struct mutex delalloc_root_mutex;
547	spinlock_t delalloc_root_lock;
548	/ All fs/file tree roots that have delalloc inodes. /
549	struct list_head delalloc_roots;
550
551	/*
552	* There is a pool of worker threads for checksumming during writes and
553	* a pool for checksumming after reads. This is because readers can
554	* run with FS locks held, and the writers may be waiting for those
555	* locks. We don't want ordering in the pending list to cause
556	* deadlocks, and so the two are serviced separately.
557	*
558	* A third pool does submit_bio to avoid deadlocking with the other two.
559	*/
560	struct btrfs_workqueue *workers;
561	struct btrfs_workqueue *delalloc_workers;
562	struct btrfs_workqueue *flush_workers;
563	struct workqueue_struct *endio_workers;
564	struct workqueue_struct *endio_meta_workers;
565	struct workqueue_struct *rmw_workers;
566	struct workqueue_struct *compressed_write_workers;
567	struct btrfs_workqueue *endio_write_workers;
568	struct btrfs_workqueue *endio_freespace_worker;
569	struct btrfs_workqueue *caching_workers;
570
571	/*
572	* Fixup workers take dirty pages that didn't properly go through the
573	* cow mechanism and make them safe to write. It happens for the
574	* sys_munmap function call path.
575	*/
576	struct btrfs_workqueue *fixup_workers;
577	struct btrfs_workqueue *delayed_workers;
578
579	struct task_struct *transaction_kthread;
580	struct task_struct *cleaner_kthread;
581	u32 thread_pool_size;
582
583	struct kobject *space_info_kobj;
584	struct kobject *qgroups_kobj;
585	struct kobject *discard_kobj;
586
587	/ Used to keep from writing metadata until there is a nice batch /
588	struct percpu_counter dirty_metadata_bytes;
589	struct percpu_counter delalloc_bytes;
590	struct percpu_counter ordered_bytes;
591	s32 dirty_metadata_batch;
592	s32 delalloc_batch;
593
594	/ Protected by 'trans_lock'. /
595	struct list_head dirty_cowonly_roots;
596
597	struct btrfs_fs_devices *fs_devices;
598
599	/*
600	* The space_info list is effectively read only after initial setup.
601	* It is populated at mount time and cleaned up after all block groups
602	* are removed. RCU is used to protect it.
603	*/
604	struct list_head space_info;
605
606	struct btrfs_space_info *data_sinfo;
607
608	struct reloc_control *reloc_ctl;
609
610	/ data_alloc_cluster is only used in ssd_spread mode /
611	struct btrfs_free_cluster data_alloc_cluster;
612
613	/ All metadata allocations go through this cluster. /
614	struct btrfs_free_cluster meta_alloc_cluster;
615
616	/ Auto defrag inodes go here. /
617	spinlock_t defrag_inodes_lock;
618	struct rb_root defrag_inodes;
619	atomic_t defrag_running;
620
621	/ Used to protect avail_{data, metadata, system}_alloc_bits /
622	seqlock_t profiles_lock;
623	/*
624	* These three are in extended format (availability of single chunks is
625	* denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other types are denoted
626	* by corresponding BTRFS_BLOCK_GROUP_* bits)
627	*/
628	u64 avail_data_alloc_bits;
629	u64 avail_metadata_alloc_bits;
630	u64 avail_system_alloc_bits;
631
632	/ Balance state /
633	spinlock_t balance_lock;
634	struct mutex balance_mutex;
635	atomic_t balance_pause_req;
636	atomic_t balance_cancel_req;
637	struct btrfs_balance_control *balance_ctl;
638	wait_queue_head_t balance_wait_q;
639
640	/ Cancellation requests for chunk relocation /
641	atomic_t reloc_cancel_req;
642
643	u32 data_chunk_allocations;
644	u32 metadata_ratio;
645
646	void *bdev_holder;
647
648	/ Private scrub information /
649	struct mutex scrub_lock;
650	atomic_t scrubs_running;
651	atomic_t scrub_pause_req;
652	atomic_t scrubs_paused;
653	atomic_t scrub_cancel_req;
654	wait_queue_head_t scrub_pause_wait;
655	/*
656	* The worker pointers are NULL iff the refcount is 0, ie. scrub is not
657	* running.
658	*/
659	refcount_t scrub_workers_refcnt;
660	struct workqueue_struct *scrub_workers;
661	struct btrfs_subpage_info *subpage_info;
662
663	struct btrfs_discard_ctl discard_ctl;
664
665	/ Is qgroup tracking in a consistent state? /
666	u64 qgroup_flags;
667
668	/ Holds configuration and tracking. Protected by qgroup_lock. /
669	struct rb_root qgroup_tree;
670	spinlock_t qgroup_lock;
671
672	/*
673	* Used to avoid frequently calling ulist_alloc()/ulist_free()
674	* when doing qgroup accounting, it must be protected by qgroup_lock.
675	*/
676	struct ulist *qgroup_ulist;
677
678	/*
679	* Protect user change for quota operations. If a transaction is needed,
680	* it must be started before locking this lock.
681	*/
682	struct mutex qgroup_ioctl_lock;
683
684	/ List of dirty qgroups to be written at next commit. /
685	struct list_head dirty_qgroups;
686
687	/ Used by qgroup for an efficient tree traversal. /
688	u64 qgroup_seq;
689
690	/ Qgroup rescan items. /
691	/ Protects the progress item /
692	struct mutex qgroup_rescan_lock;
693	struct btrfs_key qgroup_rescan_progress;
694	struct btrfs_workqueue *qgroup_rescan_workers;
695	struct completion qgroup_rescan_completion;
696	struct btrfs_work qgroup_rescan_work;
697	/ Protected by qgroup_rescan_lock /
698	bool qgroup_rescan_running;
699	u8 qgroup_drop_subtree_thres;
700	u64 qgroup_enable_gen;
701
702	/*
703	* If this is not 0, then it indicates a serious filesystem error has
704	* happened and it contains that error (negative errno value).
705	*/
706	int fs_error;
707
708	/ Filesystem state /
709	unsigned long fs_state;
710
711	struct btrfs_delayed_root *delayed_root;
712
713	/ Extent buffer radix tree /
714	spinlock_t buffer_lock;
715	/ Entries are eb->start / sectorsize /
716	struct radix_tree_root buffer_radix;
717
718	/ Next backup root to be overwritten /
719	int backup_root_index;
720
721	/ Device replace state /
722	struct btrfs_dev_replace dev_replace;
723
724	struct semaphore uuid_tree_rescan_sem;
725
726	/ Used to reclaim the metadata space in the background. /
727	struct work_struct async_reclaim_work;
728	struct work_struct async_data_reclaim_work;
729	struct work_struct preempt_reclaim_work;
730
731	/ Reclaim partially filled block groups in the background /
732	struct work_struct reclaim_bgs_work;
733	struct list_head reclaim_bgs;
734	int bg_reclaim_threshold;
735
736	spinlock_t unused_bgs_lock;
737	struct list_head unused_bgs;
738	struct mutex unused_bg_unpin_mutex;
739	/ Protect block groups that are going to be deleted /
740	struct mutex reclaim_bgs_lock;
741
742	/ Cached block sizes /
743	u32 nodesize;
744	u32 sectorsize;
745	/ ilog2 of sectorsize, use to avoid 64bit division /
746	u32 sectorsize_bits;
747	u32 csum_size;
748	u32 csums_per_leaf;
749	u32 stripesize;
750
751	/*
752	* Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular
753	* filesystem, on zoned it depends on the device constraints.
754	*/
755	u64 max_extent_size;
756
757	/ Block groups and devices containing active swapfiles. /
758	spinlock_t swapfile_pins_lock;
759	struct rb_root swapfile_pins;
760
761	struct crypto_shash *csum_shash;
762
763	/ Type of exclusive operation running, protected by super_lock /
764	enum btrfs_exclusive_operation exclusive_operation;
765
766	/*
767	* Zone size > 0 when in ZONED mode, otherwise it's used for a check
768	* if the mode is enabled
769	*/
770	u64 zone_size;
771
772	/ Constraints for ZONE_APPEND commands: /
773	struct queue_limits limits;
774	u64 max_zone_append_size;
775
776	struct mutex zoned_meta_io_lock;
777	spinlock_t treelog_bg_lock;
778	u64 treelog_bg;
779
780	/*
781	* Start of the dedicated data relocation block group, protected by
782	* relocation_bg_lock.
783	*/
784	spinlock_t relocation_bg_lock;
785	u64 data_reloc_bg;
786	struct mutex zoned_data_reloc_io_lock;
787
788	struct btrfs_block_group *active_meta_bg;
789	struct btrfs_block_group *active_system_bg;
790
791	u64 nr_global_roots;
792
793	spinlock_t zone_active_bgs_lock;
794	struct list_head zone_active_bgs;
795
796	/ Updates are not protected by any lock /
797	struct btrfs_commit_stats commit_stats;
798
799	/*
800	* Last generation where we dropped a non-relocation root.
801	* Use btrfs_set_last_root_drop_gen() and btrfs_get_last_root_drop_gen()
802	* to change it and to read it, respectively.
803	*/
804	u64 last_root_drop_gen;
805
806	/*
807	* Annotations for transaction events (structures are empty when
808	* compiled without lockdep).
809	*/
810	struct lockdep_map btrfs_trans_num_writers_map;
811	struct lockdep_map btrfs_trans_num_extwriters_map;
812	struct lockdep_map btrfs_state_change_map[`4`];
813	struct lockdep_map btrfs_trans_pending_ordered_map;
814	struct lockdep_map btrfs_ordered_extent_map;
815
816	#ifdef CONFIG_BTRFS_FS_REF_VERIFY
817	spinlock_t ref_verify_lock;
818	struct rb_root block_tree;
819	#endif
820
821	#ifdef CONFIG_BTRFS_DEBUG
822	struct kobject *debug_kobj;
823	struct list_head allocated_roots;
824
825	spinlock_t eb_leak_lock;
826	struct list_head allocated_ebs;
827	#endif
828	};
829
830	static inline u64 btrfs_get_fs_generation(const struct btrfs_fs_info *fs_info)
831	{
832	return READ_ONCE(fs_info->generation);
833	}
834
835	static inline void btrfs_set_fs_generation(struct btrfs_fs_info *fs_info, u64 gen)
836	{
837	WRITE_ONCE(fs_info->generation, gen);
838	}
839
840	static inline u64 btrfs_get_last_trans_committed(const struct btrfs_fs_info *fs_info)
841	{
842	return READ_ONCE(fs_info->last_trans_committed);
843	}
844
845	static inline void btrfs_set_last_trans_committed(struct btrfs_fs_info *fs_info, u64 gen)
846	{
847	WRITE_ONCE(fs_info->last_trans_committed, gen);
848	}
849
850	static inline void btrfs_set_last_root_drop_gen(struct btrfs_fs_info *fs_info,
851	u64 gen)
852	{
853	WRITE_ONCE(fs_info->last_root_drop_gen, gen);
854	}
855
856	static inline u64 btrfs_get_last_root_drop_gen(const struct btrfs_fs_info *fs_info)
857	{
858	return READ_ONCE(fs_info->last_root_drop_gen);
859	}
860
861	/*
862	* Take the number of bytes to be checksummed and figure out how many leaves
863	* it would require to store the csums for that many bytes.
864	*/
865	static inline u64 btrfs_csum_bytes_to_leaves(
866	const struct btrfs_fs_info *fs_info, u64 csum_bytes)
867	{
868	const u64 num_csums = csum_bytes >> fs_info->sectorsize_bits;
869
870	return DIV_ROUND_UP_ULL(num_csums, fs_info->csums_per_leaf);
871	}
872
873	/*
874	* Use this if we would be adding new items, as we could split nodes as we cow
875	* down the tree.
876	*/
877	static inline u64 btrfs_calc_insert_metadata_size(const struct btrfs_fs_info *fs_info,
878	unsigned num_items)
879	{
880	return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * `2` * num_items;
881	}
882
883	/*
884	* Doing a truncate or a modification won't result in new nodes or leaves, just
885	* what we need for COW.
886	*/
887	static inline u64 btrfs_calc_metadata_size(const struct btrfs_fs_info *fs_info,
888	unsigned num_items)
889	{
890	return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
891	}
892
/*
 * 1/16th of the leaf data size of @r's filesystem, minus one item header.
 * @r is parenthesized so that any pointer expression can be passed.
 */
#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE((r)->fs_info) >> 4) - \
					sizeof(struct btrfs_item))
895
896	static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
897	{
898	return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && fs_info->zone_size > `0`;
899	}
900
901	/*
902	* Count how many fs_info->max_extent_size cover the @size
903	*/
904	static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
905	{
906	#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
907	if (!fs_info)
908	return div_u64(dividend: size + BTRFS_MAX_EXTENT_SIZE - `1`, BTRFS_MAX_EXTENT_SIZE);
909	#endif
910
911	return div_u64(dividend: size + fs_info->max_extent_size - `1`, divisor: fs_info->max_extent_size);
912	}
913
914	bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
915	enum btrfs_exclusive_operation type);
916	bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
917	enum btrfs_exclusive_operation type);
918	void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info);
919	void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
920	void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
921	enum btrfs_exclusive_operation op);
922
923	/ Compatibility and incompatibility defines /
924	void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
925	const char *name);
926	void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
927	const char *name);
928	void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
929	const char *name);
930	void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
931	const char *name);
932
/* Evaluate to 1 if any of @mask is set in the super copy's incompat flags. */
#define __btrfs_fs_incompat(info, mask)					\
	((btrfs_super_incompat_flags((info)->super_copy) & (mask)) != 0)

/* Evaluate to 1 if any of @mask is set in the super copy's compat_ro flags. */
#define __btrfs_fs_compat_ro(info, mask)				\
	((btrfs_super_compat_ro_flags((info)->super_copy) & (mask)) != 0)

/*
 * The wrappers below take the short feature name (e.g. ZONED) and paste it
 * onto the BTRFS_FEATURE_INCOMPAT_ / BTRFS_FEATURE_COMPAT_RO_ prefix.  The
 * set/clear variants also pass the name as a string.
 */
#define btrfs_set_fs_incompat(info, feature)				\
	__btrfs_set_fs_incompat((info), BTRFS_FEATURE_INCOMPAT_##feature, #feature)

#define btrfs_clear_fs_incompat(info, feature)				\
	__btrfs_clear_fs_incompat((info), BTRFS_FEATURE_INCOMPAT_##feature, #feature)

#define btrfs_fs_incompat(info, feature)				\
	__btrfs_fs_incompat((info), BTRFS_FEATURE_INCOMPAT_##feature)

#define btrfs_set_fs_compat_ro(info, feature)				\
	__btrfs_set_fs_compat_ro((info), BTRFS_FEATURE_COMPAT_RO_##feature, #feature)

#define btrfs_clear_fs_compat_ro(info, feature)				\
	__btrfs_clear_fs_compat_ro((info), BTRFS_FEATURE_COMPAT_RO_##feature, #feature)

#define btrfs_fs_compat_ro(info, feature)				\
	__btrfs_fs_compat_ro((info), BTRFS_FEATURE_COMPAT_RO_##feature)
956
/*
 * Mount option helpers.  @opt is the short option name (e.g. SSD); it is
 * pasted onto the BTRFS_MOUNT_ prefix.  @o is an lvalue holding the option
 * bits, @fs_info a pointer whose mount_opt member is tested.
 */
#define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
#define btrfs_raw_test_opt(o, opt)	((o) & BTRFS_MOUNT_##opt)
#define btrfs_test_opt(fs_info, opt)	((fs_info)->mount_opt & \
					 BTRFS_MOUNT_##opt)
962
/*
 * Set mount option @opt on @fs_info.  The message is logged via btrfs_info()
 * only when the option was not already set.
 */
#define btrfs_set_and_info(fs_info, opt, fmt, args...)			\
do {									\
	if (!btrfs_test_opt(fs_info, opt))				\
		btrfs_info(fs_info, fmt, ##args);			\
	btrfs_set_opt((fs_info)->mount_opt, opt);			\
} while (0)
969
/*
 * Clear mount option @opt on @fs_info.  The message is logged via
 * btrfs_info() only when the option was set before.
 */
#define btrfs_clear_and_info(fs_info, opt, fmt, args...)		\
do {									\
	if (btrfs_test_opt(fs_info, opt))				\
		btrfs_info(fs_info, fmt, ##args);			\
	btrfs_clear_opt((fs_info)->mount_opt, opt);			\
} while (0)
976
977	static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
978	{
979	/ Do it this way so we only ever do one test_bit in the normal case. /
980	if (test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)) {
981	if (test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags))
982	return `2`;
983	return `1`;
984	}
985	return `0`;
986	}
987
988	/*
989	* If we remount the fs to be R/O or umount the fs, the cleaner needn't do
990	* anything except sleeping. This function is used to check the status of
991	* the fs.
992	* We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount,
993	* since setting and checking for SB_RDONLY in the superblock's flags is not
994	* atomic.
995	*/
996	static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)
997	{
998	return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) \|\|
999	btrfs_fs_closing(fs_info);
1000	}
1001
1002	static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
1003	{
1004	clear_and_wake_up_bit(bit: BTRFS_FS_UNFINISHED_DROPS, word: &fs_info->flags);
1005	}
1006
/* Current value of @info->fs_error, read with READ_ONCE(). */
#define BTRFS_FS_ERROR(info)	(READ_ONCE((info)->fs_error))

/* True if BTRFS_FS_STATE_LOG_CLEANUP_ERROR is set in @info->fs_state. */
#define BTRFS_FS_LOG_CLEANUP_ERROR(info)				\
	(unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,		\
			   &(info)->fs_state)))
1012
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS

/* With sanity tests enabled, EXPORT_FOR_TESTS expands to nothing. */
#define EXPORT_FOR_TESTS

/* Non-zero if @fs_info has BTRFS_FS_STATE_DUMMY_FS_INFO set (self-test fs_info). */
static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
{
	return test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
}

void btrfs_test_destroy_inode(struct inode *inode);

#else

/* Without sanity tests, EXPORT_FOR_TESTS makes the symbol static. */
#define EXPORT_FOR_TESTS static

/* Sanity tests are compiled out, so no fs_info is ever a test one. */
static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
{
	return 0;
}
#endif
1033
1034	#endif
1035

source code of linux/fs/btrfs/fs.h