xfs_mount.c source code [linux/fs/xfs/xfs_mount.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Copyright (c) 2000-2005 Silicon Graphics, Inc.
4	* All Rights Reserved.
5	*/
6	#include "xfs.h"
7	#include "xfs_fs.h"
8	#include "xfs_shared.h"
9	#include "xfs_format.h"
10	#include "xfs_log_format.h"
11	#include "xfs_trans_resv.h"
12	#include "xfs_bit.h"
13	#include "xfs_sb.h"
14	#include "xfs_mount.h"
15	#include "xfs_inode.h"
16	#include "xfs_dir2.h"
17	#include "xfs_ialloc.h"
18	#include "xfs_alloc.h"
19	#include "xfs_rtalloc.h"
20	#include "xfs_bmap.h"
21	#include "xfs_trans.h"
22	#include "xfs_trans_priv.h"
23	#include "xfs_log.h"
24	#include "xfs_log_priv.h"
25	#include "xfs_error.h"
26	#include "xfs_quota.h"
27	#include "xfs_fsops.h"
28	#include "xfs_icache.h"
29	#include "xfs_sysfs.h"
30	#include "xfs_rmap_btree.h"
31	#include "xfs_refcount_btree.h"
32	#include "xfs_reflink.h"
33	#include "xfs_extent_busy.h"
34	#include "xfs_health.h"
35	#include "xfs_trace.h"
36	#include "xfs_ag.h"
37	#include "scrub/stats.h"
38
/*
 * Module-wide table of the UUIDs of XFS filesystems registered by
 * xfs_uuid_mount().  xfs_uuid_mount() and xfs_uuid_unmount() take
 * xfs_uuid_table_mutex around every scan and update of the table.
 *
 * xfs_uuid_table_size is the number of slots in the xfs_uuid_table array.
 * A slot holding the null UUID is a free "hole" that a later mount may reuse.
 */
static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;
42
43	void
44	xfs_uuid_table_free(void)
45	{
46	if (xfs_uuid_table_size == `0`)
47	return;
48	kmem_free(ptr: xfs_uuid_table);
49	xfs_uuid_table = NULL;
50	xfs_uuid_table_size = `0`;
51	}
52
53	/*
54	* See if the UUID is unique among mounted XFS filesystems.
55	* Mount fails if UUID is nil or a FS with the same UUID is already mounted.
56	*/
57	STATIC int
58	xfs_uuid_mount(
59	struct xfs_mount *mp)
60	{
61	uuid_t *uuid = &mp->m_sb.sb_uuid;
62	int hole, i;
63
64	/ Publish UUID in struct super_block /
65	uuid_copy(dst: &mp->m_super->s_uuid, src: uuid);
66
67	if (xfs_has_nouuid(mp))
68	return `0`;
69
70	if (uuid_is_null(uuid)) {
71	xfs_warn(mp, "Filesystem has null UUID - can't mount");
72	return -EINVAL;
73	}
74
75	mutex_lock(&xfs_uuid_table_mutex);
76	for (i = `0`, hole = -`1`; i < xfs_uuid_table_size; i++) {
77	if (uuid_is_null(uuid: &xfs_uuid_table[i])) {
78	hole = i;
79	continue;
80	}
81	if (uuid_equal(u1: uuid, u2: &xfs_uuid_table[i]))
82	goto out_duplicate;
83	}
84
85	if (hole < `0`) {
86	xfs_uuid_table = krealloc(objp: xfs_uuid_table,
87	new_size: (xfs_uuid_table_size + `1`) * sizeof(*xfs_uuid_table),
88	GFP_KERNEL \| __GFP_NOFAIL);
89	hole = xfs_uuid_table_size++;
90	}
91	xfs_uuid_table[hole] = *uuid;
92	mutex_unlock(lock: &xfs_uuid_table_mutex);
93
94	return `0`;
95
96	out_duplicate:
97	mutex_unlock(lock: &xfs_uuid_table_mutex);
98	xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
99	return -EINVAL;
100	}
101
102	STATIC void
103	xfs_uuid_unmount(
104	struct xfs_mount *mp)
105	{
106	uuid_t *uuid = &mp->m_sb.sb_uuid;
107	int i;
108
109	if (xfs_has_nouuid(mp))
110	return;
111
112	mutex_lock(&xfs_uuid_table_mutex);
113	for (i = `0`; i < xfs_uuid_table_size; i++) {
114	if (uuid_is_null(uuid: &xfs_uuid_table[i]))
115	continue;
116	if (!uuid_equal(u1: uuid, u2: &xfs_uuid_table[i]))
117	continue;
118	memset(&xfs_uuid_table[i], `0`, sizeof(uuid_t));
119	break;
120	}
121	ASSERT(i < xfs_uuid_table_size);
122	mutex_unlock(lock: &xfs_uuid_table_mutex);
123	}
124
125	/*
126	* Check size of device based on the (data/realtime) block count.
127	* Note: this check is used by the growfs code as well as mount.
128	*/
129	int
130	xfs_sb_validate_fsb_count(
131	xfs_sb_t *sbp,
132	uint64_t nblocks)
133	{
134	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
135	ASSERT(sbp->sb_blocklog >= BBSHIFT);
136
137	/ Limited by ULONG_MAX of page cache index /
138	if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
139	return -EFBIG;
140	return `0`;
141	}
142
143	/*
144	* xfs_readsb
145	*
146	* Does the initial read of the superblock.
147	*/
148	int
149	xfs_readsb(
150	struct xfs_mount *mp,
151	int flags)
152	{
153	unsigned int sector_size;
154	struct xfs_buf *bp;
155	struct xfs_sb *sbp = &mp->m_sb;
156	int error;
157	int loud = !(flags & XFS_MFSI_QUIET);
158	const struct xfs_buf_ops *buf_ops;
159
160	ASSERT(mp->m_sb_bp == NULL);
161	ASSERT(mp->m_ddev_targp != NULL);
162
163	/*
164	* For the initial read, we must guess at the sector
165	* size based on the block device. It's enough to
166	* get the sb_sectsize out of the superblock and
167	* then reread with the proper length.
168	* We don't verify it yet, because it may not be complete.
169	*/
170	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
171	buf_ops = NULL;
172
173	/*
174	* Allocate a (locked) buffer to hold the superblock. This will be kept
175	* around at all times to optimize access to the superblock. Therefore,
176	* set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count
177	* elevated.
178	*/
179	reread:
180	error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
181	BTOBB(sector_size), XBF_NO_IOACCT, &bp,
182	buf_ops);
183	if (error) {
184	if (loud)
185	xfs_warn(mp, "SB validate failed with error %d.", error);
186	/ bad CRC means corrupted metadata /
187	if (error == -EFSBADCRC)
188	error = -EFSCORRUPTED;
189	return error;
190	}
191
192	/*
193	* Initialize the mount structure from the superblock.
194	*/
195	xfs_sb_from_disk(sbp, bp->b_addr);
196
197	/*
198	* If we haven't validated the superblock, do so now before we try
199	* to check the sector size and reread the superblock appropriately.
200	*/
201	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
202	if (loud)
203	xfs_warn(mp, "Invalid superblock magic number");
204	error = -EINVAL;
205	goto release_buf;
206	}
207
208	/*
209	* We must be able to do sector-sized and sector-aligned IO.
210	*/
211	if (sector_size > sbp->sb_sectsize) {
212	if (loud)
213	xfs_warn(mp, "device supports %u byte sectors (not %u)",
214	sector_size, sbp->sb_sectsize);
215	error = -ENOSYS;
216	goto release_buf;
217	}
218
219	if (buf_ops == NULL) {
220	/*
221	* Re-read the superblock so the buffer is correctly sized,
222	* and properly verified.
223	*/
224	xfs_buf_relse(bp);
225	sector_size = sbp->sb_sectsize;
226	buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;
227	goto reread;
228	}
229
230	mp->m_features \|= xfs_sb_version_to_features(sbp);
231	xfs_reinit_percpu_counters(mp);
232
233	/ no need to be quiet anymore, so reset the buf ops /
234	bp->b_ops = &xfs_sb_buf_ops;
235
236	mp->m_sb_bp = bp;
237	xfs_buf_unlock(bp);
238	return `0`;
239
240	release_buf:
241	xfs_buf_relse(bp);
242	return error;
243	}
244
245	/*
246	* If the sunit/swidth change would move the precomputed root inode value, we
247	* must reject the ondisk change because repair will stumble over that.
248	* However, we allow the mount to proceed because we never rejected this
249	* combination before. Returns true to update the sb, false otherwise.
250	*/
251	static inline int
252	xfs_check_new_dalign(
253	struct xfs_mount *mp,
254	int new_dalign,
255	bool *update_sb)
256	{
257	struct xfs_sb *sbp = &mp->m_sb;
258	xfs_ino_t calc_ino;
259
260	calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
261	trace_xfs_check_new_dalign(mp, new_dalign, calc_rootino: calc_ino);
262
263	if (sbp->sb_rootino == calc_ino) {
264	*update_sb = true;
265	return `0`;
266	}
267
268	xfs_warn(mp,
269	"Cannot change stripe alignment; would require moving root inode.");
270
271	/*
272	* XXX: Next time we add a new incompat feature, this should start
273	* returning -EINVAL to fail the mount. Until then, spit out a warning
274	* that we're ignoring the administrator's instructions.
275	*/
276	xfs_warn(mp, "Skipping superblock stripe alignment update.");
277	*update_sb = false;
278	return `0`;
279	}
280
281	/*
282	* If we were provided with new sunit/swidth values as mount options, make sure
283	* that they pass basic alignment and superblock feature checks, and convert
284	* them into the same units (FSB) that everything else expects. This step
285	* /must/ be done before computing the inode geometry.
286	*/
287	STATIC int
288	xfs_validate_new_dalign(
289	struct xfs_mount *mp)
290	{
291	if (mp->m_dalign == `0`)
292	return `0`;
293
294	/*
295	* If stripe unit and stripe width are not multiples
296	* of the fs blocksize turn off alignment.
297	*/
298	if ((BBTOB(mp->m_dalign) & mp->m_blockmask) \|\|
299	(BBTOB(mp->m_swidth) & mp->m_blockmask)) {
300	xfs_warn(mp,
301	"alignment check failed: sunit/swidth vs. blocksize(%d)",
302	mp->m_sb.sb_blocksize);
303	return -EINVAL;
304	}
305
306	/*
307	* Convert the stripe unit and width to FSBs.
308	*/
309	mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
310	if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
311	xfs_warn(mp,
312	"alignment check failed: sunit/swidth vs. agsize(%d)",
313	mp->m_sb.sb_agblocks);
314	return -EINVAL;
315	}
316
317	if (!mp->m_dalign) {
318	xfs_warn(mp,
319	"alignment check failed: sunit(%d) less than bsize(%d)",
320	mp->m_dalign, mp->m_sb.sb_blocksize);
321	return -EINVAL;
322	}
323
324	mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
325
326	if (!xfs_has_dalign(mp)) {
327	xfs_warn(mp,
328	"cannot change alignment: superblock does not support data alignment");
329	return -EINVAL;
330	}
331
332	return `0`;
333	}
334
335	/ Update alignment values based on mount options and sb values. /
336	STATIC int
337	xfs_update_alignment(
338	struct xfs_mount *mp)
339	{
340	struct xfs_sb *sbp = &mp->m_sb;
341
342	if (mp->m_dalign) {
343	bool update_sb;
344	int error;
345
346	if (sbp->sb_unit == mp->m_dalign &&
347	sbp->sb_width == mp->m_swidth)
348	return `0`;
349
350	error = xfs_check_new_dalign(mp, new_dalign: mp->m_dalign, update_sb: &update_sb);
351	if (error \|\| !update_sb)
352	return error;
353
354	sbp->sb_unit = mp->m_dalign;
355	sbp->sb_width = mp->m_swidth;
356	mp->m_update_sb = true;
357	} else if (!xfs_has_noalign(mp) && xfs_has_dalign(mp)) {
358	mp->m_dalign = sbp->sb_unit;
359	mp->m_swidth = sbp->sb_width;
360	}
361
362	return `0`;
363	}
364
365	/*
366	* precalculate the low space thresholds for dynamic speculative preallocation.
367	*/
368	void
369	xfs_set_low_space_thresholds(
370	struct xfs_mount *mp)
371	{
372	uint64_t dblocks = mp->m_sb.sb_dblocks;
373	uint64_t rtexts = mp->m_sb.sb_rextents;
374	int i;
375
376	do_div(dblocks, `100`);
377	do_div(rtexts, `100`);
378
379	for (i = `0`; i < XFS_LOWSP_MAX; i++) {
380	mp->m_low_space[i] = dblocks * (i + `1`);
381	mp->m_low_rtexts[i] = rtexts * (i + `1`);
382	}
383	}
384
385	/*
386	* Check that the data (and log if separate) is an ok size.
387	*/
388	STATIC int
389	xfs_check_sizes(
390	struct xfs_mount *mp)
391	{
392	struct xfs_buf *bp;
393	xfs_daddr_t d;
394	int error;
395
396	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
397	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
398	xfs_warn(mp, "filesystem size mismatch detected");
399	return -EFBIG;
400	}
401	error = xfs_buf_read_uncached(target: mp->m_ddev_targp,
402	daddr: d - XFS_FSS_TO_BB(mp, `1`),
403	numblks: XFS_FSS_TO_BB(mp, `1`), flags: `0`, bpp: &bp, NULL);
404	if (error) {
405	xfs_warn(mp, "last sector read failed");
406	return error;
407	}
408	xfs_buf_relse(bp);
409
410	if (mp->m_logdev_targp == mp->m_ddev_targp)
411	return `0`;
412
413	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
414	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
415	xfs_warn(mp, "log size mismatch detected");
416	return -EFBIG;
417	}
418	error = xfs_buf_read_uncached(target: mp->m_logdev_targp,
419	daddr: d - XFS_FSB_TO_BB(mp, `1`),
420	numblks: XFS_FSB_TO_BB(mp, `1`), flags: `0`, bpp: &bp, NULL);
421	if (error) {
422	xfs_warn(mp, "log device read failed");
423	return error;
424	}
425	xfs_buf_relse(bp);
426	return `0`;
427	}
428
429	/*
430	* Clear the quotaflags in memory and in the superblock.
431	*/
432	int
433	xfs_mount_reset_sbqflags(
434	struct xfs_mount *mp)
435	{
436	mp->m_qflags = `0`;
437
438	/ It is OK to look at sb_qflags in the mount path without m_sb_lock. /
439	if (mp->m_sb.sb_qflags == `0`)
440	return `0`;
441	spin_lock(lock: &mp->m_sb_lock);
442	mp->m_sb.sb_qflags = `0`;
443	spin_unlock(lock: &mp->m_sb_lock);
444
445	if (!xfs_fs_writable(mp, level: SB_FREEZE_WRITE))
446	return `0`;
447
448	return xfs_sync_sb(mp, false);
449	}
450
451	uint64_t
452	xfs_default_resblks(xfs_mount_t *mp)
453	{
454	uint64_t resblks;
455
456	/*
457	* We default to 5% or 8192 fsbs of space reserved, whichever is
458	* smaller. This is intended to cover concurrent allocation
459	* transactions when we initially hit enospc. These each require a 4
460	* block reservation. Hence by default we cover roughly 2000 concurrent
461	* allocation reservations.
462	*/
463	resblks = mp->m_sb.sb_dblocks;
464	do_div(resblks, `20`);
465	resblks = min_t(uint64_t, resblks, `8192`);
466	return resblks;
467	}
468
469	/ Ensure the summary counts are correct. /
470	STATIC int
471	xfs_check_summary_counts(
472	struct xfs_mount *mp)
473	{
474	int error = `0`;
475
476	/*
477	* The AG0 superblock verifier rejects in-progress filesystems,
478	* so we should never see the flag set this far into mounting.
479	*/
480	if (mp->m_sb.sb_inprogress) {
481	xfs_err(mp, "sb_inprogress set after log recovery??");
482	WARN_ON(`1`);
483	return -EFSCORRUPTED;
484	}
485
486	/*
487	* Now the log is mounted, we know if it was an unclean shutdown or
488	* not. If it was, with the first phase of recovery has completed, we
489	* have consistent AG blocks on disk. We have not recovered EFIs yet,
490	* but they are recovered transactionally in the second recovery phase
491	* later.
492	*
493	* If the log was clean when we mounted, we can check the summary
494	* counters. If any of them are obviously incorrect, we can recompute
495	* them from the AGF headers in the next step.
496	*/
497	if (xfs_is_clean(mp) &&
498	(mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks \|\|
499	!xfs_verify_icount(mp, mp->m_sb.sb_icount) \|\|
500	mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
501	xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
502
503	/*
504	* We can safely re-initialise incore superblock counters from the
505	* per-ag data. These may not be correct if the filesystem was not
506	* cleanly unmounted, so we waited for recovery to finish before doing
507	* this.
508	*
509	* If the filesystem was cleanly unmounted or the previous check did
510	* not flag anything weird, then we can trust the values in the
511	* superblock to be correct and we don't need to do anything here.
512	* Otherwise, recalculate the summary counters.
513	*/
514	if ((xfs_has_lazysbcount(mp) && !xfs_is_clean(mp)) \|\|
515	xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS)) {
516	error = xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
517	if (error)
518	return error;
519	}
520
521	/*
522	* Older kernels misused sb_frextents to reflect both incore
523	* reservations made by running transactions and the actual count of
524	* free rt extents in the ondisk metadata. Transactions committed
525	* during runtime can therefore contain a superblock update that
526	* undercounts the number of free rt extents tracked in the rt bitmap.
527	* A clean unmount record will have the correct frextents value since
528	* there can be no other transactions running at that point.
529	*
530	* If we're mounting the rt volume after recovering the log, recompute
531	* frextents from the rtbitmap file to fix the inconsistency.
532	*/
533	if (xfs_has_realtime(mp) && !xfs_is_clean(mp)) {
534	error = xfs_rtalloc_reinit_frextents(mp);
535	if (error)
536	return error;
537	}
538
539	return `0`;
540	}
541
542	static void
543	xfs_unmount_check(
544	struct xfs_mount *mp)
545	{
546	if (xfs_is_shutdown(mp))
547	return;
548
549	if (percpu_counter_sum(fbc: &mp->m_ifree) >
550	percpu_counter_sum(fbc: &mp->m_icount)) {
551	xfs_alert(mp, "ifree/icount mismatch at unmount");
552	xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
553	}
554	}
555
556	/*
557	* Flush and reclaim dirty inodes in preparation for unmount. Inodes and
558	* internal inode structures can be sitting in the CIL and AIL at this point,
559	* so we need to unpin them, write them back and/or reclaim them before unmount
560	* can proceed. In other words, callers are required to have inactivated all
561	* inodes.
562	*
563	* An inode cluster that has been freed can have its buffer still pinned in
564	* memory because the transaction is still sitting in a iclog. The stale inodes
565	* on that buffer will be pinned to the buffer until the transaction hits the
566	* disk and the callbacks run. Pushing the AIL will skip the stale inodes and
567	* may never see the pinned buffer, so nothing will push out the iclog and
568	* unpin the buffer.
569	*
570	* Hence we need to force the log to unpin everything first. However, log
571	* forces don't wait for the discards they issue to complete, so we have to
572	* explicitly wait for them to complete here as well.
573	*
574	* Then we can tell the world we are unmounting so that error handling knows
575	* that the filesystem is going away and we should error out anything that we
576	* have been retrying in the background. This will prevent never-ending
577	* retries in AIL pushing from hanging the unmount.
578	*
579	* Finally, we can push the AIL to clean all the remaining dirty objects, then
580	* reclaim the remaining inodes that are still in memory at this point in time.
581	*/
582	static void
583	xfs_unmount_flush_inodes(
584	struct xfs_mount *mp)
585	{
586	xfs_log_force(mp, XFS_LOG_SYNC);
587	xfs_extent_busy_wait_all(mp);
588	flush_workqueue(xfs_discard_wq);
589
590	set_bit(XFS_OPSTATE_UNMOUNTING, addr: &mp->m_opstate);
591
592	xfs_ail_push_all_sync(mp->m_ail);
593	xfs_inodegc_stop(mp);
594	cancel_delayed_work_sync(dwork: &mp->m_reclaim_work);
595	xfs_reclaim_inodes(mp);
596	xfs_health_unmount(mp);
597	}
598
599	static void
600	xfs_mount_setup_inode_geom(
601	struct xfs_mount *mp)
602	{
603	struct xfs_ino_geometry *igeo = M_IGEO(mp);
604
605	igeo->attr_fork_offset = xfs_bmap_compute_attr_offset(mp);
606	ASSERT(igeo->attr_fork_offset < XFS_LITINO(mp));
607
608	xfs_ialloc_setup_geometry(mp);
609	}
610
611	/ Compute maximum possible height for per-AG btree types for this fs. /
612	static inline void
613	xfs_agbtree_compute_maxlevels(
614	struct xfs_mount *mp)
615	{
616	unsigned int levels;
617
618	levels = max(mp->m_alloc_maxlevels, M_IGEO(mp)->inobt_maxlevels);
619	levels = max(levels, mp->m_rmap_maxlevels);
620	mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels);
621	}
622
623	/*
624	* This function does the following on an initial mount of a file system:
625	* - reads the superblock from disk and init the mount struct
626	* - if we're a 32-bit kernel, do a size check on the superblock
627	* so we don't mount terabyte filesystems
628	* - init mount struct realtime fields
629	* - allocate inode hash table for fs
630	* - init directory manager
631	* - perform recovery and init the log manager
632	*/
633	int
634	xfs_mountfs(
635	struct xfs_mount *mp)
636	{
637	struct xfs_sb *sbp = &(mp->m_sb);
638	struct xfs_inode *rip;
639	struct xfs_ino_geometry *igeo = M_IGEO(mp);
640	uint64_t resblks;
641	uint quotamount = `0`;
642	uint quotaflags = `0`;
643	int error = `0`;
644
645	xfs_sb_mount_common(mp, sbp);
646
647	/*
648	* Check for a mismatched features2 values. Older kernels read & wrote
649	* into the wrong sb offset for sb_features2 on some platforms due to
650	* xfs_sb_t not being 64bit size aligned when sb_features2 was added,
651	* which made older superblock reading/writing routines swap it as a
652	* 64-bit value.
653	*
654	* For backwards compatibility, we make both slots equal.
655	*
656	* If we detect a mismatched field, we OR the set bits into the existing
657	* features2 field in case it has already been modified; we don't want
658	* to lose any features. We then update the bad location with the ORed
659	* value so that older kernels will see any features2 flags. The
660	* superblock writeback code ensures the new sb_features2 is copied to
661	* sb_bad_features2 before it is logged or written to disk.
662	*/
663	if (xfs_sb_has_mismatched_features2(sbp)) {
664	xfs_warn(mp, "correcting sb_features alignment problem");
665	sbp->sb_features2 \|= sbp->sb_bad_features2;
666	mp->m_update_sb = true;
667	}
668
669
670	/ always use v2 inodes by default now /
671	if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
672	mp->m_sb.sb_versionnum \|= XFS_SB_VERSION_NLINKBIT;
673	mp->m_features \|= XFS_FEAT_NLINK;
674	mp->m_update_sb = true;
675	}
676
677	/*
678	* If we were given new sunit/swidth options, do some basic validation
679	* checks and convert the incore dalign and swidth values to the
680	* same units (FSB) that everything else uses. This /must/ happen
681	* before computing the inode geometry.
682	*/
683	error = xfs_validate_new_dalign(mp);
684	if (error)
685	goto out;
686
687	xfs_alloc_compute_maxlevels(mp);
688	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
689	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
690	xfs_mount_setup_inode_geom(mp);
691	xfs_rmapbt_compute_maxlevels(mp);
692	xfs_refcountbt_compute_maxlevels(mp);
693
694	xfs_agbtree_compute_maxlevels(mp);
695
696	/*
697	* Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks
698	* is NOT aligned turn off m_dalign since allocator alignment is within
699	* an ag, therefore ag has to be aligned at stripe boundary. Note that
700	* we must compute the free space and rmap btree geometry before doing
701	* this.
702	*/
703	error = xfs_update_alignment(mp);
704	if (error)
705	goto out;
706
707	/ enable fail_at_unmount as default /
708	mp->m_fail_unmount = true;
709
710	error = xfs_sysfs_init(kobj: &mp->m_kobj, ktype: &xfs_mp_ktype,
711	NULL, name: mp->m_super->s_id);
712	if (error)
713	goto out;
714
715	error = xfs_sysfs_init(kobj: &mp->m_stats.xs_kobj, ktype: &xfs_stats_ktype,
716	parent_kobj: &mp->m_kobj, name: "stats");
717	if (error)
718	goto out_remove_sysfs;
719
720	xchk_stats_register(cs: mp->m_scrub_stats, parent: mp->m_debugfs);
721
722	error = xfs_error_sysfs_init(mp);
723	if (error)
724	goto out_remove_scrub_stats;
725
726	error = xfs_errortag_init(mp);
727	if (error)
728	goto out_remove_error_sysfs;
729
730	error = xfs_uuid_mount(mp);
731	if (error)
732	goto out_remove_errortag;
733
734	/*
735	* Update the preferred write size based on the information from the
736	* on-disk superblock.
737	*/
738	mp->m_allocsize_log =
739	max_t(uint32_t, sbp->sb_blocklog, mp->m_allocsize_log);
740	mp->m_allocsize_blocks = `1U` << (mp->m_allocsize_log - sbp->sb_blocklog);
741
742	/ set the low space thresholds for dynamic preallocation /
743	xfs_set_low_space_thresholds(mp);
744
745	/*
746	* If enabled, sparse inode chunk alignment is expected to match the
747	* cluster size. Full inode chunk alignment must match the chunk size,
748	* but that is checked on sb read verification...
749	*/
750	if (xfs_has_sparseinodes(mp) &&
751	mp->m_sb.sb_spino_align !=
752	XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
753	xfs_warn(mp,
754	"Sparse inode block alignment (%u) must match cluster size (%llu).",
755	mp->m_sb.sb_spino_align,
756	XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
757	error = -EINVAL;
758	goto out_remove_uuid;
759	}
760
761	/*
762	* Check that the data (and log if separate) is an ok size.
763	*/
764	error = xfs_check_sizes(mp);
765	if (error)
766	goto out_remove_uuid;
767
768	/*
769	* Initialize realtime fields in the mount structure
770	*/
771	error = xfs_rtmount_init(mp);
772	if (error) {
773	xfs_warn(mp, "RT mount failed");
774	goto out_remove_uuid;
775	}
776
777	/*
778	* Copies the low order bits of the timestamp and the randomly
779	* set "sequence" number out of a UUID.
780	*/
781	mp->m_fixedfsid[`0`] =
782	(get_unaligned_be16(p: &sbp->sb_uuid.b[`8`]) << `16`) \|
783	get_unaligned_be16(p: &sbp->sb_uuid.b[`4`]);
784	mp->m_fixedfsid[`1`] = get_unaligned_be32(p: &sbp->sb_uuid.b[`0`]);
785
786	error = xfs_da_mount(mp);
787	if (error) {
788	xfs_warn(mp, "Failed dir/attr init: %d", error);
789	goto out_remove_uuid;
790	}
791
792	/*
793	* Initialize the precomputed transaction reservations values.
794	*/
795	xfs_trans_init(mp);
796
797	/*
798	* Allocate and initialize the per-ag data.
799	*/
800	error = xfs_initialize_perag(mp, sbp->sb_agcount, mp->m_sb.sb_dblocks,
801	&mp->m_maxagi);
802	if (error) {
803	xfs_warn(mp, "Failed per-ag init: %d", error);
804	goto out_free_dir;
805	}
806
807	if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) {
808	xfs_warn(mp, "no log defined");
809	error = -EFSCORRUPTED;
810	goto out_free_perag;
811	}
812
813	error = xfs_inodegc_register_shrinker(mp);
814	if (error)
815	goto out_fail_wait;
816
817	/*
818	* Log's mount-time initialization. The first part of recovery can place
819	* some items on the AIL, to be handled when recovery is finished or
820	* cancelled.
821	*/
822	error = xfs_log_mount(mp, log_target: mp->m_logdev_targp,
823	start_block: XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
824	num_bblocks: XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
825	if (error) {
826	xfs_warn(mp, "log mount failed");
827	goto out_inodegc_shrinker;
828	}
829
830	/ Enable background inode inactivation workers. /
831	xfs_inodegc_start(mp);
832	xfs_blockgc_start(mp);
833
834	/*
835	* Now that we've recovered any pending superblock feature bit
836	* additions, we can finish setting up the attr2 behaviour for the
837	* mount. The noattr2 option overrides the superblock flag, so only
838	* check the superblock feature flag if the mount option is not set.
839	*/
840	if (xfs_has_noattr2(mp)) {
841	mp->m_features &= ~XFS_FEAT_ATTR2;
842	} else if (!xfs_has_attr2(mp) &&
843	(mp->m_sb.sb_features2 & XFS_SB_VERSION2_ATTR2BIT)) {
844	mp->m_features \|= XFS_FEAT_ATTR2;
845	}
846
847	/*
848	* Get and sanity-check the root inode.
849	* Save the pointer to it in the mount structure.
850	*/
851	error = xfs_iget(mp, NULL, ino: sbp->sb_rootino, XFS_IGET_UNTRUSTED,
852	XFS_ILOCK_EXCL, ipp: &rip);
853	if (error) {
854	xfs_warn(mp,
855	"Failed to read root inode 0x%llx, error %d",
856	sbp->sb_rootino, -error);
857	goto out_log_dealloc;
858	}
859
860	ASSERT(rip != NULL);
861
862	if (XFS_IS_CORRUPT(mp, !S_ISDIR(VFS_I(rip)->i_mode))) {
863	xfs_warn(mp, "corrupted root inode %llu: not a directory",
864	(unsigned long long)rip->i_ino);
865	xfs_iunlock(rip, XFS_ILOCK_EXCL);
866	error = -EFSCORRUPTED;
867	goto out_rele_rip;
868	}
869	mp->m_rootip = rip; / save it /
870
871	xfs_iunlock(rip, XFS_ILOCK_EXCL);
872
873	/*
874	* Initialize realtime inode pointers in the mount structure
875	*/
876	error = xfs_rtmount_inodes(mp);
877	if (error) {
878	/*
879	* Free up the root inode.
880	*/
881	xfs_warn(mp, "failed to read RT inodes");
882	goto out_rele_rip;
883	}
884
885	/ Make sure the summary counts are ok. /
886	error = xfs_check_summary_counts(mp);
887	if (error)
888	goto out_rtunmount;
889
890	/*
891	* If this is a read-only mount defer the superblock updates until
892	* the next remount into writeable mode. Otherwise we would never
893	* perform the update e.g. for the root filesystem.
894	*/
895	if (mp->m_update_sb && !xfs_is_readonly(mp)) {
896	error = xfs_sync_sb(mp, false);
897	if (error) {
898	xfs_warn(mp, "failed to write sb changes");
899	goto out_rtunmount;
900	}
901	}
902
903	/*
904	* Initialise the XFS quota management subsystem for this mount
905	*/
906	if (XFS_IS_QUOTA_ON(mp)) {
907	error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
908	if (error)
909	goto out_rtunmount;
910	} else {
911	/*
912	* If a file system had quotas running earlier, but decided to
913	* mount without -o uquota/pquota/gquota options, revoke the
914	* quotachecked license.
915	*/
916	if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
917	xfs_notice(mp, "resetting quota flags");
918	error = xfs_mount_reset_sbqflags(mp);
919	if (error)
920	goto out_rtunmount;
921	}
922	}
923
924	/*
925	* Finish recovering the file system. This part needed to be delayed
926	* until after the root and real-time bitmap inodes were consistently
927	* read in. Temporarily create per-AG space reservations for metadata
928	* btree shape changes because space freeing transactions (for inode
929	* inactivation) require the per-AG reservation in lieu of reserving
930	* blocks.
931	*/
932	error = xfs_fs_reserve_ag_blocks(mp);
933	if (error && error == -ENOSPC)
934	xfs_warn(mp,
935	"ENOSPC reserving per-AG metadata pool, log recovery may fail.");
936	error = xfs_log_mount_finish(mp);
937	xfs_fs_unreserve_ag_blocks(mp);
938	if (error) {
939	xfs_warn(mp, "log mount finish failed");
940	goto out_rtunmount;
941	}
942
943	/*
944	* Now the log is fully replayed, we can transition to full read-only
945	* mode for read-only mounts. This will sync all the metadata and clean
946	* the log so that the recovery we just performed does not have to be
947	* replayed again on the next mount.
948	*
949	* We use the same quiesce mechanism as the rw->ro remount, as they are
950	* semantically identical operations.
951	*/
952	if (xfs_is_readonly(mp) && !xfs_has_norecovery(mp))
953	xfs_log_clean(mp);
954
955	/*
956	* Complete the quota initialisation, post-log-replay component.
957	*/
958	if (quotamount) {
959	ASSERT(mp->m_qflags == `0`);
960	mp->m_qflags = quotaflags;
961
962	xfs_qm_mount_quotas(mp);
963	}
964
965	/*
966	* Now we are mounted, reserve a small amount of unused space for
967	* privileged transactions. This is needed so that transaction
968	* space required for critical operations can dip into this pool
969	* when at ENOSPC. This is needed for operations like create with
970	* attr, unwritten extent conversion at ENOSPC, etc. Data allocations
971	* are not allowed to use this reserved space.
972	*
973	* This may drive us straight to ENOSPC on mount, but that implies
974	* we were already there on the last unmount. Warn if this occurs.
975	*/
976	if (!xfs_is_readonly(mp)) {
977	resblks = xfs_default_resblks(mp);
978	error = xfs_reserve_blocks(mp, &resblks, NULL);
979	if (error)
980	xfs_warn(mp,
981	"Unable to allocate reserve blocks. Continuing without reserve pool.");
982
983	/ Reserve AG blocks for future btree expansion. /
984	error = xfs_fs_reserve_ag_blocks(mp);
985	if (error && error != -ENOSPC)
986	goto out_agresv;
987	}
988
989	return `0`;
990
991	out_agresv:
992	xfs_fs_unreserve_ag_blocks(mp);
993	xfs_qm_unmount_quotas(mp);
994	out_rtunmount:
995	xfs_rtunmount_inodes(mp);
996	out_rele_rip:
997	xfs_irele(ip: rip);
998	/ Clean out dquots that might be in memory after quotacheck. /
999	xfs_qm_unmount(mp);
1000
1001	/*
1002	* Inactivate all inodes that might still be in memory after a log
1003	* intent recovery failure so that reclaim can free them. Metadata
1004	* inodes and the root directory shouldn't need inactivation, but the
1005	* mount failed for some reason, so pull down all the state and flee.
1006	*/
1007	xfs_inodegc_flush(mp);
1008
1009	/*
1010	* Flush all inode reclamation work and flush the log.
1011	* We have to do this /after/ rtunmount and qm_unmount because those
1012	* two will have scheduled delayed reclaim for the rt/quota inodes.
1013	*
1014	* This is slightly different from the unmountfs call sequence
1015	* because we could be tearing down a partially set up mount. In
1016	* particular, if log_mount_finish fails we bail out without calling
1017	* qm_unmount_quotas and therefore rely on qm_unmount to release the
1018	* quota inodes.
1019	*/
1020	xfs_unmount_flush_inodes(mp);
1021	out_log_dealloc:
1022	xfs_log_mount_cancel(mp);
1023	out_inodegc_shrinker:
1024	shrinker_free(shrinker: mp->m_inodegc_shrinker);
1025	out_fail_wait:
1026	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
1027	xfs_buftarg_drain(mp->m_logdev_targp);
1028	xfs_buftarg_drain(mp->m_ddev_targp);
1029	out_free_perag:
1030	xfs_free_perag(mp);
1031	out_free_dir:
1032	xfs_da_unmount(mp);
1033	out_remove_uuid:
1034	xfs_uuid_unmount(mp);
1035	out_remove_errortag:
1036	xfs_errortag_del(mp);
1037	out_remove_error_sysfs:
1038	xfs_error_sysfs_del(mp);
1039	out_remove_scrub_stats:
1040	xchk_stats_unregister(cs: mp->m_scrub_stats);
1041	xfs_sysfs_del(kobj: &mp->m_stats.xs_kobj);
1042	out_remove_sysfs:
1043	xfs_sysfs_del(kobj: &mp->m_kobj);
1044	out:
1045	return error;
1046	}
1047
1048	/*
1049	* This flushes out the inodes,dquots and the superblock, unmounts the
1050	* log and makes sure that incore structures are freed.
1051	*/
1052	void
1053	xfs_unmountfs(
1054	struct xfs_mount *mp)
1055	{
1056	uint64_t resblks;
1057	int error;
1058
1059	/*
1060	* Perform all on-disk metadata updates required to inactivate inodes
1061	* that the VFS evicted earlier in the unmount process. Freeing inodes
1062	* and discarding CoW fork preallocations can cause shape changes to
1063	* the free inode and refcount btrees, respectively, so we must finish
1064	* this before we discard the metadata space reservations. Metadata
1065	* inodes and the root directory do not require inactivation.
1066	*/
1067	xfs_inodegc_flush(mp);
1068
1069	xfs_blockgc_stop(mp);
1070	xfs_fs_unreserve_ag_blocks(mp);
1071	xfs_qm_unmount_quotas(mp);
1072	xfs_rtunmount_inodes(mp);
1073	xfs_irele(ip: mp->m_rootip);
1074
1075	xfs_unmount_flush_inodes(mp);
1076
1077	xfs_qm_unmount(mp);
1078
1079	/*
1080	* Unreserve any blocks we have so that when we unmount we don't account
1081	* the reserved free space as used. This is really only necessary for
1082	* lazy superblock counting because it trusts the incore superblock
1083	* counters to be absolutely correct on clean unmount.
1084	*
1085	* We don't bother correcting this elsewhere for lazy superblock
1086	* counting because on mount of an unclean filesystem we reconstruct the
1087	* correct counter value and this is irrelevant.
1088	*
1089	* For non-lazy counter filesystems, this doesn't matter at all because
1090	* we only every apply deltas to the superblock and hence the incore
1091	* value does not matter....
1092	*/
1093	resblks = `0`;
1094	error = xfs_reserve_blocks(mp, &resblks, NULL);
1095	if (error)
1096	xfs_warn(mp, "Unable to free reserved block pool. "
1097	"Freespace may not be correct on next mount.");
1098	xfs_unmount_check(mp);
1099
1100	xfs_log_unmount(mp);
1101	xfs_da_unmount(mp);
1102	xfs_uuid_unmount(mp);
1103
1104	#if defined(DEBUG)
1105	xfs_errortag_clearall(mp);
1106	#endif
1107	shrinker_free(shrinker: mp->m_inodegc_shrinker);
1108	xfs_free_perag(mp);
1109
1110	xfs_errortag_del(mp);
1111	xfs_error_sysfs_del(mp);
1112	xchk_stats_unregister(cs: mp->m_scrub_stats);
1113	xfs_sysfs_del(kobj: &mp->m_stats.xs_kobj);
1114	xfs_sysfs_del(kobj: &mp->m_kobj);
1115	}
1116
1117	/*
1118	* Determine whether modifications can proceed. The caller specifies the minimum
1119	* freeze level for which modifications should not be allowed. This allows
1120	* certain operations to proceed while the freeze sequence is in progress, if
1121	* necessary.
1122	*/
1123	bool
1124	xfs_fs_writable(
1125	struct xfs_mount *mp,
1126	int level)
1127	{
1128	ASSERT(level > SB_UNFROZEN);
1129	if ((mp->m_super->s_writers.frozen >= level) \|\|
1130	xfs_is_shutdown(mp) \|\| xfs_is_readonly(mp))
1131	return false;
1132
1133	return true;
1134	}
1135
1136	/ Adjust m_fdblocks or m_frextents. /
1137	int
1138	xfs_mod_freecounter(
1139	struct xfs_mount *mp,
1140	struct percpu_counter *counter,
1141	int64_t delta,
1142	bool rsvd)
1143	{
1144	int64_t lcounter;
1145	long long res_used;
1146	uint64_t set_aside = `0`;
1147	s32 batch;
1148	bool has_resv_pool;
1149
1150	ASSERT(counter == &mp->m_fdblocks \|\| counter == &mp->m_frextents);
1151	has_resv_pool = (counter == &mp->m_fdblocks);
1152	if (rsvd)
1153	ASSERT(has_resv_pool);
1154
1155	if (delta > `0`) {
1156	/*
1157	* If the reserve pool is depleted, put blocks back into it
1158	* first. Most of the time the pool is full.
1159	*/
1160	if (likely(!has_resv_pool \|\|
1161	mp->m_resblks == mp->m_resblks_avail)) {
1162	percpu_counter_add(fbc: counter, amount: delta);
1163	return `0`;
1164	}
1165
1166	spin_lock(lock: &mp->m_sb_lock);
1167	res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1168
1169	if (res_used > delta) {
1170	mp->m_resblks_avail += delta;
1171	} else {
1172	delta -= res_used;
1173	mp->m_resblks_avail = mp->m_resblks;
1174	percpu_counter_add(fbc: counter, amount: delta);
1175	}
1176	spin_unlock(lock: &mp->m_sb_lock);
1177	return `0`;
1178	}
1179
1180	/*
1181	* Taking blocks away, need to be more accurate the closer we
1182	* are to zero.
1183	*
1184	* If the counter has a value of less than 2 * max batch size,
1185	* then make everything serialise as we are real close to
1186	* ENOSPC.
1187	*/
1188	if (__percpu_counter_compare(fbc: counter, rhs: `2` * XFS_FDBLOCKS_BATCH,
1189	XFS_FDBLOCKS_BATCH) < `0`)
1190	batch = `1`;
1191	else
1192	batch = XFS_FDBLOCKS_BATCH;
1193
1194	/*
1195	* Set aside allocbt blocks because these blocks are tracked as free
1196	* space but not available for allocation. Technically this means that a
1197	* single reservation cannot consume all remaining free space, but the
1198	* ratio of allocbt blocks to usable free blocks should be rather small.
1199	* The tradeoff without this is that filesystems that maintain high
1200	* perag block reservations can over reserve physical block availability
1201	* and fail physical allocation, which leads to much more serious
1202	* problems (i.e. transaction abort, pagecache discards, etc.) than
1203	* slightly premature -ENOSPC.
1204	*/
1205	if (has_resv_pool)
1206	set_aside = xfs_fdblocks_unavailable(mp);
1207	percpu_counter_add_batch(fbc: counter, amount: delta, batch);
1208	if (__percpu_counter_compare(fbc: counter, rhs: set_aside,
1209	XFS_FDBLOCKS_BATCH) >= `0`) {
1210	/ we had space! /
1211	return `0`;
1212	}
1213
1214	/*
1215	* lock up the sb for dipping into reserves before releasing the space
1216	* that took us to ENOSPC.
1217	*/
1218	spin_lock(lock: &mp->m_sb_lock);
1219	percpu_counter_add(fbc: counter, amount: -delta);
1220	if (!has_resv_pool \|\| !rsvd)
1221	goto fdblocks_enospc;
1222
1223	lcounter = (long long)mp->m_resblks_avail + delta;
1224	if (lcounter >= `0`) {
1225	mp->m_resblks_avail = lcounter;
1226	spin_unlock(lock: &mp->m_sb_lock);
1227	return `0`;
1228	}
1229	xfs_warn_once(mp,
1230	"Reserve blocks depleted! Consider increasing reserve pool size.");
1231
1232	fdblocks_enospc:
1233	spin_unlock(lock: &mp->m_sb_lock);
1234	return -ENOSPC;
1235	}
1236
1237	/*
1238	* Used to free the superblock along various error paths.
1239	*/
1240	void
1241	xfs_freesb(
1242	struct xfs_mount *mp)
1243	{
1244	struct xfs_buf *bp = mp->m_sb_bp;
1245
1246	xfs_buf_lock(bp);
1247	mp->m_sb_bp = NULL;
1248	xfs_buf_relse(bp);
1249	}
1250
1251	/*
1252	* If the underlying (data/log/rt) device is readonly, there are some
1253	* operations that cannot proceed.
1254	*/
1255	int
1256	xfs_dev_is_read_only(
1257	struct xfs_mount *mp,
1258	char *message)
1259	{
1260	if (xfs_readonly_buftarg(mp->m_ddev_targp) \|\|
1261	xfs_readonly_buftarg(mp->m_logdev_targp) \|\|
1262	(mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
1263	xfs_notice(mp, "%s required on read-only device.", message);
1264	xfs_notice(mp, "write access unavailable, cannot proceed.");
1265	return -EROFS;
1266	}
1267	return `0`;
1268	}
1269
1270	/ Force the summary counters to be recalculated at next mount. /
1271	void
1272	xfs_force_summary_recalc(
1273	struct xfs_mount *mp)
1274	{
1275	if (!xfs_has_lazysbcount(mp))
1276	return;
1277
1278	xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
1279	}
1280
1281	/*
1282	* Enable a log incompat feature flag in the primary superblock. The caller
1283	* cannot have any other transactions in progress.
1284	*/
1285	int
1286	xfs_add_incompat_log_feature(
1287	struct xfs_mount *mp,
1288	uint32_t feature)
1289	{
1290	struct xfs_dsb *dsb;
1291	int error;
1292
1293	ASSERT(hweight32(feature) == `1`);
1294	ASSERT(!(feature & XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
1295
1296	/*
1297	* Force the log to disk and kick the background AIL thread to reduce
1298	* the chances that the bwrite will stall waiting for the AIL to unpin
1299	* the primary superblock buffer. This isn't a data integrity
1300	* operation, so we don't need a synchronous push.
1301	*/
1302	error = xfs_log_force(mp, XFS_LOG_SYNC);
1303	if (error)
1304	return error;
1305	xfs_ail_push_all(mp->m_ail);
1306
1307	/*
1308	* Lock the primary superblock buffer to serialize all callers that
1309	* are trying to set feature bits.
1310	*/
1311	xfs_buf_lock(mp->m_sb_bp);
1312	xfs_buf_hold(bp: mp->m_sb_bp);
1313
1314	if (xfs_is_shutdown(mp)) {
1315	error = -EIO;
1316	goto rele;
1317	}
1318
1319	if (xfs_sb_has_incompat_log_feature(&mp->m_sb, feature))
1320	goto rele;
1321
1322	/*
1323	* Write the primary superblock to disk immediately, because we need
1324	* the log_incompat bit to be set in the primary super now to protect
1325	* the log items that we're going to commit later.
1326	*/
1327	dsb = mp->m_sb_bp->b_addr;
1328	xfs_sb_to_disk(dsb, &mp->m_sb);
1329	dsb->sb_features_log_incompat \|= cpu_to_be32(feature);
1330	error = xfs_bwrite(bp: mp->m_sb_bp);
1331	if (error)
1332	goto shutdown;
1333
1334	/*
1335	* Add the feature bits to the incore superblock before we unlock the
1336	* buffer.
1337	*/
1338	xfs_sb_add_incompat_log_features(&mp->m_sb, feature);
1339	xfs_buf_relse(bp: mp->m_sb_bp);
1340
1341	/ Log the superblock to disk. /
1342	return xfs_sync_sb(mp, false);
1343	shutdown:
1344	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1345	rele:
1346	xfs_buf_relse(bp: mp->m_sb_bp);
1347	return error;
1348	}
1349
1350	/*
1351	* Clear all the log incompat flags from the superblock.
1352	*
1353	* The caller cannot be in a transaction, must ensure that the log does not
1354	* contain any log items protected by any log incompat bit, and must ensure
1355	* that there are no other threads that depend on the state of the log incompat
1356	* feature flags in the primary super.
1357	*
1358	* Returns true if the superblock is dirty.
1359	*/
1360	bool
1361	xfs_clear_incompat_log_features(
1362	struct xfs_mount *mp)
1363	{
1364	bool ret = false;
1365
1366	if (!xfs_has_crc(mp) \|\|
1367	!xfs_sb_has_incompat_log_feature(&mp->m_sb,
1368	XFS_SB_FEAT_INCOMPAT_LOG_ALL) \|\|
1369	xfs_is_shutdown(mp))
1370	return false;
1371
1372	/*
1373	* Update the incore superblock. We synchronize on the primary super
1374	* buffer lock to be consistent with the add function, though at least
1375	* in theory this shouldn't be necessary.
1376	*/
1377	xfs_buf_lock(mp->m_sb_bp);
1378	xfs_buf_hold(bp: mp->m_sb_bp);
1379
1380	if (xfs_sb_has_incompat_log_feature(&mp->m_sb,
1381	XFS_SB_FEAT_INCOMPAT_LOG_ALL)) {
1382	xfs_sb_remove_incompat_log_features(&mp->m_sb);
1383	ret = true;
1384	}
1385
1386	xfs_buf_relse(bp: mp->m_sb_bp);
1387	return ret;
1388	}
1389
1390	/*
1391	* Update the in-core delayed block counter.
1392	*
1393	* We prefer to update the counter without having to take a spinlock for every
1394	* counter update (i.e. batching). Each change to delayed allocation
1395	* reservations can change can easily exceed the default percpu counter
1396	* batching, so we use a larger batch factor here.
1397	*
1398	* Note that we don't currently have any callers requiring fast summation
1399	* (e.g. percpu_counter_read) so we can use a big batch value here.
1400	*/
1401	#define XFS_DELALLOC_BATCH (4096)
1402	void
1403	xfs_mod_delalloc(
1404	struct xfs_mount *mp,
1405	int64_t delta)
1406	{
1407	percpu_counter_add_batch(fbc: &mp->m_delalloc_blks, amount: delta,
1408	XFS_DELALLOC_BATCH);
1409	}
1410

source code of linux/fs/xfs/xfs_mount.c