xfs_super.c source code [linux/fs/xfs/xfs_super.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Copyright (c) 2000-2006 Silicon Graphics, Inc.
4	* All Rights Reserved.
5	*/
6
7	#include "xfs.h"
8	#include "xfs_shared.h"
9	#include "xfs_format.h"
10	#include "xfs_log_format.h"
11	#include "xfs_trans_resv.h"
12	#include "xfs_sb.h"
13	#include "xfs_mount.h"
14	#include "xfs_inode.h"
15	#include "xfs_btree.h"
16	#include "xfs_bmap.h"
17	#include "xfs_alloc.h"
18	#include "xfs_fsops.h"
19	#include "xfs_trans.h"
20	#include "xfs_buf_item.h"
21	#include "xfs_log.h"
22	#include "xfs_log_priv.h"
23	#include "xfs_dir2.h"
24	#include "xfs_extfree_item.h"
25	#include "xfs_mru_cache.h"
26	#include "xfs_inode_item.h"
27	#include "xfs_icache.h"
28	#include "xfs_trace.h"
29	#include "xfs_icreate_item.h"
30	#include "xfs_filestream.h"
31	#include "xfs_quota.h"
32	#include "xfs_sysfs.h"
33	#include "xfs_ondisk.h"
34	#include "xfs_rmap_item.h"
35	#include "xfs_refcount_item.h"
36	#include "xfs_bmap_item.h"
37	#include "xfs_reflink.h"
38	#include "xfs_pwork.h"
39	#include "xfs_ag.h"
40	#include "xfs_defer.h"
41	#include "xfs_attr_item.h"
42	#include "xfs_xattr.h"
43	#include "xfs_iunlink_item.h"
44	#include "xfs_dahash_test.h"
45	#include "xfs_rtbitmap.h"
46	#include "scrub/stats.h"
47
48	#include <linux/magic.h>
49	#include <linux/fs_context.h>
50	#include <linux/fs_parser.h>
51
52	static const struct super_operations xfs_super_operations;
53
54	static struct dentry xfs_debugfs; /* top-level xfs debugfs dir /
55	static struct kset xfs_kset; /* top-level xfs sysfs dir /
56	#ifdef DEBUG
57	static struct xfs_kobj xfs_dbg_kobj; / global debug sysfs attrs /
58	#endif
59
/*
 * DAX policy selected by the "dax"/"dax=" mount option; see
 * xfs_mount_set_dax_mode() for how each value maps onto feature bits.
 */
enum xfs_dax_mode {
	XFS_DAX_INODE = 0,
	XFS_DAX_ALWAYS = 1,
	XFS_DAX_NEVER = 2,
};
65
66	static void
67	xfs_mount_set_dax_mode(
68	struct xfs_mount *mp,
69	enum xfs_dax_mode mode)
70	{
71	switch (mode) {
72	case XFS_DAX_INODE:
73	mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS \| XFS_FEAT_DAX_NEVER);
74	break;
75	case XFS_DAX_ALWAYS:
76	mp->m_features \|= XFS_FEAT_DAX_ALWAYS;
77	mp->m_features &= ~XFS_FEAT_DAX_NEVER;
78	break;
79	case XFS_DAX_NEVER:
80	mp->m_features \|= XFS_FEAT_DAX_NEVER;
81	mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
82	break;
83	}
84	}
85
86	static const struct constant_table dax_param_enums[] = {
87	{"inode", XFS_DAX_INODE },
88	{"always", XFS_DAX_ALWAYS },
89	{"never", XFS_DAX_NEVER },
90	{}
91	};
92
/*
 * Table driven mount option parser.
 *
 * One token per entry in xfs_fs_parameters[].
 */
enum {
	Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
	Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
	Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
	Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
	Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
	Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum,
};
107
108	static const struct fs_parameter_spec xfs_fs_parameters[] = {
109	fsparam_u32("logbufs", Opt_logbufs),
110	fsparam_string("logbsize", Opt_logbsize),
111	fsparam_string("logdev", Opt_logdev),
112	fsparam_string("rtdev", Opt_rtdev),
113	fsparam_flag("wsync", Opt_wsync),
114	fsparam_flag("noalign", Opt_noalign),
115	fsparam_flag("swalloc", Opt_swalloc),
116	fsparam_u32("sunit", Opt_sunit),
117	fsparam_u32("swidth", Opt_swidth),
118	fsparam_flag("nouuid", Opt_nouuid),
119	fsparam_flag("grpid", Opt_grpid),
120	fsparam_flag("nogrpid", Opt_nogrpid),
121	fsparam_flag("bsdgroups", Opt_bsdgroups),
122	fsparam_flag("sysvgroups", Opt_sysvgroups),
123	fsparam_string("allocsize", Opt_allocsize),
124	fsparam_flag("norecovery", Opt_norecovery),
125	fsparam_flag("inode64", Opt_inode64),
126	fsparam_flag("inode32", Opt_inode32),
127	fsparam_flag("ikeep", Opt_ikeep),
128	fsparam_flag("noikeep", Opt_noikeep),
129	fsparam_flag("largeio", Opt_largeio),
130	fsparam_flag("nolargeio", Opt_nolargeio),
131	fsparam_flag("attr2", Opt_attr2),
132	fsparam_flag("noattr2", Opt_noattr2),
133	fsparam_flag("filestreams", Opt_filestreams),
134	fsparam_flag("quota", Opt_quota),
135	fsparam_flag("noquota", Opt_noquota),
136	fsparam_flag("usrquota", Opt_usrquota),
137	fsparam_flag("grpquota", Opt_grpquota),
138	fsparam_flag("prjquota", Opt_prjquota),
139	fsparam_flag("uquota", Opt_uquota),
140	fsparam_flag("gquota", Opt_gquota),
141	fsparam_flag("pquota", Opt_pquota),
142	fsparam_flag("uqnoenforce", Opt_uqnoenforce),
143	fsparam_flag("gqnoenforce", Opt_gqnoenforce),
144	fsparam_flag("pqnoenforce", Opt_pqnoenforce),
145	fsparam_flag("qnoenforce", Opt_qnoenforce),
146	fsparam_flag("discard", Opt_discard),
147	fsparam_flag("nodiscard", Opt_nodiscard),
148	fsparam_flag("dax", Opt_dax),
149	fsparam_enum("dax", Opt_dax_enum, dax_param_enums),
150	{}
151	};
152
153	struct proc_xfs_info {
154	uint64_t flag;
155	char *str;
156	};
157
158	static int
159	xfs_fs_show_options(
160	struct seq_file *m,
161	struct dentry *root)
162	{
163	static struct proc_xfs_info xfs_info_set[] = {
164	/ the few simple ones we can get from the mount struct /
165	{ XFS_FEAT_IKEEP, ",ikeep" },
166	{ XFS_FEAT_WSYNC, ",wsync" },
167	{ XFS_FEAT_NOALIGN, ",noalign" },
168	{ XFS_FEAT_SWALLOC, ",swalloc" },
169	{ XFS_FEAT_NOUUID, ",nouuid" },
170	{ XFS_FEAT_NORECOVERY, ",norecovery" },
171	{ XFS_FEAT_ATTR2, ",attr2" },
172	{ XFS_FEAT_FILESTREAMS, ",filestreams" },
173	{ XFS_FEAT_GRPID, ",grpid" },
174	{ XFS_FEAT_DISCARD, ",discard" },
175	{ XFS_FEAT_LARGE_IOSIZE, ",largeio" },
176	{ XFS_FEAT_DAX_ALWAYS, ",dax=always" },
177	{ XFS_FEAT_DAX_NEVER, ",dax=never" },
178	{ `0`, NULL }
179	};
180	struct xfs_mount *mp = XFS_M(root->d_sb);
181	struct proc_xfs_info *xfs_infop;
182
183	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
184	if (mp->m_features & xfs_infop->flag)
185	seq_puts(m, s: xfs_infop->str);
186	}
187
188	seq_printf(m, fmt: ",inode%d", xfs_has_small_inums(mp) ? `32` : `64`);
189
190	if (xfs_has_allocsize(mp))
191	seq_printf(m, fmt: ",allocsize=%dk",
192	(`1` << mp->m_allocsize_log) >> `10`);
193
194	if (mp->m_logbufs > `0`)
195	seq_printf(m, fmt: ",logbufs=%d", mp->m_logbufs);
196	if (mp->m_logbsize > `0`)
197	seq_printf(m, fmt: ",logbsize=%dk", mp->m_logbsize >> `10`);
198
199	if (mp->m_logname)
200	seq_show_option(m, name: "logdev", value: mp->m_logname);
201	if (mp->m_rtname)
202	seq_show_option(m, name: "rtdev", value: mp->m_rtname);
203
204	if (mp->m_dalign > `0`)
205	seq_printf(m, fmt: ",sunit=%d",
206	(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
207	if (mp->m_swidth > `0`)
208	seq_printf(m, fmt: ",swidth=%d",
209	(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
210
211	if (mp->m_qflags & XFS_UQUOTA_ENFD)
212	seq_puts(m, s: ",usrquota");
213	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
214	seq_puts(m, s: ",uqnoenforce");
215
216	if (mp->m_qflags & XFS_PQUOTA_ENFD)
217	seq_puts(m, s: ",prjquota");
218	else if (mp->m_qflags & XFS_PQUOTA_ACCT)
219	seq_puts(m, s: ",pqnoenforce");
220
221	if (mp->m_qflags & XFS_GQUOTA_ENFD)
222	seq_puts(m, s: ",grpquota");
223	else if (mp->m_qflags & XFS_GQUOTA_ACCT)
224	seq_puts(m, s: ",gqnoenforce");
225
226	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
227	seq_puts(m, s: ",noquota");
228
229	return `0`;
230	}
231
232	static bool
233	xfs_set_inode_alloc_perag(
234	struct xfs_perag *pag,
235	xfs_ino_t ino,
236	xfs_agnumber_t max_metadata)
237	{
238	if (!xfs_is_inode32(mp: pag->pag_mount)) {
239	set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
240	clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
241	return false;
242	}
243
244	if (ino > XFS_MAXINUMBER_32) {
245	clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
246	clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
247	return false;
248	}
249
250	set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
251	if (pag->pag_agno < max_metadata)
252	set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
253	else
254	clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
255	return true;
256	}
257
258	/*
259	* Set parameters for inode allocation heuristics, taking into account
260	* filesystem size and inode32/inode64 mount options; i.e. specifically
261	* whether or not XFS_FEAT_SMALL_INUMS is set.
262	*
263	* Inode allocation patterns are altered only if inode32 is requested
264	* (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
265	* If altered, XFS_OPSTATE_INODE32 is set as well.
266	*
267	* An agcount independent of that in the mount structure is provided
268	* because in the growfs case, mp->m_sb.sb_agcount is not yet updated
269	* to the potentially higher ag count.
270	*
271	* Returns the maximum AG index which may contain inodes.
272	*/
273	xfs_agnumber_t
274	xfs_set_inode_alloc(
275	struct xfs_mount *mp,
276	xfs_agnumber_t agcount)
277	{
278	xfs_agnumber_t index;
279	xfs_agnumber_t maxagi = `0`;
280	xfs_sb_t *sbp = &mp->m_sb;
281	xfs_agnumber_t max_metadata;
282	xfs_agino_t agino;
283	xfs_ino_t ino;
284
285	/*
286	* Calculate how much should be reserved for inodes to meet
287	* the max inode percentage. Used only for inode32.
288	*/
289	if (M_IGEO(mp)->maxicount) {
290	uint64_t icount;
291
292	icount = sbp->sb_dblocks * sbp->sb_imax_pct;
293	do_div(icount, `100`);
294	icount += sbp->sb_agblocks - `1`;
295	do_div(icount, sbp->sb_agblocks);
296	max_metadata = icount;
297	} else {
298	max_metadata = agcount;
299	}
300
301	/ Get the last possible inode in the filesystem /
302	agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - `1`);
303	ino = XFS_AGINO_TO_INO(mp, agcount - `1`, agino);
304
305	/*
306	* If user asked for no more than 32-bit inodes, and the fs is
307	* sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
308	* the allocator to accommodate the request.
309	*/
310	if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
311	set_bit(XFS_OPSTATE_INODE32, addr: &mp->m_opstate);
312	else
313	clear_bit(XFS_OPSTATE_INODE32, addr: &mp->m_opstate);
314
315	for (index = `0`; index < agcount; index++) {
316	struct xfs_perag *pag;
317
318	ino = XFS_AGINO_TO_INO(mp, index, agino);
319
320	pag = xfs_perag_get(mp, index);
321	if (xfs_set_inode_alloc_perag(pag, ino, max_metadata))
322	maxagi++;
323	xfs_perag_put(pag);
324	}
325
326	return xfs_is_inode32(mp) ? maxagi : agcount;
327	}
328
329	static int
330	xfs_setup_dax_always(
331	struct xfs_mount *mp)
332	{
333	if (!mp->m_ddev_targp->bt_daxdev &&
334	(!mp->m_rtdev_targp \|\| !mp->m_rtdev_targp->bt_daxdev)) {
335	xfs_alert(mp,
336	"DAX unsupported by block device. Turning off DAX.");
337	goto disable_dax;
338	}
339
340	if (mp->m_super->s_blocksize != PAGE_SIZE) {
341	xfs_alert(mp,
342	"DAX not supported for blocksize. Turning off DAX.");
343	goto disable_dax;
344	}
345
346	if (xfs_has_reflink(mp) &&
347	bdev_is_partition(bdev: mp->m_ddev_targp->bt_bdev)) {
348	xfs_alert(mp,
349	"DAX and reflink cannot work with multi-partitions!");
350	return -EINVAL;
351	}
352
353	xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
354	return `0`;
355
356	disable_dax:
357	xfs_mount_set_dax_mode(mp, mode: XFS_DAX_NEVER);
358	return `0`;
359	}
360
361	STATIC int
362	xfs_blkdev_get(
363	xfs_mount_t *mp,
364	const char *name,
365	struct bdev_handle **handlep)
366	{
367	int error = `0`;
368
369	*handlep = bdev_open_by_path(path: name, BLK_OPEN_READ \| BLK_OPEN_WRITE,
370	holder: mp->m_super, hops: &fs_holder_ops);
371	if (IS_ERR(ptr: *handlep)) {
372	error = PTR_ERR(ptr: *handlep);
373	*handlep = NULL;
374	xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
375	}
376
377	return error;
378	}
379
380	STATIC void
381	xfs_shutdown_devices(
382	struct xfs_mount *mp)
383	{
384	/*
385	* Udev is triggered whenever anyone closes a block device or unmounts
386	* a file systemm on a block device.
387	* The default udev rules invoke blkid to read the fs super and create
388	* symlinks to the bdev under /dev/disk. For this, it uses buffered
389	* reads through the page cache.
390	*
391	* xfs_db also uses buffered reads to examine metadata. There is no
392	* coordination between xfs_db and udev, which means that they can run
393	* concurrently. Note there is no coordination between the kernel and
394	* blkid either.
395	*
396	* On a system with 64k pages, the page cache can cache the superblock
397	* and the root inode (and hence the root directory) with the same 64k
398	* page. If udev spawns blkid after the mkfs and the system is busy
399	* enough that it is still running when xfs_db starts up, they'll both
400	* read from the same page in the pagecache.
401	*
402	* The unmount writes updated inode metadata to disk directly. The XFS
403	* buffer cache does not use the bdev pagecache, so it needs to
404	* invalidate that pagecache on unmount. If the above scenario occurs,
405	* the pagecache no longer reflects what's on disk, xfs_db reads the
406	* stale metadata, and fails to find /a. Most of the time this succeeds
407	* because closing a bdev invalidates the page cache, but when processes
408	* race, everyone loses.
409	*/
410	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
411	blkdev_issue_flush(bdev: mp->m_logdev_targp->bt_bdev);
412	invalidate_bdev(bdev: mp->m_logdev_targp->bt_bdev);
413	}
414	if (mp->m_rtdev_targp) {
415	blkdev_issue_flush(bdev: mp->m_rtdev_targp->bt_bdev);
416	invalidate_bdev(bdev: mp->m_rtdev_targp->bt_bdev);
417	}
418	blkdev_issue_flush(bdev: mp->m_ddev_targp->bt_bdev);
419	invalidate_bdev(bdev: mp->m_ddev_targp->bt_bdev);
420	}
421
422	/*
423	* The file system configurations are:
424	* (1) device (partition) with data and internal log
425	* (2) logical volume with data and log subvolumes.
426	* (3) logical volume with data, log, and realtime subvolumes.
427	*
428	* We only have to handle opening the log and realtime volumes here if
429	* they are present. The data subvolume has already been opened by
430	* get_sb_bdev() and is stored in sb->s_bdev.
431	*/
432	STATIC int
433	xfs_open_devices(
434	struct xfs_mount *mp)
435	{
436	struct super_block *sb = mp->m_super;
437	struct block_device *ddev = sb->s_bdev;
438	struct bdev_handle logdev_handle = NULL, rtdev_handle = NULL;
439	int error;
440
441	/*
442	* blkdev_put() can't be called under s_umount, see the comment
443	* in get_tree_bdev() for more details
444	*/
445	up_write(sem: &sb->s_umount);
446
447	/*
448	* Open real time and log devices - order is important.
449	*/
450	if (mp->m_logname) {
451	error = xfs_blkdev_get(mp, name: mp->m_logname, handlep: &logdev_handle);
452	if (error)
453	goto out_relock;
454	}
455
456	if (mp->m_rtname) {
457	error = xfs_blkdev_get(mp, name: mp->m_rtname, handlep: &rtdev_handle);
458	if (error)
459	goto out_close_logdev;
460
461	if (rtdev_handle->bdev == ddev \|\|
462	(logdev_handle &&
463	rtdev_handle->bdev == logdev_handle->bdev)) {
464	xfs_warn(mp,
465	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
466	error = -EINVAL;
467	goto out_close_rtdev;
468	}
469	}
470
471	/*
472	* Setup xfs_mount buffer target pointers
473	*/
474	error = -ENOMEM;
475	mp->m_ddev_targp = xfs_alloc_buftarg(mp, bdev_handle: sb->s_bdev_handle);
476	if (!mp->m_ddev_targp)
477	goto out_close_rtdev;
478
479	if (rtdev_handle) {
480	mp->m_rtdev_targp = xfs_alloc_buftarg(mp, bdev_handle: rtdev_handle);
481	if (!mp->m_rtdev_targp)
482	goto out_free_ddev_targ;
483	}
484
485	if (logdev_handle && logdev_handle->bdev != ddev) {
486	mp->m_logdev_targp = xfs_alloc_buftarg(mp, bdev_handle: logdev_handle);
487	if (!mp->m_logdev_targp)
488	goto out_free_rtdev_targ;
489	} else {
490	mp->m_logdev_targp = mp->m_ddev_targp;
491	/ Handle won't be used, drop it /
492	if (logdev_handle)
493	bdev_release(handle: logdev_handle);
494	}
495
496	error = `0`;
497	out_relock:
498	down_write(sem: &sb->s_umount);
499	return error;
500
501	out_free_rtdev_targ:
502	if (mp->m_rtdev_targp)
503	xfs_free_buftarg(mp->m_rtdev_targp);
504	out_free_ddev_targ:
505	xfs_free_buftarg(mp->m_ddev_targp);
506	out_close_rtdev:
507	if (rtdev_handle)
508	bdev_release(handle: rtdev_handle);
509	out_close_logdev:
510	if (logdev_handle)
511	bdev_release(handle: logdev_handle);
512	goto out_relock;
513	}
514
515	/*
516	* Setup xfs_mount buffer target pointers based on superblock
517	*/
518	STATIC int
519	xfs_setup_devices(
520	struct xfs_mount *mp)
521	{
522	int error;
523
524	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
525	if (error)
526	return error;
527
528	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
529	unsigned int log_sector_size = BBSIZE;
530
531	if (xfs_has_sector(mp))
532	log_sector_size = mp->m_sb.sb_logsectsize;
533	error = xfs_setsize_buftarg(mp->m_logdev_targp,
534	log_sector_size);
535	if (error)
536	return error;
537	}
538	if (mp->m_rtdev_targp) {
539	error = xfs_setsize_buftarg(mp->m_rtdev_targp,
540	mp->m_sb.sb_sectsize);
541	if (error)
542	return error;
543	}
544
545	return `0`;
546	}
547
548	STATIC int
549	xfs_init_mount_workqueues(
550	struct xfs_mount *mp)
551	{
552	mp->m_buf_workqueue = alloc_workqueue(fmt: "xfs-buf/%s",
553	XFS_WQFLAGS(WQ_FREEZABLE \| WQ_MEM_RECLAIM),
554	max_active: `1`, mp->m_super->s_id);
555	if (!mp->m_buf_workqueue)
556	goto out;
557
558	mp->m_unwritten_workqueue = alloc_workqueue(fmt: "xfs-conv/%s",
559	XFS_WQFLAGS(WQ_FREEZABLE \| WQ_MEM_RECLAIM),
560	max_active: `0`, mp->m_super->s_id);
561	if (!mp->m_unwritten_workqueue)
562	goto out_destroy_buf;
563
564	mp->m_reclaim_workqueue = alloc_workqueue(fmt: "xfs-reclaim/%s",
565	XFS_WQFLAGS(WQ_FREEZABLE \| WQ_MEM_RECLAIM),
566	max_active: `0`, mp->m_super->s_id);
567	if (!mp->m_reclaim_workqueue)
568	goto out_destroy_unwritten;
569
570	mp->m_blockgc_wq = alloc_workqueue(fmt: "xfs-blockgc/%s",
571	XFS_WQFLAGS(WQ_UNBOUND \| WQ_FREEZABLE \| WQ_MEM_RECLAIM),
572	max_active: `0`, mp->m_super->s_id);
573	if (!mp->m_blockgc_wq)
574	goto out_destroy_reclaim;
575
576	mp->m_inodegc_wq = alloc_workqueue(fmt: "xfs-inodegc/%s",
577	XFS_WQFLAGS(WQ_FREEZABLE \| WQ_MEM_RECLAIM),
578	max_active: `1`, mp->m_super->s_id);
579	if (!mp->m_inodegc_wq)
580	goto out_destroy_blockgc;
581
582	mp->m_sync_workqueue = alloc_workqueue(fmt: "xfs-sync/%s",
583	XFS_WQFLAGS(WQ_FREEZABLE), max_active: `0`, mp->m_super->s_id);
584	if (!mp->m_sync_workqueue)
585	goto out_destroy_inodegc;
586
587	return `0`;
588
589	out_destroy_inodegc:
590	destroy_workqueue(wq: mp->m_inodegc_wq);
591	out_destroy_blockgc:
592	destroy_workqueue(wq: mp->m_blockgc_wq);
593	out_destroy_reclaim:
594	destroy_workqueue(wq: mp->m_reclaim_workqueue);
595	out_destroy_unwritten:
596	destroy_workqueue(wq: mp->m_unwritten_workqueue);
597	out_destroy_buf:
598	destroy_workqueue(wq: mp->m_buf_workqueue);
599	out:
600	return -ENOMEM;
601	}
602
603	STATIC void
604	xfs_destroy_mount_workqueues(
605	struct xfs_mount *mp)
606	{
607	destroy_workqueue(wq: mp->m_sync_workqueue);
608	destroy_workqueue(wq: mp->m_blockgc_wq);
609	destroy_workqueue(wq: mp->m_inodegc_wq);
610	destroy_workqueue(wq: mp->m_reclaim_workqueue);
611	destroy_workqueue(wq: mp->m_unwritten_workqueue);
612	destroy_workqueue(wq: mp->m_buf_workqueue);
613	}
614
615	static void
616	xfs_flush_inodes_worker(
617	struct work_struct *work)
618	{
619	struct xfs_mount mp = container_of(work, struct* xfs_mount,
620	m_flush_inodes_work);
621	struct super_block *sb = mp->m_super;
622
623	if (down_read_trylock(sem: &sb->s_umount)) {
624	sync_inodes_sb(sb);
625	up_read(sem: &sb->s_umount);
626	}
627	}
628
629	/*
630	* Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
631	* or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
632	* for IO to complete so that we effectively throttle multiple callers to the
633	* rate at which IO is completing.
634	*/
635	void
636	xfs_flush_inodes(
637	struct xfs_mount *mp)
638	{
639	/*
640	* If flush_work() returns true then that means we waited for a flush
641	* which was already in progress. Don't bother running another scan.
642	*/
643	if (flush_work(work: &mp->m_flush_inodes_work))
644	return;
645
646	queue_work(wq: mp->m_sync_workqueue, work: &mp->m_flush_inodes_work);
647	flush_work(work: &mp->m_flush_inodes_work);
648	}
649
650	/ Catch misguided souls that try to use this interface on XFS /
651	STATIC struct inode *
652	xfs_fs_alloc_inode(
653	struct super_block *sb)
654	{
655	BUG();
656	return NULL;
657	}
658
659	/*
660	* Now that the generic code is guaranteed not to be accessing
661	* the linux inode, we can inactivate and reclaim the inode.
662	*/
663	STATIC void
664	xfs_fs_destroy_inode(
665	struct inode *inode)
666	{
667	struct xfs_inode *ip = XFS_I(inode);
668
669	trace_xfs_destroy_inode(ip);
670
671	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
672	XFS_STATS_INC(ip->i_mount, vn_rele);
673	XFS_STATS_INC(ip->i_mount, vn_remove);
674	xfs_inode_mark_reclaimable(ip);
675	}
676
677	static void
678	xfs_fs_dirty_inode(
679	struct inode *inode,
680	int flags)
681	{
682	struct xfs_inode *ip = XFS_I(inode);
683	struct xfs_mount *mp = ip->i_mount;
684	struct xfs_trans *tp;
685
686	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
687	return;
688
689	/*
690	* Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC)
691	* and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed
692	* in flags possibly together with I_DIRTY_SYNC.
693	*/
694	if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC \|\| !(flags & I_DIRTY_TIME))
695	return;
696
697	if (xfs_trans_alloc(mp, resp: &M_RES(mp)->tr_fsyncts, blocks: `0`, rtextents: `0`, flags: `0`, tpp: &tp))
698	return;
699	xfs_ilock(ip, XFS_ILOCK_EXCL);
700	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
701	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
702	xfs_trans_commit(tp);
703	}
704
705	/*
706	* Slab object creation initialisation for the XFS inode.
707	* This covers only the idempotent fields in the XFS inode;
708	* all other fields need to be initialised on allocation
709	* from the slab. This avoids the need to repeatedly initialise
710	* fields in the xfs inode that left in the initialise state
711	* when freeing the inode.
712	*/
713	STATIC void
714	xfs_fs_inode_init_once(
715	void *inode)
716	{
717	struct xfs_inode *ip = inode;
718
719	memset(ip, `0`, sizeof(struct xfs_inode));
720
721	/ vfs inode /
722	inode_init_once(VFS_I(ip));
723
724	/ xfs inode /
725	atomic_set(v: &ip->i_pincount, i: `0`);
726	spin_lock_init(&ip->i_flags_lock);
727
728	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI\|MRLOCK_BARRIER,
729	"xfsino", ip->i_ino);
730	}
731
732	/*
733	* We do an unlocked check for XFS_IDONTCACHE here because we are already
734	* serialised against cache hits here via the inode->i_lock and igrab() in
735	* xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
736	* racing with us, and it avoids needing to grab a spinlock here for every inode
737	* we drop the final reference on.
738	*/
739	STATIC int
740	xfs_fs_drop_inode(
741	struct inode *inode)
742	{
743	struct xfs_inode *ip = XFS_I(inode);
744
745	/*
746	* If this unlinked inode is in the middle of recovery, don't
747	* drop the inode just yet; log recovery will take care of
748	* that. See the comment for this inode flag.
749	*/
750	if (ip->i_flags & XFS_IRECOVERY) {
751	ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
752	return `0`;
753	}
754
755	return generic_drop_inode(inode);
756	}
757
758	static void
759	xfs_mount_free(
760	struct xfs_mount *mp)
761	{
762	/*
763	* Free the buftargs here because blkdev_put needs to be called outside
764	* of sb->s_umount, which is held around the call to ->put_super.
765	*/
766	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
767	xfs_free_buftarg(mp->m_logdev_targp);
768	if (mp->m_rtdev_targp)
769	xfs_free_buftarg(mp->m_rtdev_targp);
770	if (mp->m_ddev_targp)
771	xfs_free_buftarg(mp->m_ddev_targp);
772
773	debugfs_remove(dentry: mp->m_debugfs);
774	kfree(objp: mp->m_rtname);
775	kfree(objp: mp->m_logname);
776	kmem_free(ptr: mp);
777	}
778
779	STATIC int
780	xfs_fs_sync_fs(
781	struct super_block *sb,
782	int wait)
783	{
784	struct xfs_mount *mp = XFS_M(sb);
785	int error;
786
787	trace_xfs_fs_sync_fs(mp, __return_address);
788
789	/*
790	* Doing anything during the async pass would be counterproductive.
791	*/
792	if (!wait)
793	return `0`;
794
795	error = xfs_log_force(mp, XFS_LOG_SYNC);
796	if (error)
797	return error;
798
799	if (laptop_mode) {
800	/*
801	* The disk must be active because we're syncing.
802	* We schedule log work now (now that the disk is
803	* active) instead of later (when it might not be).
804	*/
805	flush_delayed_work(dwork: &mp->m_log->l_work);
806	}
807
808	/*
809	* If we are called with page faults frozen out, it means we are about
810	* to freeze the transaction subsystem. Take the opportunity to shut
811	* down inodegc because once SB_FREEZE_FS is set it's too late to
812	* prevent inactivation races with freeze. The fs doesn't get called
813	* again by the freezing process until after SB_FREEZE_FS has been set,
814	* so it's now or never. Same logic applies to speculative allocation
815	* garbage collection.
816	*
817	* We don't care if this is a normal syncfs call that does this or
818	* freeze that does this - we can run this multiple times without issue
819	* and we won't race with a restart because a restart can only occur
820	* when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
821	*/
822	if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
823	xfs_inodegc_stop(mp);
824	xfs_blockgc_stop(mp);
825	}
826
827	return `0`;
828	}
829
830	STATIC int
831	xfs_fs_statfs(
832	struct dentry *dentry,
833	struct kstatfs *statp)
834	{
835	struct xfs_mount *mp = XFS_M(dentry->d_sb);
836	xfs_sb_t *sbp = &mp->m_sb;
837	struct xfs_inode *ip = XFS_I(inode: d_inode(dentry));
838	uint64_t fakeinos, id;
839	uint64_t icount;
840	uint64_t ifree;
841	uint64_t fdblocks;
842	xfs_extlen_t lsize;
843	int64_t ffree;
844
845	/*
846	* Expedite background inodegc but don't wait. We do not want to block
847	* here waiting hours for a billion extent file to be truncated.
848	*/
849	xfs_inodegc_push(mp);
850
851	statp->f_type = XFS_SUPER_MAGIC;
852	statp->f_namelen = MAXNAMELEN - `1`;
853
854	id = huge_encode_dev(dev: mp->m_ddev_targp->bt_dev);
855	statp->f_fsid = u64_to_fsid(v: id);
856
857	icount = percpu_counter_sum(fbc: &mp->m_icount);
858	ifree = percpu_counter_sum(fbc: &mp->m_ifree);
859	fdblocks = percpu_counter_sum(fbc: &mp->m_fdblocks);
860
861	spin_lock(lock: &mp->m_sb_lock);
862	statp->f_bsize = sbp->sb_blocksize;
863	lsize = sbp->sb_logstart ? sbp->sb_logblocks : `0`;
864	statp->f_blocks = sbp->sb_dblocks - lsize;
865	spin_unlock(lock: &mp->m_sb_lock);
866
867	/ make sure statp->f_bfree does not underflow /
868	statp->f_bfree = max_t(int64_t, `0`,
869	fdblocks - xfs_fdblocks_unavailable(mp));
870	statp->f_bavail = statp->f_bfree;
871
872	fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
873	statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
874	if (M_IGEO(mp)->maxicount)
875	statp->f_files = min_t(typeof(statp->f_files),
876	statp->f_files,
877	M_IGEO(mp)->maxicount);
878
879	/ If sb_icount overshot maxicount, report actual allocation /
880	statp->f_files = max_t(typeof(statp->f_files),
881	statp->f_files,
882	sbp->sb_icount);
883
884	/ make sure statp->f_ffree does not underflow /
885	ffree = statp->f_files - (icount - ifree);
886	statp->f_ffree = max_t(int64_t, ffree, `0`);
887
888
889	if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
890	((mp->m_qflags & (XFS_PQUOTA_ACCT\|XFS_PQUOTA_ENFD))) ==
891	(XFS_PQUOTA_ACCT\|XFS_PQUOTA_ENFD))
892	xfs_qm_statvfs(ip, statp);
893
894	if (XFS_IS_REALTIME_MOUNT(mp) &&
895	(ip->i_diflags & (XFS_DIFLAG_RTINHERIT \| XFS_DIFLAG_REALTIME))) {
896	s64 freertx;
897
898	statp->f_blocks = sbp->sb_rblocks;
899	freertx = percpu_counter_sum_positive(fbc: &mp->m_frextents);
900	statp->f_bavail = statp->f_bfree = xfs_rtx_to_rtb(mp, freertx);
901	}
902
903	return `0`;
904	}
905
906	STATIC void
907	xfs_save_resvblks(struct xfs_mount *mp)
908	{
909	uint64_t resblks = `0`;
910
911	mp->m_resblks_save = mp->m_resblks;
912	xfs_reserve_blocks(mp, &resblks, NULL);
913	}
914
915	STATIC void
916	xfs_restore_resvblks(struct xfs_mount *mp)
917	{
918	uint64_t resblks;
919
920	if (mp->m_resblks_save) {
921	resblks = mp->m_resblks_save;
922	mp->m_resblks_save = `0`;
923	} else
924	resblks = xfs_default_resblks(mp);
925
926	xfs_reserve_blocks(mp, &resblks, NULL);
927	}
928
929	/*
930	* Second stage of a freeze. The data is already frozen so we only
931	* need to take care of the metadata. Once that's done sync the superblock
932	* to the log to dirty it in case of a crash while frozen. This ensures that we
933	* will recover the unlinked inode lists on the next mount.
934	*/
935	STATIC int
936	xfs_fs_freeze(
937	struct super_block *sb)
938	{
939	struct xfs_mount *mp = XFS_M(sb);
940	unsigned int flags;
941	int ret;
942
943	/*
944	* The filesystem is now frozen far enough that memory reclaim
945	* cannot safely operate on the filesystem. Hence we need to
946	* set a GFP_NOFS context here to avoid recursion deadlocks.
947	*/
948	flags = memalloc_nofs_save();
949	xfs_save_resvblks(mp);
950	ret = xfs_log_quiesce(mp);
951	memalloc_nofs_restore(flags);
952
953	/*
954	* For read-write filesystems, we need to restart the inodegc on error
955	* because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
956	* going to be run to restart it now. We are at SB_FREEZE_FS level
957	* here, so we can restart safely without racing with a stop in
958	* xfs_fs_sync_fs().
959	*/
960	if (ret && !xfs_is_readonly(mp)) {
961	xfs_blockgc_start(mp);
962	xfs_inodegc_start(mp);
963	}
964
965	return ret;
966	}
967
968	STATIC int
969	xfs_fs_unfreeze(
970	struct super_block *sb)
971	{
972	struct xfs_mount *mp = XFS_M(sb);
973
974	xfs_restore_resvblks(mp);
975	xfs_log_work_queue(mp);
976
977	/*
978	* Don't reactivate the inodegc worker on a readonly filesystem because
979	* inodes are sent directly to reclaim. Don't reactivate the blockgc
980	* worker because there are no speculative preallocations on a readonly
981	* filesystem.
982	*/
983	if (!xfs_is_readonly(mp)) {
984	xfs_blockgc_start(mp);
985	xfs_inodegc_start(mp);
986	}
987
988	return `0`;
989	}
990
991	/*
992	* This function fills in xfs_mount_t fields based on mount args.
993	* Note: the superblock _has_ now been read in.
994	*/
995	STATIC int
996	xfs_finish_flags(
997	struct xfs_mount *mp)
998	{
999	/ Fail a mount where the logbuf is smaller than the log stripe /
1000	if (xfs_has_logv2(mp)) {
1001	if (mp->m_logbsize <= `0` &&
1002	mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
1003	mp->m_logbsize = mp->m_sb.sb_logsunit;
1004	} else if (mp->m_logbsize > `0` &&
1005	mp->m_logbsize < mp->m_sb.sb_logsunit) {
1006	xfs_warn(mp,
1007	"logbuf size must be greater than or equal to log stripe size");
1008	return -EINVAL;
1009	}
1010	} else {
1011	/ Fail a mount if the logbuf is larger than 32K /
1012	if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1013	xfs_warn(mp,
1014	"logbuf size for version 1 logs must be 16K or 32K");
1015	return -EINVAL;
1016	}
1017	}
1018
1019	/*
1020	* V5 filesystems always use attr2 format for attributes.
1021	*/
1022	if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
1023	xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
1024	"attr2 is always enabled for V5 filesystems.");
1025	return -EINVAL;
1026	}
1027
1028	/*
1029	* prohibit r/w mounts of read-only filesystems
1030	*/
1031	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
1032	xfs_warn(mp,
1033	"cannot mount a read-only filesystem as read-write");
1034	return -EROFS;
1035	}
1036
1037	if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
1038	(mp->m_qflags & XFS_PQUOTA_ACCT) &&
1039	!xfs_has_pquotino(mp)) {
1040	xfs_warn(mp,
1041	"Super block does not support project and group quota together");
1042	return -EINVAL;
1043	}
1044
1045	return `0`;
1046	}
1047
1048	static int
1049	xfs_init_percpu_counters(
1050	struct xfs_mount *mp)
1051	{
1052	int error;
1053
1054	error = percpu_counter_init(&mp->m_icount, `0`, GFP_KERNEL);
1055	if (error)
1056	return -ENOMEM;
1057
1058	error = percpu_counter_init(&mp->m_ifree, `0`, GFP_KERNEL);
1059	if (error)
1060	goto free_icount;
1061
1062	error = percpu_counter_init(&mp->m_fdblocks, `0`, GFP_KERNEL);
1063	if (error)
1064	goto free_ifree;
1065
1066	error = percpu_counter_init(&mp->m_delalloc_blks, `0`, GFP_KERNEL);
1067	if (error)
1068	goto free_fdblocks;
1069
1070	error = percpu_counter_init(&mp->m_frextents, `0`, GFP_KERNEL);
1071	if (error)
1072	goto free_delalloc;
1073
1074	return `0`;
1075
1076	free_delalloc:
1077	percpu_counter_destroy(fbc: &mp->m_delalloc_blks);
1078	free_fdblocks:
1079	percpu_counter_destroy(fbc: &mp->m_fdblocks);
1080	free_ifree:
1081	percpu_counter_destroy(fbc: &mp->m_ifree);
1082	free_icount:
1083	percpu_counter_destroy(fbc: &mp->m_icount);
1084	return -ENOMEM;
1085	}
1086
1087	void
1088	xfs_reinit_percpu_counters(
1089	struct xfs_mount *mp)
1090	{
1091	percpu_counter_set(fbc: &mp->m_icount, amount: mp->m_sb.sb_icount);
1092	percpu_counter_set(fbc: &mp->m_ifree, amount: mp->m_sb.sb_ifree);
1093	percpu_counter_set(fbc: &mp->m_fdblocks, amount: mp->m_sb.sb_fdblocks);
1094	percpu_counter_set(fbc: &mp->m_frextents, amount: mp->m_sb.sb_frextents);
1095	}
1096
1097	static void
1098	xfs_destroy_percpu_counters(
1099	struct xfs_mount *mp)
1100	{
1101	percpu_counter_destroy(fbc: &mp->m_icount);
1102	percpu_counter_destroy(fbc: &mp->m_ifree);
1103	percpu_counter_destroy(fbc: &mp->m_fdblocks);
1104	ASSERT(xfs_is_shutdown(mp) \|\|
1105	percpu_counter_sum(&mp->m_delalloc_blks) == `0`);
1106	percpu_counter_destroy(fbc: &mp->m_delalloc_blks);
1107	percpu_counter_destroy(fbc: &mp->m_frextents);
1108	}
1109
1110	static int
1111	xfs_inodegc_init_percpu(
1112	struct xfs_mount *mp)
1113	{
1114	struct xfs_inodegc *gc;
1115	int cpu;
1116
1117	mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
1118	if (!mp->m_inodegc)
1119	return -ENOMEM;
1120
1121	for_each_possible_cpu(cpu) {
1122	gc = per_cpu_ptr(mp->m_inodegc, cpu);
1123	gc->cpu = cpu;
1124	gc->mp = mp;
1125	init_llist_head(list: &gc->list);
1126	gc->items = `0`;
1127	gc->error = `0`;
1128	INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
1129	}
1130	return `0`;
1131	}
1132
1133	static void
1134	xfs_inodegc_free_percpu(
1135	struct xfs_mount *mp)
1136	{
1137	if (!mp->m_inodegc)
1138	return;
1139	free_percpu(pdata: mp->m_inodegc);
1140	}
1141
1142	static void
1143	xfs_fs_put_super(
1144	struct super_block *sb)
1145	{
1146	struct xfs_mount *mp = XFS_M(sb);
1147
1148	xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid);
1149	xfs_filestream_unmount(mp);
1150	xfs_unmountfs(mp);
1151
1152	xfs_freesb(mp);
1153	xchk_mount_stats_free(mp);
1154	free_percpu(pdata: mp->m_stats.xs_stats);
1155	xfs_inodegc_free_percpu(mp);
1156	xfs_destroy_percpu_counters(mp);
1157	xfs_destroy_mount_workqueues(mp);
1158	xfs_shutdown_devices(mp);
1159	}
1160
1161	static long
1162	xfs_fs_nr_cached_objects(
1163	struct super_block *sb,
1164	struct shrink_control *sc)
1165	{
1166	/ Paranoia: catch incorrect calls during mount setup or teardown /
1167	if (WARN_ON_ONCE(!sb->s_fs_info))
1168	return `0`;
1169	return xfs_reclaim_inodes_count(XFS_M(sb));
1170	}
1171
1172	static long
1173	xfs_fs_free_cached_objects(
1174	struct super_block *sb,
1175	struct shrink_control *sc)
1176	{
1177	return xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan: sc->nr_to_scan);
1178	}
1179
1180	static void
1181	xfs_fs_shutdown(
1182	struct super_block *sb)
1183	{
1184	xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED);
1185	}
1186
1187	static const struct super_operations xfs_super_operations = {
1188	.alloc_inode = xfs_fs_alloc_inode,
1189	.destroy_inode = xfs_fs_destroy_inode,
1190	.dirty_inode = xfs_fs_dirty_inode,
1191	.drop_inode = xfs_fs_drop_inode,
1192	.put_super = xfs_fs_put_super,
1193	.sync_fs = xfs_fs_sync_fs,
1194	.freeze_fs = xfs_fs_freeze,
1195	.unfreeze_fs = xfs_fs_unfreeze,
1196	.statfs = xfs_fs_statfs,
1197	.show_options = xfs_fs_show_options,
1198	.nr_cached_objects = xfs_fs_nr_cached_objects,
1199	.free_cached_objects = xfs_fs_free_cached_objects,
1200	.shutdown = xfs_fs_shutdown,
1201	};
1202
1203	static int
1204	suffix_kstrtoint(
1205	const char *s,
1206	unsigned int base,
1207	int *res)
1208	{
1209	int last, shift_left_factor = `0`, _res;
1210	char *value;
1211	int ret = `0`;
1212
1213	value = kstrdup(s, GFP_KERNEL);
1214	if (!value)
1215	return -ENOMEM;
1216
1217	last = strlen(value) - `1`;
1218	if (value[last] == `'K'` \|\| value[last] == `'k'`) {
1219	shift_left_factor = `10`;
1220	value[last] = `'\0'`;
1221	}
1222	if (value[last] == `'M'` \|\| value[last] == `'m'`) {
1223	shift_left_factor = `20`;
1224	value[last] = `'\0'`;
1225	}
1226	if (value[last] == `'G'` \|\| value[last] == `'g'`) {
1227	shift_left_factor = `30`;
1228	value[last] = `'\0'`;
1229	}
1230
1231	if (kstrtoint(s: value, base, res: &_res))
1232	ret = -EINVAL;
1233	kfree(objp: value);
1234	*res = _res << shift_left_factor;
1235	return ret;
1236	}
1237
1238	static inline void
1239	xfs_fs_warn_deprecated(
1240	struct fs_context *fc,
1241	struct fs_parameter *param,
1242	uint64_t flag,
1243	bool value)
1244	{
1245	/ Don't print the warning if reconfiguring and current mount point*
1246	* already had the flag set
1247	*/
1248	if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
1249	!!(XFS_M(fc->root->d_sb)->m_features & flag) == value)
1250	return;
1251	xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
1252	}
1253
1254	/*
1255	* Set mount state from a mount option.
1256	*
1257	* NOTE: mp->m_super is NULL here!
1258	*/
1259	static int
1260	xfs_fs_parse_param(
1261	struct fs_context *fc,
1262	struct fs_parameter *param)
1263	{
1264	struct xfs_mount *parsing_mp = fc->s_fs_info;
1265	struct fs_parse_result result;
1266	int size = `0`;
1267	int opt;
1268
1269	opt = fs_parse(fc, desc: xfs_fs_parameters, param, result: &result);
1270	if (opt < `0`)
1271	return opt;
1272
1273	switch (opt) {
1274	case Opt_logbufs:
1275	parsing_mp->m_logbufs = result.uint_32;
1276	return `0`;
1277	case Opt_logbsize:
1278	if (suffix_kstrtoint(s: param->string, base: `10`, res: &parsing_mp->m_logbsize))
1279	return -EINVAL;
1280	return `0`;
1281	case Opt_logdev:
1282	kfree(objp: parsing_mp->m_logname);
1283	parsing_mp->m_logname = kstrdup(s: param->string, GFP_KERNEL);
1284	if (!parsing_mp->m_logname)
1285	return -ENOMEM;
1286	return `0`;
1287	case Opt_rtdev:
1288	kfree(objp: parsing_mp->m_rtname);
1289	parsing_mp->m_rtname = kstrdup(s: param->string, GFP_KERNEL);
1290	if (!parsing_mp->m_rtname)
1291	return -ENOMEM;
1292	return `0`;
1293	case Opt_allocsize:
1294	if (suffix_kstrtoint(s: param->string, base: `10`, res: &size))
1295	return -EINVAL;
1296	parsing_mp->m_allocsize_log = ffs(size) - `1`;
1297	parsing_mp->m_features \|= XFS_FEAT_ALLOCSIZE;
1298	return `0`;
1299	case Opt_grpid:
1300	case Opt_bsdgroups:
1301	parsing_mp->m_features \|= XFS_FEAT_GRPID;
1302	return `0`;
1303	case Opt_nogrpid:
1304	case Opt_sysvgroups:
1305	parsing_mp->m_features &= ~XFS_FEAT_GRPID;
1306	return `0`;
1307	case Opt_wsync:
1308	parsing_mp->m_features \|= XFS_FEAT_WSYNC;
1309	return `0`;
1310	case Opt_norecovery:
1311	parsing_mp->m_features \|= XFS_FEAT_NORECOVERY;
1312	return `0`;
1313	case Opt_noalign:
1314	parsing_mp->m_features \|= XFS_FEAT_NOALIGN;
1315	return `0`;
1316	case Opt_swalloc:
1317	parsing_mp->m_features \|= XFS_FEAT_SWALLOC;
1318	return `0`;
1319	case Opt_sunit:
1320	parsing_mp->m_dalign = result.uint_32;
1321	return `0`;
1322	case Opt_swidth:
1323	parsing_mp->m_swidth = result.uint_32;
1324	return `0`;
1325	case Opt_inode32:
1326	parsing_mp->m_features \|= XFS_FEAT_SMALL_INUMS;
1327	return `0`;
1328	case Opt_inode64:
1329	parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
1330	return `0`;
1331	case Opt_nouuid:
1332	parsing_mp->m_features \|= XFS_FEAT_NOUUID;
1333	return `0`;
1334	case Opt_largeio:
1335	parsing_mp->m_features \|= XFS_FEAT_LARGE_IOSIZE;
1336	return `0`;
1337	case Opt_nolargeio:
1338	parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
1339	return `0`;
1340	case Opt_filestreams:
1341	parsing_mp->m_features \|= XFS_FEAT_FILESTREAMS;
1342	return `0`;
1343	case Opt_noquota:
1344	parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
1345	parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
1346	return `0`;
1347	case Opt_quota:
1348	case Opt_uquota:
1349	case Opt_usrquota:
1350	parsing_mp->m_qflags \|= (XFS_UQUOTA_ACCT \| XFS_UQUOTA_ENFD);
1351	return `0`;
1352	case Opt_qnoenforce:
1353	case Opt_uqnoenforce:
1354	parsing_mp->m_qflags \|= XFS_UQUOTA_ACCT;
1355	parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
1356	return `0`;
1357	case Opt_pquota:
1358	case Opt_prjquota:
1359	parsing_mp->m_qflags \|= (XFS_PQUOTA_ACCT \| XFS_PQUOTA_ENFD);
1360	return `0`;
1361	case Opt_pqnoenforce:
1362	parsing_mp->m_qflags \|= XFS_PQUOTA_ACCT;
1363	parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
1364	return `0`;
1365	case Opt_gquota:
1366	case Opt_grpquota:
1367	parsing_mp->m_qflags \|= (XFS_GQUOTA_ACCT \| XFS_GQUOTA_ENFD);
1368	return `0`;
1369	case Opt_gqnoenforce:
1370	parsing_mp->m_qflags \|= XFS_GQUOTA_ACCT;
1371	parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
1372	return `0`;
1373	case Opt_discard:
1374	parsing_mp->m_features \|= XFS_FEAT_DISCARD;
1375	return `0`;
1376	case Opt_nodiscard:
1377	parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
1378	return `0`;
1379	#ifdef CONFIG_FS_DAX
1380	case Opt_dax:
1381	xfs_mount_set_dax_mode(mp: parsing_mp, mode: XFS_DAX_ALWAYS);
1382	return `0`;
1383	case Opt_dax_enum:
1384	xfs_mount_set_dax_mode(mp: parsing_mp, mode: result.uint_32);
1385	return `0`;
1386	#endif
1387	/ Following mount options will be removed in September 2025 /
1388	case Opt_ikeep:
1389	xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, value: true);
1390	parsing_mp->m_features \|= XFS_FEAT_IKEEP;
1391	return `0`;
1392	case Opt_noikeep:
1393	xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, value: false);
1394	parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
1395	return `0`;
1396	case Opt_attr2:
1397	xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, value: true);
1398	parsing_mp->m_features \|= XFS_FEAT_ATTR2;
1399	return `0`;
1400	case Opt_noattr2:
1401	xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, value: true);
1402	parsing_mp->m_features \|= XFS_FEAT_NOATTR2;
1403	return `0`;
1404	default:
1405	xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
1406	return -EINVAL;
1407	}
1408
1409	return `0`;
1410	}
1411
1412	static int
1413	xfs_fs_validate_params(
1414	struct xfs_mount *mp)
1415	{
1416	/ No recovery flag requires a read-only mount /
1417	if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
1418	xfs_warn(mp, "no-recovery mounts must be read-only.");
1419	return -EINVAL;
1420	}
1421
1422	/*
1423	* We have not read the superblock at this point, so only the attr2
1424	* mount option can set the attr2 feature by this stage.
1425	*/
1426	if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
1427	xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
1428	return -EINVAL;
1429	}
1430
1431
1432	if (xfs_has_noalign(mp) && (mp->m_dalign \|\| mp->m_swidth)) {
1433	xfs_warn(mp,
1434	"sunit and swidth options incompatible with the noalign option");
1435	return -EINVAL;
1436	}
1437
1438	if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != `0`) {
1439	xfs_warn(mp, "quota support not available in this kernel.");
1440	return -EINVAL;
1441	}
1442
1443	if ((mp->m_dalign && !mp->m_swidth) \|\|
1444	(!mp->m_dalign && mp->m_swidth)) {
1445	xfs_warn(mp, "sunit and swidth must be specified together");
1446	return -EINVAL;
1447	}
1448
1449	if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != `0`)) {
1450	xfs_warn(mp,
1451	"stripe width (%d) must be a multiple of the stripe unit (%d)",
1452	mp->m_swidth, mp->m_dalign);
1453	return -EINVAL;
1454	}
1455
1456	if (mp->m_logbufs != -`1` &&
1457	mp->m_logbufs != `0` &&
1458	(mp->m_logbufs < XLOG_MIN_ICLOGS \|\|
1459	mp->m_logbufs > XLOG_MAX_ICLOGS)) {
1460	xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
1461	mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
1462	return -EINVAL;
1463	}
1464
1465	if (mp->m_logbsize != -`1` &&
1466	mp->m_logbsize != `0` &&
1467	(mp->m_logbsize < XLOG_MIN_RECORD_BSIZE \|\|
1468	mp->m_logbsize > XLOG_MAX_RECORD_BSIZE \|\|
1469	!is_power_of_2(mp->m_logbsize))) {
1470	xfs_warn(mp,
1471	"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
1472	mp->m_logbsize);
1473	return -EINVAL;
1474	}
1475
1476	if (xfs_has_allocsize(mp) &&
1477	(mp->m_allocsize_log > XFS_MAX_IO_LOG \|\|
1478	mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
1479	xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
1480	mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
1481	return -EINVAL;
1482	}
1483
1484	return `0`;
1485	}
1486
/*
 * Create the debugfs directory @name under @parent.
 *
 * Returns the new dentry, or NULL (never an ERR_PTR) if
 * debugfs_create_dir() reported an error.
 */
struct dentry *
xfs_debugfs_mkdir(
	const char	*name,
	struct dentry	*parent)
{
	struct dentry	*child;

	/* Apparently we're expected to ignore error returns?? */
	child = debugfs_create_dir(name, parent);
	if (IS_ERR(child))
		return NULL;

	return child;
}
1501
1502	static int
1503	xfs_fs_fill_super(
1504	struct super_block *sb,
1505	struct fs_context *fc)
1506	{
1507	struct xfs_mount *mp = sb->s_fs_info;
1508	struct inode *root;
1509	int flags = `0`, error;
1510
1511	mp->m_super = sb;
1512
1513	error = xfs_fs_validate_params(mp);
1514	if (error)
1515	return error;
1516
1517	sb_min_blocksize(sb, BBSIZE);
1518	sb->s_xattr = xfs_xattr_handlers;
1519	sb->s_export_op = &xfs_export_operations;
1520	#ifdef CONFIG_XFS_QUOTA
1521	sb->s_qcop = &xfs_quotactl_operations;
1522	sb->s_quota_types = QTYPE_MASK_USR \| QTYPE_MASK_GRP \| QTYPE_MASK_PRJ;
1523	#endif
1524	sb->s_op = &xfs_super_operations;
1525
1526	/*
1527	* Delay mount work if the debug hook is set. This is debug
1528	* instrumention to coordinate simulation of xfs mount failures with
1529	* VFS superblock operations
1530	*/
1531	if (xfs_globals.mount_delay) {
1532	xfs_notice(mp, "Delaying mount for %d seconds.",
1533	xfs_globals.mount_delay);
1534	msleep(msecs: xfs_globals.mount_delay * `1000`);
1535	}
1536
1537	if (fc->sb_flags & SB_SILENT)
1538	flags \|= XFS_MFSI_QUIET;
1539
1540	error = xfs_open_devices(mp);
1541	if (error)
1542	return error;
1543
1544	if (xfs_debugfs) {
1545	mp->m_debugfs = xfs_debugfs_mkdir(name: mp->m_super->s_id,
1546	parent: xfs_debugfs);
1547	} else {
1548	mp->m_debugfs = NULL;
1549	}
1550
1551	error = xfs_init_mount_workqueues(mp);
1552	if (error)
1553	goto out_shutdown_devices;
1554
1555	error = xfs_init_percpu_counters(mp);
1556	if (error)
1557	goto out_destroy_workqueues;
1558
1559	error = xfs_inodegc_init_percpu(mp);
1560	if (error)
1561	goto out_destroy_counters;
1562
1563	/ Allocate stats memory before we do operations that might use it /
1564	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
1565	if (!mp->m_stats.xs_stats) {
1566	error = -ENOMEM;
1567	goto out_destroy_inodegc;
1568	}
1569
1570	error = xchk_mount_stats_alloc(mp);
1571	if (error)
1572	goto out_free_stats;
1573
1574	error = xfs_readsb(mp, flags);
1575	if (error)
1576	goto out_free_scrub_stats;
1577
1578	error = xfs_finish_flags(mp);
1579	if (error)
1580	goto out_free_sb;
1581
1582	error = xfs_setup_devices(mp);
1583	if (error)
1584	goto out_free_sb;
1585
1586	/ V4 support is undergoing deprecation. /
1587	if (!xfs_has_crc(mp)) {
1588	#ifdef CONFIG_XFS_SUPPORT_V4
1589	xfs_warn_once(mp,
1590	"Deprecated V4 format (crc=0) will not be supported after September 2030.");
1591	#else
1592	xfs_warn(mp,
1593	"Deprecated V4 format (crc=0) not supported by kernel.");
1594	error = -EINVAL;
1595	goto out_free_sb;
1596	#endif
1597	}
1598
1599	/ ASCII case insensitivity is undergoing deprecation. /
1600	if (xfs_has_asciici(mp)) {
1601	#ifdef CONFIG_XFS_SUPPORT_ASCII_CI
1602	xfs_warn_once(mp,
1603	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) will not be supported after September 2030.");
1604	#else
1605	xfs_warn(mp,
1606	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) not supported by kernel.");
1607	error = -EINVAL;
1608	goto out_free_sb;
1609	#endif
1610	}
1611
1612	/ Filesystem claims it needs repair, so refuse the mount. /
1613	if (xfs_has_needsrepair(mp)) {
1614	xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair.");
1615	error = -EFSCORRUPTED;
1616	goto out_free_sb;
1617	}
1618
1619	/*
1620	* Don't touch the filesystem if a user tool thinks it owns the primary
1621	* superblock. mkfs doesn't clear the flag from secondary supers, so
1622	* we don't check them at all.
1623	*/
1624	if (mp->m_sb.sb_inprogress) {
1625	xfs_warn(mp, "Offline file system operation in progress!");
1626	error = -EFSCORRUPTED;
1627	goto out_free_sb;
1628	}
1629
1630	/*
1631	* Until this is fixed only page-sized or smaller data blocks work.
1632	*/
1633	if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
1634	xfs_warn(mp,
1635	"File system with blocksize %d bytes. "
1636	"Only pagesize (%ld) or less will currently work.",
1637	mp->m_sb.sb_blocksize, PAGE_SIZE);
1638	error = -ENOSYS;
1639	goto out_free_sb;
1640	}
1641
1642	/ Ensure this filesystem fits in the page cache limits /
1643	if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) \|\|
1644	xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
1645	xfs_warn(mp,
1646	"file system too large to be mounted on this system.");
1647	error = -EFBIG;
1648	goto out_free_sb;
1649	}
1650
1651	/*
1652	* XFS block mappings use 54 bits to store the logical block offset.
1653	* This should suffice to handle the maximum file size that the VFS
1654	* supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
1655	* bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
1656	* calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
1657	* to check this assertion.
1658	*
1659	* Avoid integer overflow by comparing the maximum bmbt offset to the
1660	* maximum pagecache offset in units of fs blocks.
1661	*/
1662	if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
1663	xfs_warn(mp,
1664	"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
1665	XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
1666	XFS_MAX_FILEOFF);
1667	error = -EINVAL;
1668	goto out_free_sb;
1669	}
1670
1671	error = xfs_filestream_mount(mp);
1672	if (error)
1673	goto out_free_sb;
1674
1675	/*
1676	* we must configure the block size in the superblock before we run the
1677	* full mount process as the mount process can lookup and cache inodes.
1678	*/
1679	sb->s_magic = XFS_SUPER_MAGIC;
1680	sb->s_blocksize = mp->m_sb.sb_blocksize;
1681	sb->s_blocksize_bits = ffs(sb->s_blocksize) - `1`;
1682	sb->s_maxbytes = MAX_LFS_FILESIZE;
1683	sb->s_max_links = XFS_MAXLINK;
1684	sb->s_time_gran = `1`;
1685	if (xfs_has_bigtime(mp)) {
1686	sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
1687	sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
1688	} else {
1689	sb->s_time_min = XFS_LEGACY_TIME_MIN;
1690	sb->s_time_max = XFS_LEGACY_TIME_MAX;
1691	}
1692	trace_xfs_inode_timestamp_range(mp, min: sb->s_time_min, max: sb->s_time_max);
1693	sb->s_iflags \|= SB_I_CGROUPWB;
1694
1695	set_posix_acl_flag(sb);
1696
1697	/ version 5 superblocks support inode version counters. /
1698	if (xfs_has_crc(mp))
1699	sb->s_flags \|= SB_I_VERSION;
1700
1701	if (xfs_has_dax_always(mp)) {
1702	error = xfs_setup_dax_always(mp);
1703	if (error)
1704	goto out_filestream_unmount;
1705	}
1706
1707	if (xfs_has_discard(mp) && !bdev_max_discard_sectors(bdev: sb->s_bdev)) {
1708	xfs_warn(mp,
1709	"mounting with \"discard\" option, but the device does not support discard");
1710	mp->m_features &= ~XFS_FEAT_DISCARD;
1711	}
1712
1713	if (xfs_has_reflink(mp)) {
1714	if (mp->m_sb.sb_rblocks) {
1715	xfs_alert(mp,
1716	"reflink not compatible with realtime device!");
1717	error = -EINVAL;
1718	goto out_filestream_unmount;
1719	}
1720
1721	if (xfs_globals.always_cow) {
1722	xfs_info(mp, "using DEBUG-only always_cow mode.");
1723	mp->m_always_cow = true;
1724	}
1725	}
1726
1727	if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) {
1728	xfs_alert(mp,
1729	"reverse mapping btree not compatible with realtime device!");
1730	error = -EINVAL;
1731	goto out_filestream_unmount;
1732	}
1733
1734	error = xfs_mountfs(mp);
1735	if (error)
1736	goto out_filestream_unmount;
1737
1738	root = igrab(VFS_I(ip: mp->m_rootip));
1739	if (!root) {
1740	error = -ENOENT;
1741	goto out_unmount;
1742	}
1743	sb->s_root = d_make_root(root);
1744	if (!sb->s_root) {
1745	error = -ENOMEM;
1746	goto out_unmount;
1747	}
1748
1749	return `0`;
1750
1751	out_filestream_unmount:
1752	xfs_filestream_unmount(mp);
1753	out_free_sb:
1754	xfs_freesb(mp);
1755	out_free_scrub_stats:
1756	xchk_mount_stats_free(mp);
1757	out_free_stats:
1758	free_percpu(pdata: mp->m_stats.xs_stats);
1759	out_destroy_inodegc:
1760	xfs_inodegc_free_percpu(mp);
1761	out_destroy_counters:
1762	xfs_destroy_percpu_counters(mp);
1763	out_destroy_workqueues:
1764	xfs_destroy_mount_workqueues(mp);
1765	out_shutdown_devices:
1766	xfs_shutdown_devices(mp);
1767	return error;
1768
1769	out_unmount:
1770	xfs_filestream_unmount(mp);
1771	xfs_unmountfs(mp);
1772	goto out_free_sb;
1773	}
1774
1775	static int
1776	xfs_fs_get_tree(
1777	struct fs_context *fc)
1778	{
1779	return get_tree_bdev(fc, fill_super: xfs_fs_fill_super);
1780	}
1781
1782	static int
1783	xfs_remount_rw(
1784	struct xfs_mount *mp)
1785	{
1786	struct xfs_sb *sbp = &mp->m_sb;
1787	int error;
1788
1789	if (xfs_has_norecovery(mp)) {
1790	xfs_warn(mp,
1791	"ro->rw transition prohibited on norecovery mount");
1792	return -EINVAL;
1793	}
1794
1795	if (xfs_sb_is_v5(sbp) &&
1796	xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
1797	xfs_warn(mp,
1798	"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
1799	(sbp->sb_features_ro_compat &
1800	XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
1801	return -EINVAL;
1802	}
1803
1804	clear_bit(XFS_OPSTATE_READONLY, addr: &mp->m_opstate);
1805
1806	/*
1807	* If this is the first remount to writeable state we might have some
1808	* superblock changes to update.
1809	*/
1810	if (mp->m_update_sb) {
1811	error = xfs_sync_sb(mp, false);
1812	if (error) {
1813	xfs_warn(mp, "failed to write sb changes");
1814	return error;
1815	}
1816	mp->m_update_sb = false;
1817	}
1818
1819	/*
1820	* Fill out the reserve pool if it is empty. Use the stashed value if
1821	* it is non-zero, otherwise go with the default.
1822	*/
1823	xfs_restore_resvblks(mp);
1824	xfs_log_work_queue(mp);
1825	xfs_blockgc_start(mp);
1826
1827	/ Create the per-AG metadata reservation pool ./
1828	error = xfs_fs_reserve_ag_blocks(mp);
1829	if (error && error != -ENOSPC)
1830	return error;
1831
1832	/ Re-enable the background inode inactivation worker. /
1833	xfs_inodegc_start(mp);
1834
1835	return `0`;
1836	}
1837
1838	static int
1839	xfs_remount_ro(
1840	struct xfs_mount *mp)
1841	{
1842	struct xfs_icwalk icw = {
1843	.icw_flags = XFS_ICWALK_FLAG_SYNC,
1844	};
1845	int error;
1846
1847	/ Flush all the dirty data to disk. /
1848	error = sync_filesystem(mp->m_super);
1849	if (error)
1850	return error;
1851
1852	/*
1853	* Cancel background eofb scanning so it cannot race with the final
1854	* log force+buftarg wait and deadlock the remount.
1855	*/
1856	xfs_blockgc_stop(mp);
1857
1858	/*
1859	* Clear out all remaining COW staging extents and speculative post-EOF
1860	* preallocations so that we don't leave inodes requiring inactivation
1861	* cleanups during reclaim on a read-only mount. We must process every
1862	* cached inode, so this requires a synchronous cache scan.
1863	*/
1864	error = xfs_blockgc_free_space(mp, icm: &icw);
1865	if (error) {
1866	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1867	return error;
1868	}
1869
1870	/*
1871	* Stop the inodegc background worker. xfs_fs_reconfigure already
1872	* flushed all pending inodegc work when it sync'd the filesystem.
1873	* The VFS holds s_umount, so we know that inodes cannot enter
1874	* xfs_fs_destroy_inode during a remount operation. In readonly mode
1875	* we send inodes straight to reclaim, so no inodes will be queued.
1876	*/
1877	xfs_inodegc_stop(mp);
1878
1879	/ Free the per-AG metadata reservation pool. /
1880	error = xfs_fs_unreserve_ag_blocks(mp);
1881	if (error) {
1882	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1883	return error;
1884	}
1885
1886	/*
1887	* Before we sync the metadata, we need to free up the reserve block
1888	* pool so that the used block count in the superblock on disk is
1889	* correct at the end of the remount. Stash the current* reserve pool
1890	* size so that if we get remounted rw, we can return it to the same
1891	* size.
1892	*/
1893	xfs_save_resvblks(mp);
1894
1895	xfs_log_clean(mp);
1896	set_bit(XFS_OPSTATE_READONLY, addr: &mp->m_opstate);
1897
1898	return `0`;
1899	}
1900
1901	/*
1902	* Logically we would return an error here to prevent users from believing
1903	* they might have changed mount options using remount which can't be changed.
1904	*
1905	* But unfortunately mount(8) adds all options from mtab and fstab to the mount
1906	* arguments in some cases so we can't blindly reject options, but have to
1907	* check for each specified option if it actually differs from the currently
1908	* set option and only reject it if that's the case.
1909	*
1910	* Until that is implemented we return success for every remount request, and
1911	* silently ignore all options that we can't actually change.
1912	*/
1913	static int
1914	xfs_fs_reconfigure(
1915	struct fs_context *fc)
1916	{
1917	struct xfs_mount *mp = XFS_M(fc->root->d_sb);
1918	struct xfs_mount *new_mp = fc->s_fs_info;
1919	int flags = fc->sb_flags;
1920	int error;
1921
1922	/ version 5 superblocks always support version counters. /
1923	if (xfs_has_crc(mp))
1924	fc->sb_flags \|= SB_I_VERSION;
1925
1926	error = xfs_fs_validate_params(mp: new_mp);
1927	if (error)
1928	return error;
1929
1930	/ inode32 -> inode64 /
1931	if (xfs_has_small_inums(mp) && !xfs_has_small_inums(mp: new_mp)) {
1932	mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
1933	mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
1934	}
1935
1936	/ inode64 -> inode32 /
1937	if (!xfs_has_small_inums(mp) && xfs_has_small_inums(mp: new_mp)) {
1938	mp->m_features \|= XFS_FEAT_SMALL_INUMS;
1939	mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
1940	}
1941
1942	/ ro -> rw /
1943	if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
1944	error = xfs_remount_rw(mp);
1945	if (error)
1946	return error;
1947	}
1948
1949	/ rw -> ro /
1950	if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
1951	error = xfs_remount_ro(mp);
1952	if (error)
1953	return error;
1954	}
1955
1956	return `0`;
1957	}
1958
1959	static void
1960	xfs_fs_free(
1961	struct fs_context *fc)
1962	{
1963	struct xfs_mount *mp = fc->s_fs_info;
1964
1965	/*
1966	* mp is stored in the fs_context when it is initialized.
1967	* mp is transferred to the superblock on a successful mount,
1968	* but if an error occurs before the transfer we have to free
1969	* it here.
1970	*/
1971	if (mp)
1972	xfs_mount_free(mp);
1973	}
1974
1975	static const struct fs_context_operations xfs_context_ops = {
1976	.parse_param = xfs_fs_parse_param,
1977	.get_tree = xfs_fs_get_tree,
1978	.reconfigure = xfs_fs_reconfigure,
1979	.free = xfs_fs_free,
1980	};
1981
1982	static int xfs_init_fs_context(
1983	struct fs_context *fc)
1984	{
1985	struct xfs_mount *mp;
1986
1987	mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO);
1988	if (!mp)
1989	return -ENOMEM;
1990
1991	spin_lock_init(&mp->m_sb_lock);
1992	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
1993	spin_lock_init(&mp->m_perag_lock);
1994	mutex_init(&mp->m_growlock);
1995	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
1996	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
1997	mp->m_kobj.kobject.kset = xfs_kset;
1998	/*
1999	* We don't create the finobt per-ag space reservation until after log
2000	* recovery, so we must set this to true so that an ifree transaction
2001	* started during log recovery will not depend on space reservations
2002	* for finobt expansion.
2003	*/
2004	mp->m_finobt_nores = true;
2005
2006	/*
2007	* These can be overridden by the mount option parsing.
2008	*/
2009	mp->m_logbufs = -`1`;
2010	mp->m_logbsize = -`1`;
2011	mp->m_allocsize_log = `16`; / 64k /
2012
2013	/*
2014	* Copy binary VFS mount flags we are interested in.
2015	*/
2016	if (fc->sb_flags & SB_RDONLY)
2017	set_bit(XFS_OPSTATE_READONLY, addr: &mp->m_opstate);
2018	if (fc->sb_flags & SB_DIRSYNC)
2019	mp->m_features \|= XFS_FEAT_DIRSYNC;
2020	if (fc->sb_flags & SB_SYNCHRONOUS)
2021	mp->m_features \|= XFS_FEAT_WSYNC;
2022
2023	fc->s_fs_info = mp;
2024	fc->ops = &xfs_context_ops;
2025
2026	return `0`;
2027	}
2028
/*
 * ->kill_sb: tear down the block superblock, then free the XFS mount
 * structure that was attached to it.
 */
static void
xfs_kill_sb(
	struct super_block	*sb)
{
	/*
	 * Look the mount up first; xfs_fs_put_super() leaves sb->s_fs_info
	 * in place, so this is the same pointer that is visible after
	 * kill_block_super() returns.
	 */
	struct xfs_mount	*mp = XFS_M(sb);

	kill_block_super(sb);
	xfs_mount_free(mp);
}
2036
2037	static struct file_system_type xfs_fs_type = {
2038	.owner = THIS_MODULE,
2039	.name = "xfs",
2040	.init_fs_context = xfs_init_fs_context,
2041	.parameters = xfs_fs_parameters,
2042	.kill_sb = xfs_kill_sb,
2043	.fs_flags = FS_REQUIRES_DEV \| FS_ALLOW_IDMAP,
2044	};
2045	MODULE_ALIAS_FS("xfs");
2046
2047	STATIC int __init
2048	xfs_init_caches(void)
2049	{
2050	int error;
2051
2052	xfs_buf_cache = kmem_cache_create(name: "xfs_buf", size: sizeof(struct xfs_buf), align: `0`,
2053	SLAB_HWCACHE_ALIGN \|
2054	SLAB_RECLAIM_ACCOUNT \|
2055	SLAB_MEM_SPREAD,
2056	NULL);
2057	if (!xfs_buf_cache)
2058	goto out;
2059
2060	xfs_log_ticket_cache = kmem_cache_create(name: "xfs_log_ticket",
2061	size: sizeof(struct xlog_ticket),
2062	align: `0`, flags: `0`, NULL);
2063	if (!xfs_log_ticket_cache)
2064	goto out_destroy_buf_cache;
2065
2066	error = xfs_btree_init_cur_caches();
2067	if (error)
2068	goto out_destroy_log_ticket_cache;
2069
2070	error = xfs_defer_init_item_caches();
2071	if (error)
2072	goto out_destroy_btree_cur_cache;
2073
2074	xfs_da_state_cache = kmem_cache_create("xfs_da_state",
2075	sizeof(struct xfs_da_state),
2076	`0`, `0`, NULL);
2077	if (!xfs_da_state_cache)
2078	goto out_destroy_defer_item_cache;
2079
2080	xfs_ifork_cache = kmem_cache_create("xfs_ifork",
2081	sizeof(struct xfs_ifork),
2082	`0`, `0`, NULL);
2083	if (!xfs_ifork_cache)
2084	goto out_destroy_da_state_cache;
2085
2086	xfs_trans_cache = kmem_cache_create(name: "xfs_trans",
2087	size: sizeof(struct xfs_trans),
2088	align: `0`, flags: `0`, NULL);
2089	if (!xfs_trans_cache)
2090	goto out_destroy_ifork_cache;
2091
2092
2093	/*
2094	* The size of the cache-allocated buf log item is the maximum
2095	* size possible under XFS. This wastes a little bit of memory,
2096	* but it is much faster.
2097	*/
2098	xfs_buf_item_cache = kmem_cache_create(name: "xfs_buf_item",
2099	size: sizeof(struct xfs_buf_log_item),
2100	align: `0`, flags: `0`, NULL);
2101	if (!xfs_buf_item_cache)
2102	goto out_destroy_trans_cache;
2103
2104	xfs_efd_cache = kmem_cache_create(name: "xfs_efd_item",
2105	size: xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS),
2106	align: `0`, flags: `0`, NULL);
2107	if (!xfs_efd_cache)
2108	goto out_destroy_buf_item_cache;
2109
2110	xfs_efi_cache = kmem_cache_create(name: "xfs_efi_item",
2111	size: xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS),
2112	align: `0`, flags: `0`, NULL);
2113	if (!xfs_efi_cache)
2114	goto out_destroy_efd_cache;
2115
2116	xfs_inode_cache = kmem_cache_create(name: "xfs_inode",
2117	size: sizeof(struct xfs_inode), align: `0`,
2118	flags: (SLAB_HWCACHE_ALIGN \|
2119	SLAB_RECLAIM_ACCOUNT \|
2120	SLAB_MEM_SPREAD \| SLAB_ACCOUNT),
2121	ctor: xfs_fs_inode_init_once);
2122	if (!xfs_inode_cache)
2123	goto out_destroy_efi_cache;
2124
2125	xfs_ili_cache = kmem_cache_create(name: "xfs_ili",
2126	size: sizeof(struct xfs_inode_log_item), align: `0`,
2127	SLAB_RECLAIM_ACCOUNT \| SLAB_MEM_SPREAD,
2128	NULL);
2129	if (!xfs_ili_cache)
2130	goto out_destroy_inode_cache;
2131
2132	xfs_icreate_cache = kmem_cache_create(name: "xfs_icr",
2133	size: sizeof(struct xfs_icreate_item),
2134	align: `0`, flags: `0`, NULL);
2135	if (!xfs_icreate_cache)
2136	goto out_destroy_ili_cache;
2137
2138	xfs_rud_cache = kmem_cache_create(name: "xfs_rud_item",
2139	size: sizeof(struct xfs_rud_log_item),
2140	align: `0`, flags: `0`, NULL);
2141	if (!xfs_rud_cache)
2142	goto out_destroy_icreate_cache;
2143
2144	xfs_rui_cache = kmem_cache_create(name: "xfs_rui_item",
2145	size: xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
2146	align: `0`, flags: `0`, NULL);
2147	if (!xfs_rui_cache)
2148	goto out_destroy_rud_cache;
2149
2150	xfs_cud_cache = kmem_cache_create(name: "xfs_cud_item",
2151	size: sizeof(struct xfs_cud_log_item),
2152	align: `0`, flags: `0`, NULL);
2153	if (!xfs_cud_cache)
2154	goto out_destroy_rui_cache;
2155
2156	xfs_cui_cache = kmem_cache_create(name: "xfs_cui_item",
2157	size: xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
2158	align: `0`, flags: `0`, NULL);
2159	if (!xfs_cui_cache)
2160	goto out_destroy_cud_cache;
2161
2162	xfs_bud_cache = kmem_cache_create(name: "xfs_bud_item",
2163	size: sizeof(struct xfs_bud_log_item),
2164	align: `0`, flags: `0`, NULL);
2165	if (!xfs_bud_cache)
2166	goto out_destroy_cui_cache;
2167
2168	xfs_bui_cache = kmem_cache_create(name: "xfs_bui_item",
2169	size: xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
2170	align: `0`, flags: `0`, NULL);
2171	if (!xfs_bui_cache)
2172	goto out_destroy_bud_cache;
2173
2174	xfs_attrd_cache = kmem_cache_create(name: "xfs_attrd_item",
2175	size: sizeof(struct xfs_attrd_log_item),
2176	align: `0`, flags: `0`, NULL);
2177	if (!xfs_attrd_cache)
2178	goto out_destroy_bui_cache;
2179
2180	xfs_attri_cache = kmem_cache_create(name: "xfs_attri_item",
2181	size: sizeof(struct xfs_attri_log_item),
2182	align: `0`, flags: `0`, NULL);
2183	if (!xfs_attri_cache)
2184	goto out_destroy_attrd_cache;
2185
2186	xfs_iunlink_cache = kmem_cache_create(name: "xfs_iul_item",
2187	size: sizeof(struct xfs_iunlink_item),
2188	align: `0`, flags: `0`, NULL);
2189	if (!xfs_iunlink_cache)
2190	goto out_destroy_attri_cache;
2191
2192	return `0`;
2193
2194	out_destroy_attri_cache:
2195	kmem_cache_destroy(s: xfs_attri_cache);
2196	out_destroy_attrd_cache:
2197	kmem_cache_destroy(s: xfs_attrd_cache);
2198	out_destroy_bui_cache:
2199	kmem_cache_destroy(s: xfs_bui_cache);
2200	out_destroy_bud_cache:
2201	kmem_cache_destroy(s: xfs_bud_cache);
2202	out_destroy_cui_cache:
2203	kmem_cache_destroy(s: xfs_cui_cache);
2204	out_destroy_cud_cache:
2205	kmem_cache_destroy(s: xfs_cud_cache);
2206	out_destroy_rui_cache:
2207	kmem_cache_destroy(s: xfs_rui_cache);
2208	out_destroy_rud_cache:
2209	kmem_cache_destroy(s: xfs_rud_cache);
2210	out_destroy_icreate_cache:
2211	kmem_cache_destroy(s: xfs_icreate_cache);
2212	out_destroy_ili_cache:
2213	kmem_cache_destroy(s: xfs_ili_cache);
2214	out_destroy_inode_cache:
2215	kmem_cache_destroy(s: xfs_inode_cache);
2216	out_destroy_efi_cache:
2217	kmem_cache_destroy(s: xfs_efi_cache);
2218	out_destroy_efd_cache:
2219	kmem_cache_destroy(s: xfs_efd_cache);
2220	out_destroy_buf_item_cache:
2221	kmem_cache_destroy(s: xfs_buf_item_cache);
2222	out_destroy_trans_cache:
2223	kmem_cache_destroy(s: xfs_trans_cache);
2224	out_destroy_ifork_cache:
2225	kmem_cache_destroy(xfs_ifork_cache);
2226	out_destroy_da_state_cache:
2227	kmem_cache_destroy(xfs_da_state_cache);
2228	out_destroy_defer_item_cache:
2229	xfs_defer_destroy_item_caches();
2230	out_destroy_btree_cur_cache:
2231	xfs_btree_destroy_cur_caches();
2232	out_destroy_log_ticket_cache:
2233	kmem_cache_destroy(s: xfs_log_ticket_cache);
2234	out_destroy_buf_cache:
2235	kmem_cache_destroy(s: xfs_buf_cache);
2236	out:
2237	return -ENOMEM;
2238	}
2239
2240	STATIC void
2241	xfs_destroy_caches(void)
2242	{
2243	/*
2244	* Make sure all delayed rcu free are flushed before we
2245	* destroy caches.
2246	*/
2247	rcu_barrier();
2248	kmem_cache_destroy(s: xfs_iunlink_cache);
2249	kmem_cache_destroy(s: xfs_attri_cache);
2250	kmem_cache_destroy(s: xfs_attrd_cache);
2251	kmem_cache_destroy(s: xfs_bui_cache);
2252	kmem_cache_destroy(s: xfs_bud_cache);
2253	kmem_cache_destroy(s: xfs_cui_cache);
2254	kmem_cache_destroy(s: xfs_cud_cache);
2255	kmem_cache_destroy(s: xfs_rui_cache);
2256	kmem_cache_destroy(s: xfs_rud_cache);
2257	kmem_cache_destroy(s: xfs_icreate_cache);
2258	kmem_cache_destroy(s: xfs_ili_cache);
2259	kmem_cache_destroy(s: xfs_inode_cache);
2260	kmem_cache_destroy(s: xfs_efi_cache);
2261	kmem_cache_destroy(s: xfs_efd_cache);
2262	kmem_cache_destroy(s: xfs_buf_item_cache);
2263	kmem_cache_destroy(s: xfs_trans_cache);
2264	kmem_cache_destroy(xfs_ifork_cache);
2265	kmem_cache_destroy(xfs_da_state_cache);
2266	xfs_defer_destroy_item_caches();
2267	xfs_btree_destroy_cur_caches();
2268	kmem_cache_destroy(s: xfs_log_ticket_cache);
2269	kmem_cache_destroy(s: xfs_buf_cache);
2270	}
2271
2272	STATIC int __init
2273	xfs_init_workqueues(void)
2274	{
2275	/*
2276	* The allocation workqueue can be used in memory reclaim situations
2277	* (writepage path), and parallelism is only limited by the number of
2278	* AGs in all the filesystems mounted. Hence use the default large
2279	* max_active value for this workqueue.
2280	*/
2281	xfs_alloc_wq = alloc_workqueue("xfsalloc",
2282	XFS_WQFLAGS(WQ_MEM_RECLAIM \| WQ_FREEZABLE), `0`);
2283	if (!xfs_alloc_wq)
2284	return -ENOMEM;
2285
2286	xfs_discard_wq = alloc_workqueue(fmt: "xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND),
2287	max_active: `0`);
2288	if (!xfs_discard_wq)
2289	goto out_free_alloc_wq;
2290
2291	return `0`;
2292	out_free_alloc_wq:
2293	destroy_workqueue(xfs_alloc_wq);
2294	return -ENOMEM;
2295	}
2296
2297	STATIC void
2298	xfs_destroy_workqueues(void)
2299	{
2300	destroy_workqueue(wq: xfs_discard_wq);
2301	destroy_workqueue(xfs_alloc_wq);
2302	}
2303
2304	STATIC int __init
2305	init_xfs_fs(void)
2306	{
2307	int error;
2308
2309	xfs_check_ondisk_structs();
2310
2311	error = xfs_dahash_test();
2312	if (error)
2313	return error;
2314
2315	printk(KERN_INFO XFS_VERSION_STRING " with "
2316	XFS_BUILD_OPTIONS " enabled\n");
2317
2318	xfs_dir_startup();
2319
2320	error = xfs_init_caches();
2321	if (error)
2322	goto out;
2323
2324	error = xfs_init_workqueues();
2325	if (error)
2326	goto out_destroy_caches;
2327
2328	error = xfs_mru_cache_init();
2329	if (error)
2330	goto out_destroy_wq;
2331
2332	error = xfs_init_procfs();
2333	if (error)
2334	goto out_mru_cache_uninit;
2335
2336	error = xfs_sysctl_register();
2337	if (error)
2338	goto out_cleanup_procfs;
2339
2340	xfs_debugfs = xfs_debugfs_mkdir(name: "xfs", NULL);
2341
2342	xfs_kset = kset_create_and_add(name: "xfs", NULL, parent_kobj: fs_kobj);
2343	if (!xfs_kset) {
2344	error = -ENOMEM;
2345	goto out_debugfs_unregister;
2346	}
2347
2348	xfsstats.xs_kobj.kobject.kset = xfs_kset;
2349
2350	xfsstats.xs_stats = alloc_percpu(struct xfsstats);
2351	if (!xfsstats.xs_stats) {
2352	error = -ENOMEM;
2353	goto out_kset_unregister;
2354	}
2355
2356	error = xfs_sysfs_init(kobj: &xfsstats.xs_kobj, ktype: &xfs_stats_ktype, NULL,
2357	name: "stats");
2358	if (error)
2359	goto out_free_stats;
2360
2361	error = xchk_global_stats_setup(parent: xfs_debugfs);
2362	if (error)
2363	goto out_remove_stats_kobj;
2364
2365	#ifdef DEBUG
2366	xfs_dbg_kobj.kobject.kset = xfs_kset;
2367	error = xfs_sysfs_init(kobj: &xfs_dbg_kobj, ktype: &xfs_dbg_ktype, NULL, name: "debug");
2368	if (error)
2369	goto out_remove_scrub_stats;
2370	#endif
2371
2372	error = xfs_qm_init();
2373	if (error)
2374	goto out_remove_dbg_kobj;
2375
2376	error = register_filesystem(&xfs_fs_type);
2377	if (error)
2378	goto out_qm_exit;
2379	return `0`;
2380
2381	out_qm_exit:
2382	xfs_qm_exit();
2383	out_remove_dbg_kobj:
2384	#ifdef DEBUG
2385	xfs_sysfs_del(kobj: &xfs_dbg_kobj);
2386	out_remove_scrub_stats:
2387	#endif
2388	xchk_global_stats_teardown();
2389	out_remove_stats_kobj:
2390	xfs_sysfs_del(kobj: &xfsstats.xs_kobj);
2391	out_free_stats:
2392	free_percpu(pdata: xfsstats.xs_stats);
2393	out_kset_unregister:
2394	kset_unregister(kset: xfs_kset);
2395	out_debugfs_unregister:
2396	debugfs_remove(dentry: xfs_debugfs);
2397	xfs_sysctl_unregister();
2398	out_cleanup_procfs:
2399	xfs_cleanup_procfs();
2400	out_mru_cache_uninit:
2401	xfs_mru_cache_uninit();
2402	out_destroy_wq:
2403	xfs_destroy_workqueues();
2404	out_destroy_caches:
2405	xfs_destroy_caches();
2406	out:
2407	return error;
2408	}
2409
2410	STATIC void __exit
2411	exit_xfs_fs(void)
2412	{
2413	xfs_qm_exit();
2414	unregister_filesystem(&xfs_fs_type);
2415	#ifdef DEBUG
2416	xfs_sysfs_del(kobj: &xfs_dbg_kobj);
2417	#endif
2418	xchk_global_stats_teardown();
2419	xfs_sysfs_del(kobj: &xfsstats.xs_kobj);
2420	free_percpu(pdata: xfsstats.xs_stats);
2421	kset_unregister(kset: xfs_kset);
2422	debugfs_remove(dentry: xfs_debugfs);
2423	xfs_sysctl_unregister();
2424	xfs_cleanup_procfs();
2425	xfs_mru_cache_uninit();
2426	xfs_destroy_workqueues();
2427	xfs_destroy_caches();
2428	xfs_uuid_table_free();
2429	}
2430
2431	module_init(init_xfs_fs);
2432	module_exit(exit_xfs_fs);
2433
2434	MODULE_AUTHOR("Silicon Graphics, Inc.");
2435	MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
2436	MODULE_LICENSE("GPL");
2437

/* Source code of linux/fs/xfs/xfs_super.c */