ioctl.c source code [linux/fs/btrfs/ioctl.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Copyright (C) 2007 Oracle. All rights reserved.
4	*/
5
6	#include <linux/kernel.h>
7	#include <linux/bio.h>
8	#include <linux/file.h>
9	#include <linux/fs.h>
10	#include <linux/fsnotify.h>
11	#include <linux/pagemap.h>
12	#include <linux/highmem.h>
13	#include <linux/time.h>
14	#include <linux/string.h>
15	#include <linux/backing-dev.h>
16	#include <linux/mount.h>
17	#include <linux/namei.h>
18	#include <linux/writeback.h>
19	#include <linux/compat.h>
20	#include <linux/security.h>
21	#include <linux/xattr.h>
22	#include <linux/mm.h>
23	#include <linux/slab.h>
24	#include <linux/blkdev.h>
25	#include <linux/uuid.h>
26	#include <linux/btrfs.h>
27	#include <linux/uaccess.h>
28	#include <linux/iversion.h>
29	#include <linux/fileattr.h>
30	#include <linux/fsverity.h>
31	#include <linux/sched/xacct.h>
32	#include "ctree.h"
33	#include "disk-io.h"
34	#include "export.h"
35	#include "transaction.h"
36	#include "btrfs_inode.h"
37	#include "print-tree.h"
38	#include "volumes.h"
39	#include "locking.h"
40	#include "backref.h"
41	#include "rcu-string.h"
42	#include "send.h"
43	#include "dev-replace.h"
44	#include "props.h"
45	#include "sysfs.h"
46	#include "qgroup.h"
47	#include "tree-log.h"
48	#include "compression.h"
49	#include "space-info.h"
50	#include "delalloc-space.h"
51	#include "block-group.h"
52	#include "subpage.h"
53	#include "fs.h"
54	#include "accessors.h"
55	#include "extent-tree.h"
56	#include "root-tree.h"
57	#include "defrag.h"
58	#include "dir-item.h"
59	#include "uuid-tree.h"
60	#include "ioctl.h"
61	#include "file.h"
62	#include "scrub.h"
63	#include "super.h"
64
#ifdef CONFIG_64BIT
/*
 * If we have a 32-bit userspace and 64-bit kernel, then the UAPI
 * structures are incorrect, as the timespec structure from userspace
 * is 4 bytes too small. We define these alternatives here to teach
 * the kernel about the 32-bit struct packing.
 */
struct btrfs_ioctl_timespec_32 {
	__u64 sec;
	__u32 nsec;
} __attribute__ ((__packed__));

/* Packed variant of the received-subvol arguments using the timespec above. */
struct btrfs_ioctl_received_subvol_args_32 {
	char	uuid[BTRFS_UUID_SIZE];			/* in */
	__u64	stransid;				/* in */
	__u64	rtransid;				/* out */
	struct btrfs_ioctl_timespec_32 stime;		/* in */
	struct btrfs_ioctl_timespec_32 rtime;		/* out */
	__u64	flags;					/* in */
	__u64	reserved[16];				/* in */
} __attribute__ ((__packed__));

#define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \
				struct btrfs_ioctl_received_subvol_args_32)
#endif
89
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
/*
 * Compat argument layouts: user pointers are carried as compat_uptr_t
 * instead of native pointers.
 */
struct btrfs_ioctl_send_args_32 {
	__s64		send_fd;		/* in */
	__u64		clone_sources_count;	/* in */
	compat_uptr_t	clone_sources;		/* in */
	__u64		parent_root;		/* in */
	__u64		flags;			/* in */
	__u32		version;		/* in */
	__u8		reserved[28];		/* in */
} __attribute__ ((__packed__));

#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
			       struct btrfs_ioctl_send_args_32)

/* Note: unlike the struct above, this one is not declared packed. */
struct btrfs_ioctl_encoded_io_args_32 {
	compat_uptr_t	iov;
	compat_ulong_t	iovcnt;
	__s64		offset;
	__u64		flags;
	__u64		len;
	__u64		unencoded_len;
	__u64		unencoded_offset;
	__u32		compression;
	__u32		encryption;
	__u8		reserved[64];
};

#define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \
				       struct btrfs_ioctl_encoded_io_args_32)
#define BTRFS_IOC_ENCODED_WRITE_32 _IOW(BTRFS_IOCTL_MAGIC, 64, \
					struct btrfs_ioctl_encoded_io_args_32)
#endif
122
123	/ Mask out flags that are inappropriate for the given type of inode. /
124	static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
125	unsigned int flags)
126	{
127	if (S_ISDIR(inode->i_mode))
128	return flags;
129	else if (S_ISREG(inode->i_mode))
130	return flags & ~FS_DIRSYNC_FL;
131	else
132	return flags & (FS_NODUMP_FL \| FS_NOATIME_FL);
133	}
134
135	/*
136	* Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
137	* ioctl.
138	*/
139	static unsigned int btrfs_inode_flags_to_fsflags(struct btrfs_inode *binode)
140	{
141	unsigned int iflags = `0`;
142	u32 flags = binode->flags;
143	u32 ro_flags = binode->ro_flags;
144
145	if (flags & BTRFS_INODE_SYNC)
146	iflags \|= FS_SYNC_FL;
147	if (flags & BTRFS_INODE_IMMUTABLE)
148	iflags \|= FS_IMMUTABLE_FL;
149	if (flags & BTRFS_INODE_APPEND)
150	iflags \|= FS_APPEND_FL;
151	if (flags & BTRFS_INODE_NODUMP)
152	iflags \|= FS_NODUMP_FL;
153	if (flags & BTRFS_INODE_NOATIME)
154	iflags \|= FS_NOATIME_FL;
155	if (flags & BTRFS_INODE_DIRSYNC)
156	iflags \|= FS_DIRSYNC_FL;
157	if (flags & BTRFS_INODE_NODATACOW)
158	iflags \|= FS_NOCOW_FL;
159	if (ro_flags & BTRFS_INODE_RO_VERITY)
160	iflags \|= FS_VERITY_FL;
161
162	if (flags & BTRFS_INODE_NOCOMPRESS)
163	iflags \|= FS_NOCOMP_FL;
164	else if (flags & BTRFS_INODE_COMPRESS)
165	iflags \|= FS_COMPR_FL;
166
167	return iflags;
168	}
169
170	/*
171	* Update inode->i_flags based on the btrfs internal flags.
172	*/
173	void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
174	{
175	struct btrfs_inode *binode = BTRFS_I(inode);
176	unsigned int new_fl = `0`;
177
178	if (binode->flags & BTRFS_INODE_SYNC)
179	new_fl \|= S_SYNC;
180	if (binode->flags & BTRFS_INODE_IMMUTABLE)
181	new_fl \|= S_IMMUTABLE;
182	if (binode->flags & BTRFS_INODE_APPEND)
183	new_fl \|= S_APPEND;
184	if (binode->flags & BTRFS_INODE_NOATIME)
185	new_fl \|= S_NOATIME;
186	if (binode->flags & BTRFS_INODE_DIRSYNC)
187	new_fl \|= S_DIRSYNC;
188	if (binode->ro_flags & BTRFS_INODE_RO_VERITY)
189	new_fl \|= S_VERITY;
190
191	set_mask_bits(&inode->i_flags,
192	S_SYNC \| S_APPEND \| S_IMMUTABLE \| S_NOATIME \| S_DIRSYNC \|
193	S_VERITY, new_fl);
194	}
195
/*
 * Validate a requested set of FS_*_FL flags against the flags currently set.
 *
 * Returns -EOPNOTSUPP if @flags contains a bit outside the supported set,
 * -EINVAL if the request is self-contradictory or conflicts with @old_flags,
 * and 0 otherwise.
 */
static int check_fsflags(unsigned int old_flags, unsigned int flags)
{
	const unsigned int supported = FS_IMMUTABLE_FL | FS_APPEND_FL |
				       FS_NOATIME_FL | FS_NODUMP_FL |
				       FS_SYNC_FL | FS_DIRSYNC_FL |
				       FS_NOCOMP_FL | FS_COMPR_FL |
				       FS_NOCOW_FL;
	const unsigned int compr_bits = FS_COMPR_FL | FS_NOCOMP_FL;

	if (flags & ~supported)
		return -EOPNOTSUPP;

	/*
	 * COMPR in the old flags and NOCOMP in the new ones (or vice versa) is
	 * fine, but asking for both at once is not.
	 */
	if ((flags & compr_bits) == compr_bits)
		return -EINVAL;

	/* NOCOW and compression options are mutually exclusive. */
	if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
		return -EINVAL;
	if ((old_flags & FS_NOCOW_FL) && (flags & compr_bits))
		return -EINVAL;
	if ((flags & FS_NOCOW_FL) && (old_flags & compr_bits))
		return -EINVAL;

	return 0;
}
224
225	static int check_fsflags_compatible(struct btrfs_fs_info *fs_info,
226	unsigned int flags)
227	{
228	if (btrfs_is_zoned(fs_info) && (flags & FS_NOCOW_FL))
229	return -EPERM;
230
231	return `0`;
232	}
233
/*
 * Fill @fa with the flags/xflags derived from the internal inode flags of the
 * inode behind @dentry. The remaining items of fsxattr are zeroed.
 * Always returns 0.
 */
int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
	struct inode *inode = d_inode(dentry);
	unsigned int fsflags = btrfs_inode_flags_to_fsflags(BTRFS_I(inode));

	fileattr_fill_flags(fa, fsflags);
	return 0;
}
245
246	int btrfs_fileattr_set(struct mnt_idmap *idmap,
247	struct dentry dentry, struct* fileattr *fa)
248	{
249	struct inode *inode = d_inode(dentry);
250	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
251	struct btrfs_inode *binode = BTRFS_I(inode);
252	struct btrfs_root *root = binode->root;
253	struct btrfs_trans_handle *trans;
254	unsigned int fsflags, old_fsflags;
255	int ret;
256	const char *comp = NULL;
257	u32 binode_flags;
258
259	if (btrfs_root_readonly(root))
260	return -EROFS;
261
262	if (fileattr_has_fsx(fa))
263	return -EOPNOTSUPP;
264
265	fsflags = btrfs_mask_fsflags_for_type(inode, flags: fa->flags);
266	old_fsflags = btrfs_inode_flags_to_fsflags(binode);
267	ret = check_fsflags(old_flags: old_fsflags, flags: fsflags);
268	if (ret)
269	return ret;
270
271	ret = check_fsflags_compatible(fs_info, flags: fsflags);
272	if (ret)
273	return ret;
274
275	binode_flags = binode->flags;
276	if (fsflags & FS_SYNC_FL)
277	binode_flags \|= BTRFS_INODE_SYNC;
278	else
279	binode_flags &= ~BTRFS_INODE_SYNC;
280	if (fsflags & FS_IMMUTABLE_FL)
281	binode_flags \|= BTRFS_INODE_IMMUTABLE;
282	else
283	binode_flags &= ~BTRFS_INODE_IMMUTABLE;
284	if (fsflags & FS_APPEND_FL)
285	binode_flags \|= BTRFS_INODE_APPEND;
286	else
287	binode_flags &= ~BTRFS_INODE_APPEND;
288	if (fsflags & FS_NODUMP_FL)
289	binode_flags \|= BTRFS_INODE_NODUMP;
290	else
291	binode_flags &= ~BTRFS_INODE_NODUMP;
292	if (fsflags & FS_NOATIME_FL)
293	binode_flags \|= BTRFS_INODE_NOATIME;
294	else
295	binode_flags &= ~BTRFS_INODE_NOATIME;
296
297	/ If coming from FS_IOC_FSSETXATTR then skip unconverted flags /
298	if (!fa->flags_valid) {
299	/ 1 item for the inode /
300	trans = btrfs_start_transaction(root, num_items: `1`);
301	if (IS_ERR(ptr: trans))
302	return PTR_ERR(ptr: trans);
303	goto update_flags;
304	}
305
306	if (fsflags & FS_DIRSYNC_FL)
307	binode_flags \|= BTRFS_INODE_DIRSYNC;
308	else
309	binode_flags &= ~BTRFS_INODE_DIRSYNC;
310	if (fsflags & FS_NOCOW_FL) {
311	if (S_ISREG(inode->i_mode)) {
312	/*
313	* It's safe to turn csums off here, no extents exist.
314	* Otherwise we want the flag to reflect the real COW
315	* status of the file and will not set it.
316	*/
317	if (inode->i_size == `0`)
318	binode_flags \|= BTRFS_INODE_NODATACOW \|
319	BTRFS_INODE_NODATASUM;
320	} else {
321	binode_flags \|= BTRFS_INODE_NODATACOW;
322	}
323	} else {
324	/*
325	* Revert back under same assumptions as above
326	*/
327	if (S_ISREG(inode->i_mode)) {
328	if (inode->i_size == `0`)
329	binode_flags &= ~(BTRFS_INODE_NODATACOW \|
330	BTRFS_INODE_NODATASUM);
331	} else {
332	binode_flags &= ~BTRFS_INODE_NODATACOW;
333	}
334	}
335
336	/*
337	* The COMPRESS flag can only be changed by users, while the NOCOMPRESS
338	* flag may be changed automatically if compression code won't make
339	* things smaller.
340	*/
341	if (fsflags & FS_NOCOMP_FL) {
342	binode_flags &= ~BTRFS_INODE_COMPRESS;
343	binode_flags \|= BTRFS_INODE_NOCOMPRESS;
344	} else if (fsflags & FS_COMPR_FL) {
345
346	if (IS_SWAPFILE(inode))
347	return -ETXTBSY;
348
349	binode_flags \|= BTRFS_INODE_COMPRESS;
350	binode_flags &= ~BTRFS_INODE_NOCOMPRESS;
351
352	comp = btrfs_compress_type2str(type: fs_info->compress_type);
353	if (!comp \|\| comp[`0`] == `0`)
354	comp = btrfs_compress_type2str(type: BTRFS_COMPRESS_ZLIB);
355	} else {
356	binode_flags &= ~(BTRFS_INODE_COMPRESS \| BTRFS_INODE_NOCOMPRESS);
357	}
358
359	/*
360	* 1 for inode item
361	* 2 for properties
362	*/
363	trans = btrfs_start_transaction(root, num_items: `3`);
364	if (IS_ERR(ptr: trans))
365	return PTR_ERR(ptr: trans);
366
367	if (comp) {
368	ret = btrfs_set_prop(trans, inode, name: "btrfs.compression", value: comp,
369	strlen(comp), flags: `0`);
370	if (ret) {
371	btrfs_abort_transaction(trans, ret);
372	goto out_end_trans;
373	}
374	} else {
375	ret = btrfs_set_prop(trans, inode, name: "btrfs.compression", NULL,
376	value_len: `0`, flags: `0`);
377	if (ret && ret != -ENODATA) {
378	btrfs_abort_transaction(trans, ret);
379	goto out_end_trans;
380	}
381	}
382
383	update_flags:
384	binode->flags = binode_flags;
385	btrfs_sync_inode_flags_to_i_flags(inode);
386	inode_inc_iversion(inode);
387	inode_set_ctime_current(inode);
388	ret = btrfs_update_inode(trans, inode: BTRFS_I(inode));
389
390	out_end_trans:
391	btrfs_end_transaction(trans);
392	return ret;
393	}
394
395	/*
396	* Start exclusive operation @type, return true on success
397	*/
398	bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
399	enum btrfs_exclusive_operation type)
400	{
401	bool ret = false;
402
403	spin_lock(lock: &fs_info->super_lock);
404	if (fs_info->exclusive_operation == BTRFS_EXCLOP_NONE) {
405	fs_info->exclusive_operation = type;
406	ret = true;
407	}
408	spin_unlock(lock: &fs_info->super_lock);
409
410	return ret;
411	}
412
413	/*
414	* Conditionally allow to enter the exclusive operation in case it's compatible
415	* with the running one. This must be paired with btrfs_exclop_start_unlock and
416	* btrfs_exclop_finish.
417	*
418	* Compatibility:
419	* - the same type is already running
420	* - when trying to add a device and balance has been paused
421	* - not BTRFS_EXCLOP_NONE - this is intentionally incompatible and the caller
422	* must check the condition first that would allow none -> @type
423	*/
424	bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
425	enum btrfs_exclusive_operation type)
426	{
427	spin_lock(lock: &fs_info->super_lock);
428	if (fs_info->exclusive_operation == type \|\|
429	(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED &&
430	type == BTRFS_EXCLOP_DEV_ADD))
431	return true;
432
433	spin_unlock(lock: &fs_info->super_lock);
434	return false;
435	}
436
437	void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info)
438	{
439	spin_unlock(lock: &fs_info->super_lock);
440	}
441
442	void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
443	{
444	spin_lock(lock: &fs_info->super_lock);
445	WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE);
446	spin_unlock(lock: &fs_info->super_lock);
447	sysfs_notify(kobj: &fs_info->fs_devices->fsid_kobj, NULL, attr: "exclusive_operation");
448	}
449
450	void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
451	enum btrfs_exclusive_operation op)
452	{
453	switch (op) {
454	case BTRFS_EXCLOP_BALANCE_PAUSED:
455	spin_lock(lock: &fs_info->super_lock);
456	ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE \|\|
457	fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD \|\|
458	fs_info->exclusive_operation == BTRFS_EXCLOP_NONE \|\|
459	fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
460	fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
461	spin_unlock(lock: &fs_info->super_lock);
462	break;
463	case BTRFS_EXCLOP_BALANCE:
464	spin_lock(lock: &fs_info->super_lock);
465	ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
466	fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
467	spin_unlock(lock: &fs_info->super_lock);
468	break;
469	default:
470	btrfs_warn(fs_info,
471	"invalid exclop balance operation %d requested", op);
472	}
473	}
474
475	static int btrfs_ioctl_getversion(struct inode inode, int* __user *arg)
476	{
477	return put_user(inode->i_generation, arg);
478	}
479
480	static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
481	void __user *arg)
482	{
483	struct btrfs_device *device;
484	struct fstrim_range range;
485	u64 minlen = ULLONG_MAX;
486	u64 num_devices = `0`;
487	int ret;
488
489	if (!capable(CAP_SYS_ADMIN))
490	return -EPERM;
491
492	/*
493	* btrfs_trim_block_group() depends on space cache, which is not
494	* available in zoned filesystem. So, disallow fitrim on a zoned
495	* filesystem for now.
496	*/
497	if (btrfs_is_zoned(fs_info))
498	return -EOPNOTSUPP;
499
500	/*
501	* If the fs is mounted with nologreplay, which requires it to be
502	* mounted in RO mode as well, we can not allow discard on free space
503	* inside block groups, because log trees refer to extents that are not
504	* pinned in a block group's free space cache (pinning the extents is
505	* precisely the first phase of replaying a log tree).
506	*/
507	if (btrfs_test_opt(fs_info, NOLOGREPLAY))
508	return -EROFS;
509
510	rcu_read_lock();
511	list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
512	dev_list) {
513	if (!device->bdev \|\| !bdev_max_discard_sectors(bdev: device->bdev))
514	continue;
515	num_devices++;
516	minlen = min_t(u64, bdev_discard_granularity(device->bdev),
517	minlen);
518	}
519	rcu_read_unlock();
520
521	if (!num_devices)
522	return -EOPNOTSUPP;
523	if (copy_from_user(to: &range, from: arg, n: sizeof(range)))
524	return -EFAULT;
525
526	/*
527	* NOTE: Don't truncate the range using super->total_bytes. Bytenr of
528	* block group is in the logical address space, which can be any
529	* sectorsize aligned bytenr in the range [0, U64_MAX].
530	*/
531	if (range.len < fs_info->sb->s_blocksize)
532	return -EINVAL;
533
534	range.minlen = max(range.minlen, minlen);
535	ret = btrfs_trim_fs(fs_info, range: &range);
536	if (ret < `0`)
537	return ret;
538
539	if (copy_to_user(to: arg, from: &range, n: sizeof(range)))
540	return -EFAULT;
541
542	return `0`;
543	}
544
545	int __pure btrfs_is_empty_uuid(u8 *uuid)
546	{
547	int i;
548
549	for (i = `0`; i < BTRFS_UUID_SIZE; i++) {
550	if (uuid[i])
551	return `0`;
552	}
553	return `1`;
554	}
555
/*
 * Calculate the number of transaction items to reserve for creating a subvolume
 * or snapshot, not including the inode, directory entries, or parent directory.
 *
 * @inherit may be NULL; otherwise two extra items are counted for each
 * inherited qgroup (the qgroup relations).
 */
static unsigned int create_subvol_num_items(struct btrfs_qgroup_inherit *inherit)
{
	/*
	 * Fixed cost, one item each:
	 *   root block, root item, root ref, root backref, UUID item,
	 *   qgroup info, qgroup limit.
	 *
	 * Ideally the last two would only be accounted if qgroups are enabled,
	 * but that can change between now and the time we would insert them.
	 */
	enum { BASE_ITEMS = 7, ITEMS_PER_INHERITED_QGROUP = 2 };

	if (!inherit)
		return BASE_ITEMS;

	return BASE_ITEMS + ITEMS_PER_INHERITED_QGROUP * inherit->num_qgroups;
}
582
583	static noinline int create_subvol(struct mnt_idmap *idmap,
584	struct inode dir, struct* dentry *dentry,
585	struct btrfs_qgroup_inherit *inherit)
586	{
587	struct btrfs_fs_info *fs_info = btrfs_sb(sb: dir->i_sb);
588	struct btrfs_trans_handle *trans;
589	struct btrfs_key key;
590	struct btrfs_root_item *root_item;
591	struct btrfs_inode_item *inode_item;
592	struct extent_buffer *leaf;
593	struct btrfs_root *root = BTRFS_I(inode: dir)->root;
594	struct btrfs_root *new_root;
595	struct btrfs_block_rsv block_rsv;
596	struct timespec64 cur_time = current_time(inode: dir);
597	struct btrfs_new_inode_args new_inode_args = {
598	.dir = dir,
599	.dentry = dentry,
600	.subvol = true,
601	};
602	unsigned int trans_num_items;
603	int ret;
604	dev_t anon_dev;
605	u64 objectid;
606
607	root_item = kzalloc(size: sizeof(*root_item), GFP_KERNEL);
608	if (!root_item)
609	return -ENOMEM;
610
611	ret = btrfs_get_free_objectid(root: fs_info->tree_root, objectid: &objectid);
612	if (ret)
613	goto out_root_item;
614
615	/*
616	* Don't create subvolume whose level is not zero. Or qgroup will be
617	* screwed up since it assumes subvolume qgroup's level to be 0.
618	*/
619	if (btrfs_qgroup_level(qgroupid: objectid)) {
620	ret = -ENOSPC;
621	goto out_root_item;
622	}
623
624	ret = get_anon_bdev(&anon_dev);
625	if (ret < `0`)
626	goto out_root_item;
627
628	new_inode_args.inode = btrfs_new_subvol_inode(idmap, dir);
629	if (!new_inode_args.inode) {
630	ret = -ENOMEM;
631	goto out_anon_dev;
632	}
633	ret = btrfs_new_inode_prepare(args: &new_inode_args, trans_num_items: &trans_num_items);
634	if (ret)
635	goto out_inode;
636	trans_num_items += create_subvol_num_items(inherit);
637
638	btrfs_init_block_rsv(rsv: &block_rsv, type: BTRFS_BLOCK_RSV_TEMP);
639	ret = btrfs_subvolume_reserve_metadata(root, rsv: &block_rsv,
640	nitems: trans_num_items, use_global_rsv: false);
641	if (ret)
642	goto out_new_inode_args;
643
644	trans = btrfs_start_transaction(root, num_items: `0`);
645	if (IS_ERR(ptr: trans)) {
646	ret = PTR_ERR(ptr: trans);
647	btrfs_subvolume_release_metadata(root, rsv: &block_rsv);
648	goto out_new_inode_args;
649	}
650	trans->block_rsv = &block_rsv;
651	trans->bytes_reserved = block_rsv.size;
652	/ Tree log can't currently deal with an inode which is a new root. /
653	btrfs_set_log_full_commit(trans);
654
655	ret = btrfs_qgroup_inherit(trans, srcid: `0`, objectid, inode_rootid: root->root_key.objectid, inherit);
656	if (ret)
657	goto out;
658
659	leaf = btrfs_alloc_tree_block(trans, root, parent: `0`, root_objectid: objectid, NULL, level: `0`, hint: `0`, empty_size: `0`,
660	reloc_src_root: `0`, nest: BTRFS_NESTING_NORMAL);
661	if (IS_ERR(ptr: leaf)) {
662	ret = PTR_ERR(ptr: leaf);
663	goto out;
664	}
665
666	btrfs_mark_buffer_dirty(trans, buf: leaf);
667
668	inode_item = &root_item->inode;
669	btrfs_set_stack_inode_generation(s: inode_item, val: `1`);
670	btrfs_set_stack_inode_size(s: inode_item, val: `3`);
671	btrfs_set_stack_inode_nlink(s: inode_item, val: `1`);
672	btrfs_set_stack_inode_nbytes(s: inode_item,
673	val: fs_info->nodesize);
674	btrfs_set_stack_inode_mode(s: inode_item, S_IFDIR \| `0755`);
675
676	btrfs_set_root_flags(s: root_item, val: `0`);
677	btrfs_set_root_limit(s: root_item, val: `0`);
678	btrfs_set_stack_inode_flags(s: inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
679
680	btrfs_set_root_bytenr(s: root_item, val: leaf->start);
681	btrfs_set_root_generation(s: root_item, val: trans->transid);
682	btrfs_set_root_level(s: root_item, val: `0`);
683	btrfs_set_root_refs(s: root_item, val: `1`);
684	btrfs_set_root_used(s: root_item, val: leaf->len);
685	btrfs_set_root_last_snapshot(s: root_item, val: `0`);
686
687	btrfs_set_root_generation_v2(s: root_item,
688	val: btrfs_root_generation(s: root_item));
689	generate_random_guid(guid: root_item->uuid);
690	btrfs_set_stack_timespec_sec(s: &root_item->otime, val: cur_time.tv_sec);
691	btrfs_set_stack_timespec_nsec(s: &root_item->otime, val: cur_time.tv_nsec);
692	root_item->ctime = root_item->otime;
693	btrfs_set_root_ctransid(s: root_item, val: trans->transid);
694	btrfs_set_root_otransid(s: root_item, val: trans->transid);
695
696	btrfs_tree_unlock(eb: leaf);
697
698	btrfs_set_root_dirid(s: root_item, BTRFS_FIRST_FREE_OBJECTID);
699
700	key.objectid = objectid;
701	key.offset = `0`;
702	key.type = BTRFS_ROOT_ITEM_KEY;
703	ret = btrfs_insert_root(trans, root: fs_info->tree_root, key: &key,
704	item: root_item);
705	if (ret) {
706	/*
707	* Since we don't abort the transaction in this case, free the
708	* tree block so that we don't leak space and leave the
709	* filesystem in an inconsistent state (an extent item in the
710	* extent tree with a backreference for a root that does not
711	* exists).
712	*/
713	btrfs_tree_lock(eb: leaf);
714	btrfs_clear_buffer_dirty(trans, buf: leaf);
715	btrfs_tree_unlock(eb: leaf);
716	btrfs_free_tree_block(trans, root_id: objectid, buf: leaf, parent: `0`, last_ref: `1`);
717	free_extent_buffer(eb: leaf);
718	goto out;
719	}
720
721	free_extent_buffer(eb: leaf);
722	leaf = NULL;
723
724	new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
725	if (IS_ERR(ptr: new_root)) {
726	ret = PTR_ERR(ptr: new_root);
727	btrfs_abort_transaction(trans, ret);
728	goto out;
729	}
730	/ anon_dev is owned by new_root now. /
731	anon_dev = `0`;
732	BTRFS_I(inode: new_inode_args.inode)->root = new_root;
733	/ ... and new_root is owned by new_inode_args.inode now. /
734
735	ret = btrfs_record_root_in_trans(trans, root: new_root);
736	if (ret) {
737	btrfs_abort_transaction(trans, ret);
738	goto out;
739	}
740
741	ret = btrfs_uuid_tree_add(trans, uuid: root_item->uuid,
742	BTRFS_UUID_KEY_SUBVOL, subid: objectid);
743	if (ret) {
744	btrfs_abort_transaction(trans, ret);
745	goto out;
746	}
747
748	ret = btrfs_create_new_inode(trans, args: &new_inode_args);
749	if (ret) {
750	btrfs_abort_transaction(trans, ret);
751	goto out;
752	}
753
754	d_instantiate_new(dentry, new_inode_args.inode);
755	new_inode_args.inode = NULL;
756
757	out:
758	trans->block_rsv = NULL;
759	trans->bytes_reserved = `0`;
760	btrfs_subvolume_release_metadata(root, rsv: &block_rsv);
761
762	btrfs_end_transaction(trans);
763	out_new_inode_args:
764	btrfs_new_inode_args_destroy(args: &new_inode_args);
765	out_inode:
766	iput(new_inode_args.inode);
767	out_anon_dev:
768	if (anon_dev)
769	free_anon_bdev(anon_dev);
770	out_root_item:
771	kfree(objp: root_item);
772	return ret;
773	}
774
775	static int create_snapshot(struct btrfs_root root, struct* inode *dir,
776	struct dentry *dentry, bool readonly,
777	struct btrfs_qgroup_inherit *inherit)
778	{
779	struct btrfs_fs_info *fs_info = btrfs_sb(sb: dir->i_sb);
780	struct inode *inode;
781	struct btrfs_pending_snapshot *pending_snapshot;
782	unsigned int trans_num_items;
783	struct btrfs_trans_handle *trans;
784	int ret;
785
786	/ We do not support snapshotting right now. /
787	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
788	btrfs_warn(fs_info,
789	"extent tree v2 doesn't support snapshotting yet");
790	return -EOPNOTSUPP;
791	}
792
793	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
794	return -EINVAL;
795
796	if (atomic_read(v: &root->nr_swapfiles)) {
797	btrfs_warn(fs_info,
798	"cannot snapshot subvolume with active swapfile");
799	return -ETXTBSY;
800	}
801
802	pending_snapshot = kzalloc(size: sizeof(*pending_snapshot), GFP_KERNEL);
803	if (!pending_snapshot)
804	return -ENOMEM;
805
806	ret = get_anon_bdev(&pending_snapshot->anon_dev);
807	if (ret < `0`)
808	goto free_pending;
809	pending_snapshot->root_item = kzalloc(size: sizeof(struct btrfs_root_item),
810	GFP_KERNEL);
811	pending_snapshot->path = btrfs_alloc_path();
812	if (!pending_snapshot->root_item \|\| !pending_snapshot->path) {
813	ret = -ENOMEM;
814	goto free_pending;
815	}
816
817	btrfs_init_block_rsv(rsv: &pending_snapshot->block_rsv,
818	type: BTRFS_BLOCK_RSV_TEMP);
819	/*
820	* 1 to add dir item
821	* 1 to add dir index
822	* 1 to update parent inode item
823	*/
824	trans_num_items = create_subvol_num_items(inherit) + `3`;
825	ret = btrfs_subvolume_reserve_metadata(root: BTRFS_I(inode: dir)->root,
826	rsv: &pending_snapshot->block_rsv,
827	nitems: trans_num_items, use_global_rsv: false);
828	if (ret)
829	goto free_pending;
830
831	pending_snapshot->dentry = dentry;
832	pending_snapshot->root = root;
833	pending_snapshot->readonly = readonly;
834	pending_snapshot->dir = dir;
835	pending_snapshot->inherit = inherit;
836
837	trans = btrfs_start_transaction(root, num_items: `0`);
838	if (IS_ERR(ptr: trans)) {
839	ret = PTR_ERR(ptr: trans);
840	goto fail;
841	}
842
843	trans->pending_snapshot = pending_snapshot;
844
845	ret = btrfs_commit_transaction(trans);
846	if (ret)
847	goto fail;
848
849	ret = pending_snapshot->error;
850	if (ret)
851	goto fail;
852
853	ret = btrfs_orphan_cleanup(root: pending_snapshot->snap);
854	if (ret)
855	goto fail;
856
857	inode = btrfs_lookup_dentry(dir: d_inode(dentry: dentry->d_parent), dentry);
858	if (IS_ERR(ptr: inode)) {
859	ret = PTR_ERR(ptr: inode);
860	goto fail;
861	}
862
863	d_instantiate(dentry, inode);
864	ret = `0`;
865	pending_snapshot->anon_dev = `0`;
866	fail:
867	/ Prevent double freeing of anon_dev /
868	if (ret && pending_snapshot->snap)
869	pending_snapshot->snap->anon_dev = `0`;
870	btrfs_put_root(root: pending_snapshot->snap);
871	btrfs_subvolume_release_metadata(root, rsv: &pending_snapshot->block_rsv);
872	free_pending:
873	if (pending_snapshot->anon_dev)
874	free_anon_bdev(pending_snapshot->anon_dev);
875	kfree(objp: pending_snapshot->root_item);
876	btrfs_free_path(p: pending_snapshot->path);
877	kfree(objp: pending_snapshot);
878
879	return ret;
880	}
881
882	/ copy of may_delete in fs/namei.c()*
883	* Check whether we can remove a link victim from directory dir, check
884	* whether the type of victim is right.
885	* 1. We can't do it if dir is read-only (done in permission())
886	* 2. We should have write and exec permissions on dir
887	* 3. We can't remove anything from append-only dir
888	* 4. We can't do anything with immutable dir (done in permission())
889	* 5. If the sticky bit on dir is set we should either
890	* a. be owner of dir, or
891	* b. be owner of victim, or
892	* c. have CAP_FOWNER capability
893	* 6. If the victim is append-only or immutable we can't do anything with
894	* links pointing to it.
895	* 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
896	* 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
897	* 9. We can't remove a root or mountpoint.
898	* 10. We don't allow removal of NFS sillyrenamed files; it's handled by
899	* nfs_async_unlink().
900	*/
901
902	static int btrfs_may_delete(struct mnt_idmap *idmap,
903	struct inode dir, struct* dentry victim, int* isdir)
904	{
905	int error;
906
907	if (d_really_is_negative(dentry: victim))
908	return -ENOENT;
909
910	BUG_ON(d_inode(victim->d_parent) != dir);
911	audit_inode_child(parent: dir, dentry: victim, AUDIT_TYPE_CHILD_DELETE);
912
913	error = inode_permission(idmap, dir, MAY_WRITE \| MAY_EXEC);
914	if (error)
915	return error;
916	if (IS_APPEND(dir))
917	return -EPERM;
918	if (check_sticky(idmap, dir, inode: d_inode(dentry: victim)) \|\|
919	IS_APPEND(d_inode(victim)) \|\| IS_IMMUTABLE(d_inode(victim)) \|\|
920	IS_SWAPFILE(d_inode(victim)))
921	return -EPERM;
922	if (isdir) {
923	if (!d_is_dir(dentry: victim))
924	return -ENOTDIR;
925	if (IS_ROOT(victim))
926	return -EBUSY;
927	} else if (d_is_dir(dentry: victim))
928	return -EISDIR;
929	if (IS_DEADDIR(dir))
930	return -ENOENT;
931	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
932	return -EBUSY;
933	return `0`;
934	}
935
936	/ copy of may_create in fs/namei.c() /
937	static inline int btrfs_may_create(struct mnt_idmap *idmap,
938	struct inode dir, struct* dentry *child)
939	{
940	if (d_really_is_positive(dentry: child))
941	return -EEXIST;
942	if (IS_DEADDIR(dir))
943	return -ENOENT;
944	if (!fsuidgid_has_mapping(sb: dir->i_sb, idmap))
945	return -EOVERFLOW;
946	return inode_permission(idmap, dir, MAY_WRITE \| MAY_EXEC);
947	}
948
949	/*
950	* Create a new subvolume below @parent. This is largely modeled after
951	* sys_mkdirat and vfs_mkdir, but we only do a single component lookup
952	* inside this filesystem so it's quite a bit simpler.
953	*/
954	static noinline int btrfs_mksubvol(const struct path *parent,
955	struct mnt_idmap *idmap,
956	const char name, int* namelen,
957	struct btrfs_root *snap_src,
958	bool readonly,
959	struct btrfs_qgroup_inherit *inherit)
960	{
961	struct inode *dir = d_inode(dentry: parent->dentry);
962	struct btrfs_fs_info *fs_info = btrfs_sb(sb: dir->i_sb);
963	struct dentry *dentry;
964	struct fscrypt_str name_str = FSTR_INIT((char *)name, namelen);
965	int error;
966
967	error = down_write_killable_nested(sem: &dir->i_rwsem, subclass: I_MUTEX_PARENT);
968	if (error == -EINTR)
969	return error;
970
971	dentry = lookup_one(idmap, name, parent->dentry, namelen);
972	error = PTR_ERR(ptr: dentry);
973	if (IS_ERR(ptr: dentry))
974	goto out_unlock;
975
976	error = btrfs_may_create(idmap, dir, child: dentry);
977	if (error)
978	goto out_dput;
979
980	/*
981	* even if this name doesn't exist, we may get hash collisions.
982	* check for them now when we can safely fail
983	*/
984	error = btrfs_check_dir_item_collision(root: BTRFS_I(inode: dir)->root,
985	dir: dir->i_ino, name: &name_str);
986	if (error)
987	goto out_dput;
988
989	down_read(sem: &fs_info->subvol_sem);
990
991	if (btrfs_root_refs(s: &BTRFS_I(inode: dir)->root->root_item) == `0`)
992	goto out_up_read;
993
994	if (snap_src)
995	error = create_snapshot(root: snap_src, dir, dentry, readonly, inherit);
996	else
997	error = create_subvol(idmap, dir, dentry, inherit);
998
999	if (!error)
1000	fsnotify_mkdir(dir, dentry);
1001	out_up_read:
1002	up_read(sem: &fs_info->subvol_sem);
1003	out_dput:
1004	dput(dentry);
1005	out_unlock:
1006	btrfs_inode_unlock(inode: BTRFS_I(inode: dir), ilock_flags: `0`);
1007	return error;
1008	}
1009
1010	static noinline int btrfs_mksnapshot(const struct path *parent,
1011	struct mnt_idmap *idmap,
1012	const char name, int* namelen,
1013	struct btrfs_root *root,
1014	bool readonly,
1015	struct btrfs_qgroup_inherit *inherit)
1016	{
1017	int ret;
1018	bool snapshot_force_cow = false;
1019
1020	/*
1021	* Force new buffered writes to reserve space even when NOCOW is
1022	* possible. This is to avoid later writeback (running dealloc) to
1023	* fallback to COW mode and unexpectedly fail with ENOSPC.
1024	*/
1025	btrfs_drew_read_lock(lock: &root->snapshot_lock);
1026
1027	ret = btrfs_start_delalloc_snapshot(root, in_reclaim_context: false);
1028	if (ret)
1029	goto out;
1030
1031	/*
1032	* All previous writes have started writeback in NOCOW mode, so now
1033	* we force future writes to fallback to COW mode during snapshot
1034	* creation.
1035	*/
1036	atomic_inc(v: &root->snapshot_force_cow);
1037	snapshot_force_cow = true;
1038
1039	btrfs_wait_ordered_extents(root, U64_MAX, range_start: `0`, range_len: (u64)-`1`);
1040
1041	ret = btrfs_mksubvol(parent, idmap, name, namelen,
1042	snap_src: root, readonly, inherit);
1043	out:
1044	if (snapshot_force_cow)
1045	atomic_dec(v: &root->snapshot_force_cow);
1046	btrfs_drew_read_unlock(lock: &root->snapshot_lock);
1047	return ret;
1048	}
1049
1050	/*
1051	* Try to start exclusive operation @type or cancel it if it's running.
1052	*
1053	* Return:
1054	* 0 - normal mode, newly claimed op started
1055	* >0 - normal mode, something else is running,
1056	* return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS to user space
1057	* ECANCELED - cancel mode, successful cancel
1058	* ENOTCONN - cancel mode, operation not running anymore
1059	*/
1060	static int exclop_start_or_cancel_reloc(struct btrfs_fs_info *fs_info,
1061	enum btrfs_exclusive_operation type, bool cancel)
1062	{
1063	if (!cancel) {
1064	/ Start normal op /
1065	if (!btrfs_exclop_start(fs_info, type))
1066	return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
1067	/ Exclusive operation is now claimed /
1068	return `0`;
1069	}
1070
1071	/ Cancel running op /
1072	if (btrfs_exclop_start_try_lock(fs_info, type)) {
1073	/*
1074	* This blocks any exclop finish from setting it to NONE, so we
1075	* request cancellation. Either it runs and we will wait for it,
1076	* or it has finished and no waiting will happen.
1077	*/
1078	atomic_inc(v: &fs_info->reloc_cancel_req);
1079	btrfs_exclop_start_unlock(fs_info);
1080
1081	if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
1082	wait_on_bit(word: &fs_info->flags, bit: BTRFS_FS_RELOC_RUNNING,
1083	TASK_INTERRUPTIBLE);
1084
1085	return -ECANCELED;
1086	}
1087
1088	/ Something else is running or none /
1089	return -ENOTCONN;
1090	}
1091
1092	static noinline int btrfs_ioctl_resize(struct file *file,
1093	void __user *arg)
1094	{
1095	BTRFS_DEV_LOOKUP_ARGS(args);
1096	struct inode *inode = file_inode(f: file);
1097	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
1098	u64 new_size;
1099	u64 old_size;
1100	u64 devid = `1`;
1101	struct btrfs_root *root = BTRFS_I(inode)->root;
1102	struct btrfs_ioctl_vol_args *vol_args;
1103	struct btrfs_trans_handle *trans;
1104	struct btrfs_device *device = NULL;
1105	char *sizestr;
1106	char *retptr;
1107	char *devstr = NULL;
1108	int ret = `0`;
1109	int mod = `0`;
1110	bool cancel;
1111
1112	if (!capable(CAP_SYS_ADMIN))
1113	return -EPERM;
1114
1115	ret = mnt_want_write_file(file);
1116	if (ret)
1117	return ret;
1118
1119	/*
1120	* Read the arguments before checking exclusivity to be able to
1121	* distinguish regular resize and cancel
1122	*/
1123	vol_args = memdup_user(arg, sizeof(*vol_args));
1124	if (IS_ERR(ptr: vol_args)) {
1125	ret = PTR_ERR(ptr: vol_args);
1126	goto out_drop;
1127	}
1128	vol_args->name[BTRFS_PATH_NAME_MAX] = `'\0'`;
1129	sizestr = vol_args->name;
1130	cancel = (strcmp("cancel", sizestr) == `0`);
1131	ret = exclop_start_or_cancel_reloc(fs_info, type: BTRFS_EXCLOP_RESIZE, cancel);
1132	if (ret)
1133	goto out_free;
1134	/ Exclusive operation is now claimed /
1135
1136	devstr = strchr(sizestr, `':'`);
1137	if (devstr) {
1138	sizestr = devstr + `1`;
1139	*devstr = `'\0'`;
1140	devstr = vol_args->name;
1141	ret = kstrtoull(s: devstr, base: `10`, res: &devid);
1142	if (ret)
1143	goto out_finish;
1144	if (!devid) {
1145	ret = -EINVAL;
1146	goto out_finish;
1147	}
1148	btrfs_info(fs_info, "resizing devid %llu", devid);
1149	}
1150
1151	args.devid = devid;
1152	device = btrfs_find_device(fs_devices: fs_info->fs_devices, args: &args);
1153	if (!device) {
1154	btrfs_info(fs_info, "resizer unable to find device %llu",
1155	devid);
1156	ret = -ENODEV;
1157	goto out_finish;
1158	}
1159
1160	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
1161	btrfs_info(fs_info,
1162	"resizer unable to apply on readonly device %llu",
1163	devid);
1164	ret = -EPERM;
1165	goto out_finish;
1166	}
1167
1168	if (!strcmp(sizestr, "max"))
1169	new_size = bdev_nr_bytes(bdev: device->bdev);
1170	else {
1171	if (sizestr[`0`] == `'-'`) {
1172	mod = -`1`;
1173	sizestr++;
1174	} else if (sizestr[`0`] == `'+'`) {
1175	mod = `1`;
1176	sizestr++;
1177	}
1178	new_size = memparse(ptr: sizestr, retptr: &retptr);
1179	if (*retptr != `'\0'` \|\| new_size == `0`) {
1180	ret = -EINVAL;
1181	goto out_finish;
1182	}
1183	}
1184
1185	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
1186	ret = -EPERM;
1187	goto out_finish;
1188	}
1189
1190	old_size = btrfs_device_get_total_bytes(dev: device);
1191
1192	if (mod < `0`) {
1193	if (new_size > old_size) {
1194	ret = -EINVAL;
1195	goto out_finish;
1196	}
1197	new_size = old_size - new_size;
1198	} else if (mod > `0`) {
1199	if (new_size > ULLONG_MAX - old_size) {
1200	ret = -ERANGE;
1201	goto out_finish;
1202	}
1203	new_size = old_size + new_size;
1204	}
1205
1206	if (new_size < SZ_256M) {
1207	ret = -EINVAL;
1208	goto out_finish;
1209	}
1210	if (new_size > bdev_nr_bytes(bdev: device->bdev)) {
1211	ret = -EFBIG;
1212	goto out_finish;
1213	}
1214
1215	new_size = round_down(new_size, fs_info->sectorsize);
1216
1217	if (new_size > old_size) {
1218	trans = btrfs_start_transaction(root, num_items: `0`);
1219	if (IS_ERR(ptr: trans)) {
1220	ret = PTR_ERR(ptr: trans);
1221	goto out_finish;
1222	}
1223	ret = btrfs_grow_device(trans, device, new_size);
1224	btrfs_commit_transaction(trans);
1225	} else if (new_size < old_size) {
1226	ret = btrfs_shrink_device(device, new_size);
1227	} / equal, nothing need to do /
1228
1229	if (ret == `0` && new_size != old_size)
1230	btrfs_info_in_rcu(fs_info,
1231	"resize device %s (devid %llu) from %llu to %llu",
1232	btrfs_dev_name(device), device->devid,
1233	old_size, new_size);
1234	out_finish:
1235	btrfs_exclop_finish(fs_info);
1236	out_free:
1237	kfree(objp: vol_args);
1238	out_drop:
1239	mnt_drop_write_file(file);
1240	return ret;
1241	}
1242
1243	static noinline int __btrfs_ioctl_snap_create(struct file *file,
1244	struct mnt_idmap *idmap,
1245	const char name, unsigned* long fd, int subvol,
1246	bool readonly,
1247	struct btrfs_qgroup_inherit *inherit)
1248	{
1249	int namelen;
1250	int ret = `0`;
1251
1252	if (!S_ISDIR(file_inode(file)->i_mode))
1253	return -ENOTDIR;
1254
1255	ret = mnt_want_write_file(file);
1256	if (ret)
1257	goto out;
1258
1259	namelen = strlen(name);
1260	if (strchr(name, `'/'`)) {
1261	ret = -EINVAL;
1262	goto out_drop_write;
1263	}
1264
1265	if (name[`0`] == `'.'` &&
1266	(namelen == `1` \|\| (name[`1`] == `'.'` && namelen == `2`))) {
1267	ret = -EEXIST;
1268	goto out_drop_write;
1269	}
1270
1271	if (subvol) {
1272	ret = btrfs_mksubvol(parent: &file->f_path, idmap, name,
1273	namelen, NULL, readonly, inherit);
1274	} else {
1275	struct fd src = fdget(fd);
1276	struct inode *src_inode;
1277	if (!src.file) {
1278	ret = -EINVAL;
1279	goto out_drop_write;
1280	}
1281
1282	src_inode = file_inode(f: src.file);
1283	if (src_inode->i_sb != file_inode(f: file)->i_sb) {
1284	btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
1285	"Snapshot src from another FS");
1286	ret = -EXDEV;
1287	} else if (!inode_owner_or_capable(idmap, inode: src_inode)) {
1288	/*
1289	* Subvolume creation is not restricted, but snapshots
1290	* are limited to own subvolumes only
1291	*/
1292	ret = -EPERM;
1293	} else {
1294	ret = btrfs_mksnapshot(parent: &file->f_path, idmap,
1295	name, namelen,
1296	root: BTRFS_I(inode: src_inode)->root,
1297	readonly, inherit);
1298	}
1299	fdput(fd: src);
1300	}
1301	out_drop_write:
1302	mnt_drop_write_file(file);
1303	out:
1304	return ret;
1305	}
1306
1307	static noinline int btrfs_ioctl_snap_create(struct file *file,
1308	void __user arg, int* subvol)
1309	{
1310	struct btrfs_ioctl_vol_args *vol_args;
1311	int ret;
1312
1313	if (!S_ISDIR(file_inode(file)->i_mode))
1314	return -ENOTDIR;
1315
1316	vol_args = memdup_user(arg, sizeof(*vol_args));
1317	if (IS_ERR(ptr: vol_args))
1318	return PTR_ERR(ptr: vol_args);
1319	vol_args->name[BTRFS_PATH_NAME_MAX] = `'\0'`;
1320
1321	ret = __btrfs_ioctl_snap_create(file, idmap: file_mnt_idmap(file),
1322	name: vol_args->name, fd: vol_args->fd, subvol,
1323	readonly: false, NULL);
1324
1325	kfree(objp: vol_args);
1326	return ret;
1327	}
1328
1329	static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
1330	void __user arg, int* subvol)
1331	{
1332	struct btrfs_ioctl_vol_args_v2 *vol_args;
1333	int ret;
1334	bool readonly = false;
1335	struct btrfs_qgroup_inherit *inherit = NULL;
1336
1337	if (!S_ISDIR(file_inode(file)->i_mode))
1338	return -ENOTDIR;
1339
1340	vol_args = memdup_user(arg, sizeof(*vol_args));
1341	if (IS_ERR(ptr: vol_args))
1342	return PTR_ERR(ptr: vol_args);
1343	vol_args->name[BTRFS_SUBVOL_NAME_MAX] = `'\0'`;
1344
1345	if (vol_args->flags & ~BTRFS_SUBVOL_CREATE_ARGS_MASK) {
1346	ret = -EOPNOTSUPP;
1347	goto free_args;
1348	}
1349
1350	if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
1351	readonly = true;
1352	if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
1353	u64 nums;
1354
1355	if (vol_args->size < sizeof(*inherit) \|\|
1356	vol_args->size > PAGE_SIZE) {
1357	ret = -EINVAL;
1358	goto free_args;
1359	}
1360	inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
1361	if (IS_ERR(ptr: inherit)) {
1362	ret = PTR_ERR(ptr: inherit);
1363	goto free_args;
1364	}
1365
1366	if (inherit->num_qgroups > PAGE_SIZE \|\|
1367	inherit->num_ref_copies > PAGE_SIZE \|\|
1368	inherit->num_excl_copies > PAGE_SIZE) {
1369	ret = -EINVAL;
1370	goto free_inherit;
1371	}
1372
1373	nums = inherit->num_qgroups + `2` * inherit->num_ref_copies +
1374	`2` * inherit->num_excl_copies;
1375	if (vol_args->size != struct_size(inherit, qgroups, nums)) {
1376	ret = -EINVAL;
1377	goto free_inherit;
1378	}
1379	}
1380
1381	ret = __btrfs_ioctl_snap_create(file, idmap: file_mnt_idmap(file),
1382	name: vol_args->name, fd: vol_args->fd, subvol,
1383	readonly, inherit);
1384	if (ret)
1385	goto free_inherit;
1386	free_inherit:
1387	kfree(objp: inherit);
1388	free_args:
1389	kfree(objp: vol_args);
1390	return ret;
1391	}
1392
1393	static noinline int btrfs_ioctl_subvol_getflags(struct inode *inode,
1394	void __user *arg)
1395	{
1396	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
1397	struct btrfs_root *root = BTRFS_I(inode)->root;
1398	int ret = `0`;
1399	u64 flags = `0`;
1400
1401	if (btrfs_ino(inode: BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID)
1402	return -EINVAL;
1403
1404	down_read(sem: &fs_info->subvol_sem);
1405	if (btrfs_root_readonly(root))
1406	flags \|= BTRFS_SUBVOL_RDONLY;
1407	up_read(sem: &fs_info->subvol_sem);
1408
1409	if (copy_to_user(to: arg, from: &flags, n: sizeof(flags)))
1410	ret = -EFAULT;
1411
1412	return ret;
1413	}
1414
1415	static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1416	void __user *arg)
1417	{
1418	struct inode *inode = file_inode(f: file);
1419	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
1420	struct btrfs_root *root = BTRFS_I(inode)->root;
1421	struct btrfs_trans_handle *trans;
1422	u64 root_flags;
1423	u64 flags;
1424	int ret = `0`;
1425
1426	if (!inode_owner_or_capable(idmap: file_mnt_idmap(file), inode))
1427	return -EPERM;
1428
1429	ret = mnt_want_write_file(file);
1430	if (ret)
1431	goto out;
1432
1433	if (btrfs_ino(inode: BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
1434	ret = -EINVAL;
1435	goto out_drop_write;
1436	}
1437
1438	if (copy_from_user(to: &flags, from: arg, n: sizeof(flags))) {
1439	ret = -EFAULT;
1440	goto out_drop_write;
1441	}
1442
1443	if (flags & ~BTRFS_SUBVOL_RDONLY) {
1444	ret = -EOPNOTSUPP;
1445	goto out_drop_write;
1446	}
1447
1448	down_write(sem: &fs_info->subvol_sem);
1449
1450	/ nothing to do /
1451	if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
1452	goto out_drop_sem;
1453
1454	root_flags = btrfs_root_flags(s: &root->root_item);
1455	if (flags & BTRFS_SUBVOL_RDONLY) {
1456	btrfs_set_root_flags(s: &root->root_item,
1457	val: root_flags \| BTRFS_ROOT_SUBVOL_RDONLY);
1458	} else {
1459	/*
1460	* Block RO -> RW transition if this subvolume is involved in
1461	* send
1462	*/
1463	spin_lock(lock: &root->root_item_lock);
1464	if (root->send_in_progress == `0`) {
1465	btrfs_set_root_flags(s: &root->root_item,
1466	val: root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
1467	spin_unlock(lock: &root->root_item_lock);
1468	} else {
1469	spin_unlock(lock: &root->root_item_lock);
1470	btrfs_warn(fs_info,
1471	"Attempt to set subvolume %llu read-write during send",
1472	root->root_key.objectid);
1473	ret = -EPERM;
1474	goto out_drop_sem;
1475	}
1476	}
1477
1478	trans = btrfs_start_transaction(root, num_items: `1`);
1479	if (IS_ERR(ptr: trans)) {
1480	ret = PTR_ERR(ptr: trans);
1481	goto out_reset;
1482	}
1483
1484	ret = btrfs_update_root(trans, root: fs_info->tree_root,
1485	key: &root->root_key, item: &root->root_item);
1486	if (ret < `0`) {
1487	btrfs_end_transaction(trans);
1488	goto out_reset;
1489	}
1490
1491	ret = btrfs_commit_transaction(trans);
1492
1493	out_reset:
1494	if (ret)
1495	btrfs_set_root_flags(s: &root->root_item, val: root_flags);
1496	out_drop_sem:
1497	up_write(sem: &fs_info->subvol_sem);
1498	out_drop_write:
1499	mnt_drop_write_file(file);
1500	out:
1501	return ret;
1502	}
1503
1504	static noinline int key_in_sk(struct btrfs_key *key,
1505	struct btrfs_ioctl_search_key *sk)
1506	{
1507	struct btrfs_key test;
1508	int ret;
1509
1510	test.objectid = sk->min_objectid;
1511	test.type = sk->min_type;
1512	test.offset = sk->min_offset;
1513
1514	ret = btrfs_comp_cpu_keys(k1: key, k2: &test);
1515	if (ret < `0`)
1516	return `0`;
1517
1518	test.objectid = sk->max_objectid;
1519	test.type = sk->max_type;
1520	test.offset = sk->max_offset;
1521
1522	ret = btrfs_comp_cpu_keys(k1: key, k2: &test);
1523	if (ret > `0`)
1524	return `0`;
1525	return `1`;
1526	}
1527
1528	static noinline int copy_to_sk(struct btrfs_path *path,
1529	struct btrfs_key *key,
1530	struct btrfs_ioctl_search_key *sk,
1531	size_t *buf_size,
1532	char __user *ubuf,
1533	unsigned long *sk_offset,
1534	int *num_found)
1535	{
1536	u64 found_transid;
1537	struct extent_buffer *leaf;
1538	struct btrfs_ioctl_search_header sh;
1539	struct btrfs_key test;
1540	unsigned long item_off;
1541	unsigned long item_len;
1542	int nritems;
1543	int i;
1544	int slot;
1545	int ret = `0`;
1546
1547	leaf = path->nodes[`0`];
1548	slot = path->slots[`0`];
1549	nritems = btrfs_header_nritems(eb: leaf);
1550
1551	if (btrfs_header_generation(eb: leaf) > sk->max_transid) {
1552	i = nritems;
1553	goto advance_key;
1554	}
1555	found_transid = btrfs_header_generation(eb: leaf);
1556
1557	for (i = slot; i < nritems; i++) {
1558	item_off = btrfs_item_ptr_offset(leaf, i);
1559	item_len = btrfs_item_size(eb: leaf, slot: i);
1560
1561	btrfs_item_key_to_cpu(eb: leaf, cpu_key: key, nr: i);
1562	if (!key_in_sk(key, sk))
1563	continue;
1564
1565	if (sizeof(sh) + item_len > *buf_size) {
1566	if (*num_found) {
1567	ret = `1`;
1568	goto out;
1569	}
1570
1571	/*
1572	* return one empty item back for v1, which does not
1573	* handle -EOVERFLOW
1574	*/
1575
1576	buf_size = sizeof*(sh) + item_len;
1577	item_len = `0`;
1578	ret = -EOVERFLOW;
1579	}
1580
1581	if (sizeof(sh) + item_len + sk_offset > buf_size) {
1582	ret = `1`;
1583	goto out;
1584	}
1585
1586	sh.objectid = key->objectid;
1587	sh.offset = key->offset;
1588	sh.type = key->type;
1589	sh.len = item_len;
1590	sh.transid = found_transid;
1591
1592	/*
1593	* Copy search result header. If we fault then loop again so we
1594	* can fault in the pages and -EFAULT there if there's a
1595	* problem. Otherwise we'll fault and then copy the buffer in
1596	* properly this next time through
1597	*/
1598	if (copy_to_user_nofault(dst: ubuf + sk_offset, src: &sh, size: sizeof*(sh))) {
1599	ret = `0`;
1600	goto out;
1601	}
1602
1603	sk_offset += sizeof*(sh);
1604
1605	if (item_len) {
1606	char __user up = ubuf + sk_offset;
1607	/*
1608	* Copy the item, same behavior as above, but reset the
1609	* * sk_offset so we copy the full thing again.
1610	*/
1611	if (read_extent_buffer_to_user_nofault(eb: leaf, dst: up,
1612	start: item_off, len: item_len)) {
1613	ret = `0`;
1614	sk_offset -= sizeof*(sh);
1615	goto out;
1616	}
1617
1618	*sk_offset += item_len;
1619	}
1620	(*num_found)++;
1621
1622	if (ret) / -EOVERFLOW from above /
1623	goto out;
1624
1625	if (*num_found >= sk->nr_items) {
1626	ret = `1`;
1627	goto out;
1628	}
1629	}
1630	advance_key:
1631	ret = `0`;
1632	test.objectid = sk->max_objectid;
1633	test.type = sk->max_type;
1634	test.offset = sk->max_offset;
1635	if (btrfs_comp_cpu_keys(k1: key, k2: &test) >= `0`)
1636	ret = `1`;
1637	else if (key->offset < (u64)-`1`)
1638	key->offset++;
1639	else if (key->type < (u8)-`1`) {
1640	key->offset = `0`;
1641	key->type++;
1642	} else if (key->objectid < (u64)-`1`) {
1643	key->offset = `0`;
1644	key->type = `0`;
1645	key->objectid++;
1646	} else
1647	ret = `1`;
1648	out:
1649	/*
1650	* 0: all items from this leaf copied, continue with next
1651	* 1: * more items can be copied, but unused buffer is too small
1652	* * all items were found
1653	* Either way, it will stops the loop which iterates to the next
1654	* leaf
1655	* -EOVERFLOW: item was to large for buffer
1656	* -EFAULT: could not copy extent buffer back to userspace
1657	*/
1658	return ret;
1659	}
1660
1661	static noinline int search_ioctl(struct inode *inode,
1662	struct btrfs_ioctl_search_key *sk,
1663	size_t *buf_size,
1664	char __user *ubuf)
1665	{
1666	struct btrfs_fs_info *info = btrfs_sb(sb: inode->i_sb);
1667	struct btrfs_root *root;
1668	struct btrfs_key key;
1669	struct btrfs_path *path;
1670	int ret;
1671	int num_found = `0`;
1672	unsigned long sk_offset = `0`;
1673
1674	if (buf_size < sizeof(struct* btrfs_ioctl_search_header)) {
1675	buf_size = sizeof(struct* btrfs_ioctl_search_header);
1676	return -EOVERFLOW;
1677	}
1678
1679	path = btrfs_alloc_path();
1680	if (!path)
1681	return -ENOMEM;
1682
1683	if (sk->tree_id == `0`) {
1684	/ search the root of the inode that was passed /
1685	root = btrfs_grab_root(root: BTRFS_I(inode)->root);
1686	} else {
1687	root = btrfs_get_fs_root(fs_info: info, objectid: sk->tree_id, check_ref: true);
1688	if (IS_ERR(ptr: root)) {
1689	btrfs_free_path(p: path);
1690	return PTR_ERR(ptr: root);
1691	}
1692	}
1693
1694	key.objectid = sk->min_objectid;
1695	key.type = sk->min_type;
1696	key.offset = sk->min_offset;
1697
1698	while (`1`) {
1699	ret = -EFAULT;
1700	/*
1701	* Ensure that the whole user buffer is faulted in at sub-page
1702	* granularity, otherwise the loop may live-lock.
1703	*/
1704	if (fault_in_subpage_writeable(uaddr: ubuf + sk_offset,
1705	size: *buf_size - sk_offset))
1706	break;
1707
1708	ret = btrfs_search_forward(root, min_key: &key, path, min_trans: sk->min_transid);
1709	if (ret != `0`) {
1710	if (ret > `0`)
1711	ret = `0`;
1712	goto err;
1713	}
1714	ret = copy_to_sk(path, key: &key, sk, buf_size, ubuf,
1715	sk_offset: &sk_offset, num_found: &num_found);
1716	btrfs_release_path(p: path);
1717	if (ret)
1718	break;
1719
1720	}
1721	if (ret > `0`)
1722	ret = `0`;
1723	err:
1724	sk->nr_items = num_found;
1725	btrfs_put_root(root);
1726	btrfs_free_path(p: path);
1727	return ret;
1728	}
1729
1730	static noinline int btrfs_ioctl_tree_search(struct inode *inode,
1731	void __user *argp)
1732	{
1733	struct btrfs_ioctl_search_args __user *uargs = argp;
1734	struct btrfs_ioctl_search_key sk;
1735	int ret;
1736	size_t buf_size;
1737
1738	if (!capable(CAP_SYS_ADMIN))
1739	return -EPERM;
1740
1741	if (copy_from_user(to: &sk, from: &uargs->key, n: sizeof(sk)))
1742	return -EFAULT;
1743
1744	buf_size = sizeof(uargs->buf);
1745
1746	ret = search_ioctl(inode, sk: &sk, buf_size: &buf_size, ubuf: uargs->buf);
1747
1748	/*
1749	* In the origin implementation an overflow is handled by returning a
1750	* search header with a len of zero, so reset ret.
1751	*/
1752	if (ret == -EOVERFLOW)
1753	ret = `0`;
1754
1755	if (ret == `0` && copy_to_user(to: &uargs->key, from: &sk, n: sizeof(sk)))
1756	ret = -EFAULT;
1757	return ret;
1758	}
1759
1760	static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
1761	void __user *argp)
1762	{
1763	struct btrfs_ioctl_search_args_v2 __user *uarg = argp;
1764	struct btrfs_ioctl_search_args_v2 args;
1765	int ret;
1766	size_t buf_size;
1767	const size_t buf_limit = SZ_16M;
1768
1769	if (!capable(CAP_SYS_ADMIN))
1770	return -EPERM;
1771
1772	/ copy search header and buffer size /
1773	if (copy_from_user(to: &args, from: uarg, n: sizeof(args)))
1774	return -EFAULT;
1775
1776	buf_size = args.buf_size;
1777
1778	/ limit result size to 16MB /
1779	if (buf_size > buf_limit)
1780	buf_size = buf_limit;
1781
1782	ret = search_ioctl(inode, sk: &args.key, buf_size: &buf_size,
1783	ubuf: (char __user *)(&uarg->buf[`0`]));
1784	if (ret == `0` && copy_to_user(to: &uarg->key, from: &args.key, n: sizeof(args.key)))
1785	ret = -EFAULT;
1786	else if (ret == -EOVERFLOW &&
1787	copy_to_user(to: &uarg->buf_size, from: &buf_size, n: sizeof(buf_size)))
1788	ret = -EFAULT;
1789
1790	return ret;
1791	}
1792
1793	/*
1794	* Search INODE_REFs to identify path name of 'dirid' directory
1795	* in a 'tree_id' tree. and sets path name to 'name'.
1796	*/
1797	static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1798	u64 tree_id, u64 dirid, char *name)
1799	{
1800	struct btrfs_root *root;
1801	struct btrfs_key key;
1802	char *ptr;
1803	int ret = -`1`;
1804	int slot;
1805	int len;
1806	int total_len = `0`;
1807	struct btrfs_inode_ref *iref;
1808	struct extent_buffer *l;
1809	struct btrfs_path *path;
1810
1811	if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
1812	name[`0`]=`'\0'`;
1813	return `0`;
1814	}
1815
1816	path = btrfs_alloc_path();
1817	if (!path)
1818	return -ENOMEM;
1819
1820	ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - `1`];
1821
1822	root = btrfs_get_fs_root(fs_info: info, objectid: tree_id, check_ref: true);
1823	if (IS_ERR(ptr: root)) {
1824	ret = PTR_ERR(ptr: root);
1825	root = NULL;
1826	goto out;
1827	}
1828
1829	key.objectid = dirid;
1830	key.type = BTRFS_INODE_REF_KEY;
1831	key.offset = (u64)-`1`;
1832
1833	while (`1`) {
1834	ret = btrfs_search_backwards(root, key: &key, path);
1835	if (ret < `0`)
1836	goto out;
1837	else if (ret > `0`) {
1838	ret = -ENOENT;
1839	goto out;
1840	}
1841
1842	l = path->nodes[`0`];
1843	slot = path->slots[`0`];
1844
1845	iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
1846	len = btrfs_inode_ref_name_len(eb: l, s: iref);
1847	ptr -= len + `1`;
1848	total_len += len + `1`;
1849	if (ptr < name) {
1850	ret = -ENAMETOOLONG;
1851	goto out;
1852	}
1853
1854	*(ptr + len) = `'/'`;
1855	read_extent_buffer(eb: l, dst: ptr, start: (unsigned long)(iref + `1`), len);
1856
1857	if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
1858	break;
1859
1860	btrfs_release_path(p: path);
1861	key.objectid = key.offset;
1862	key.offset = (u64)-`1`;
1863	dirid = key.objectid;
1864	}
1865	memmove(name, ptr, total_len);
1866	name[total_len] = `'\0'`;
1867	ret = `0`;
1868	out:
1869	btrfs_put_root(root);
1870	btrfs_free_path(p: path);
1871	return ret;
1872	}
1873
1874	static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
1875	struct inode *inode,
1876	struct btrfs_ioctl_ino_lookup_user_args *args)
1877	{
1878	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
1879	struct super_block *sb = inode->i_sb;
1880	struct btrfs_key upper_limit = BTRFS_I(inode)->location;
1881	u64 treeid = BTRFS_I(inode)->root->root_key.objectid;
1882	u64 dirid = args->dirid;
1883	unsigned long item_off;
1884	unsigned long item_len;
1885	struct btrfs_inode_ref *iref;
1886	struct btrfs_root_ref *rref;
1887	struct btrfs_root *root = NULL;
1888	struct btrfs_path *path;
1889	struct btrfs_key key, key2;
1890	struct extent_buffer *leaf;
1891	struct inode *temp_inode;
1892	char *ptr;
1893	int slot;
1894	int len;
1895	int total_len = `0`;
1896	int ret;
1897
1898	path = btrfs_alloc_path();
1899	if (!path)
1900	return -ENOMEM;
1901
1902	/*
1903	* If the bottom subvolume does not exist directly under upper_limit,
1904	* construct the path in from the bottom up.
1905	*/
1906	if (dirid != upper_limit.objectid) {
1907	ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - `1`];
1908
1909	root = btrfs_get_fs_root(fs_info, objectid: treeid, check_ref: true);
1910	if (IS_ERR(ptr: root)) {
1911	ret = PTR_ERR(ptr: root);
1912	goto out;
1913	}
1914
1915	key.objectid = dirid;
1916	key.type = BTRFS_INODE_REF_KEY;
1917	key.offset = (u64)-`1`;
1918	while (`1`) {
1919	ret = btrfs_search_backwards(root, key: &key, path);
1920	if (ret < `0`)
1921	goto out_put;
1922	else if (ret > `0`) {
1923	ret = -ENOENT;
1924	goto out_put;
1925	}
1926
1927	leaf = path->nodes[`0`];
1928	slot = path->slots[`0`];
1929
1930	iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref);
1931	len = btrfs_inode_ref_name_len(eb: leaf, s: iref);
1932	ptr -= len + `1`;
1933	total_len += len + `1`;
1934	if (ptr < args->path) {
1935	ret = -ENAMETOOLONG;
1936	goto out_put;
1937	}
1938
1939	*(ptr + len) = `'/'`;
1940	read_extent_buffer(eb: leaf, dst: ptr,
1941	start: (unsigned long)(iref + `1`), len);
1942
1943	/ Check the read+exec permission of this directory /
1944	ret = btrfs_previous_item(root, path, min_objectid: dirid,
1945	BTRFS_INODE_ITEM_KEY);
1946	if (ret < `0`) {
1947	goto out_put;
1948	} else if (ret > `0`) {
1949	ret = -ENOENT;
1950	goto out_put;
1951	}
1952
1953	leaf = path->nodes[`0`];
1954	slot = path->slots[`0`];
1955	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key2, nr: slot);
1956	if (key2.objectid != dirid) {
1957	ret = -ENOENT;
1958	goto out_put;
1959	}
1960
1961	/*
1962	* We don't need the path anymore, so release it and
1963	* avoid deadlocks and lockdep warnings in case
1964	* btrfs_iget() needs to lookup the inode from its root
1965	* btree and lock the same leaf.
1966	*/
1967	btrfs_release_path(p: path);
1968	temp_inode = btrfs_iget(s: sb, ino: key2.objectid, root);
1969	if (IS_ERR(ptr: temp_inode)) {
1970	ret = PTR_ERR(ptr: temp_inode);
1971	goto out_put;
1972	}
1973	ret = inode_permission(idmap, temp_inode,
1974	MAY_READ \| MAY_EXEC);
1975	iput(temp_inode);
1976	if (ret) {
1977	ret = -EACCES;
1978	goto out_put;
1979	}
1980
1981	if (key.offset == upper_limit.objectid)
1982	break;
1983	if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
1984	ret = -EACCES;
1985	goto out_put;
1986	}
1987
1988	key.objectid = key.offset;
1989	key.offset = (u64)-`1`;
1990	dirid = key.objectid;
1991	}
1992
1993	memmove(args->path, ptr, total_len);
1994	args->path[total_len] = `'\0'`;
1995	btrfs_put_root(root);
1996	root = NULL;
1997	btrfs_release_path(p: path);
1998	}
1999
2000	/ Get the bottom subvolume's name from ROOT_REF /
2001	key.objectid = treeid;
2002	key.type = BTRFS_ROOT_REF_KEY;
2003	key.offset = args->treeid;
2004	ret = btrfs_search_slot(NULL, root: fs_info->tree_root, key: &key, p: path, ins_len: `0`, cow: `0`);
2005	if (ret < `0`) {
2006	goto out;
2007	} else if (ret > `0`) {
2008	ret = -ENOENT;
2009	goto out;
2010	}
2011
2012	leaf = path->nodes[`0`];
2013	slot = path->slots[`0`];
2014	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: slot);
2015
2016	item_off = btrfs_item_ptr_offset(leaf, slot);
2017	item_len = btrfs_item_size(eb: leaf, slot);
2018	/ Check if dirid in ROOT_REF corresponds to passed dirid /
2019	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
2020	if (args->dirid != btrfs_root_ref_dirid(eb: leaf, s: rref)) {
2021	ret = -EINVAL;
2022	goto out;
2023	}
2024
2025	/ Copy subvolume's name /
2026	item_off += sizeof(struct btrfs_root_ref);
2027	item_len -= sizeof(struct btrfs_root_ref);
2028	read_extent_buffer(eb: leaf, dst: args->name, start: item_off, len: item_len);
2029	args->name[item_len] = `0`;
2030
2031	out_put:
2032	btrfs_put_root(root);
2033	out:
2034	btrfs_free_path(p: path);
2035	return ret;
2036	}
2037
2038	static noinline int btrfs_ioctl_ino_lookup(struct btrfs_root *root,
2039	void __user *argp)
2040	{
2041	struct btrfs_ioctl_ino_lookup_args *args;
2042	int ret = `0`;
2043
2044	args = memdup_user(argp, sizeof(*args));
2045	if (IS_ERR(ptr: args))
2046	return PTR_ERR(ptr: args);
2047
2048	/*
2049	* Unprivileged query to obtain the containing subvolume root id. The
2050	* path is reset so it's consistent with btrfs_search_path_in_tree.
2051	*/
2052	if (args->treeid == `0`)
2053	args->treeid = root->root_key.objectid;
2054
2055	if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
2056	args->name[`0`] = `0`;
2057	goto out;
2058	}
2059
2060	if (!capable(CAP_SYS_ADMIN)) {
2061	ret = -EPERM;
2062	goto out;
2063	}
2064
2065	ret = btrfs_search_path_in_tree(info: root->fs_info,
2066	tree_id: args->treeid, dirid: args->objectid,
2067	name: args->name);
2068
2069	out:
2070	if (ret == `0` && copy_to_user(to: argp, from: args, n: sizeof(*args)))
2071	ret = -EFAULT;
2072
2073	kfree(objp: args);
2074	return ret;
2075	}
2076
2077	/*
2078	* Version of ino_lookup ioctl (unprivileged)
2079	*
2080	* The main differences from ino_lookup ioctl are:
2081	*
2082	* 1. Read + Exec permission will be checked using inode_permission() during
2083	* path construction. -EACCES will be returned in case of failure.
2084	* 2. Path construction will be stopped at the inode number which corresponds
2085	* to the fd with which this ioctl is called. If constructed path does not
2086	* exist under fd's inode, -EACCES will be returned.
2087	* 3. The name of bottom subvolume is also searched and filled.
2088	*/
2089	static int btrfs_ioctl_ino_lookup_user(struct file file, void* __user *argp)
2090	{
2091	struct btrfs_ioctl_ino_lookup_user_args *args;
2092	struct inode *inode;
2093	int ret;
2094
2095	args = memdup_user(argp, sizeof(*args));
2096	if (IS_ERR(ptr: args))
2097	return PTR_ERR(ptr: args);
2098
2099	inode = file_inode(f: file);
2100
2101	if (args->dirid == BTRFS_FIRST_FREE_OBJECTID &&
2102	BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) {
2103	/*
2104	* The subvolume does not exist under fd with which this is
2105	* called
2106	*/
2107	kfree(objp: args);
2108	return -EACCES;
2109	}
2110
2111	ret = btrfs_search_path_in_tree_user(idmap: file_mnt_idmap(file), inode, args);
2112
2113	if (ret == `0` && copy_to_user(to: argp, from: args, n: sizeof(*args)))
2114	ret = -EFAULT;
2115
2116	kfree(objp: args);
2117	return ret;
2118	}
2119
2120	/ Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF /
2121	static int btrfs_ioctl_get_subvol_info(struct inode inode, void* __user *argp)
2122	{
2123	struct btrfs_ioctl_get_subvol_info_args *subvol_info;
2124	struct btrfs_fs_info *fs_info;
2125	struct btrfs_root *root;
2126	struct btrfs_path *path;
2127	struct btrfs_key key;
2128	struct btrfs_root_item *root_item;
2129	struct btrfs_root_ref *rref;
2130	struct extent_buffer *leaf;
2131	unsigned long item_off;
2132	unsigned long item_len;
2133	int slot;
2134	int ret = `0`;
2135
2136	path = btrfs_alloc_path();
2137	if (!path)
2138	return -ENOMEM;
2139
2140	subvol_info = kzalloc(size: sizeof(*subvol_info), GFP_KERNEL);
2141	if (!subvol_info) {
2142	btrfs_free_path(p: path);
2143	return -ENOMEM;
2144	}
2145
2146	fs_info = BTRFS_I(inode)->root->fs_info;
2147
2148	/ Get root_item of inode's subvolume /
2149	key.objectid = BTRFS_I(inode)->root->root_key.objectid;
2150	root = btrfs_get_fs_root(fs_info, objectid: key.objectid, check_ref: true);
2151	if (IS_ERR(ptr: root)) {
2152	ret = PTR_ERR(ptr: root);
2153	goto out_free;
2154	}
2155	root_item = &root->root_item;
2156
2157	subvol_info->treeid = key.objectid;
2158
2159	subvol_info->generation = btrfs_root_generation(s: root_item);
2160	subvol_info->flags = btrfs_root_flags(s: root_item);
2161
2162	memcpy(subvol_info->uuid, root_item->uuid, BTRFS_UUID_SIZE);
2163	memcpy(subvol_info->parent_uuid, root_item->parent_uuid,
2164	BTRFS_UUID_SIZE);
2165	memcpy(subvol_info->received_uuid, root_item->received_uuid,
2166	BTRFS_UUID_SIZE);
2167
2168	subvol_info->ctransid = btrfs_root_ctransid(s: root_item);
2169	subvol_info->ctime.sec = btrfs_stack_timespec_sec(s: &root_item->ctime);
2170	subvol_info->ctime.nsec = btrfs_stack_timespec_nsec(s: &root_item->ctime);
2171
2172	subvol_info->otransid = btrfs_root_otransid(s: root_item);
2173	subvol_info->otime.sec = btrfs_stack_timespec_sec(s: &root_item->otime);
2174	subvol_info->otime.nsec = btrfs_stack_timespec_nsec(s: &root_item->otime);
2175
2176	subvol_info->stransid = btrfs_root_stransid(s: root_item);
2177	subvol_info->stime.sec = btrfs_stack_timespec_sec(s: &root_item->stime);
2178	subvol_info->stime.nsec = btrfs_stack_timespec_nsec(s: &root_item->stime);
2179
2180	subvol_info->rtransid = btrfs_root_rtransid(s: root_item);
2181	subvol_info->rtime.sec = btrfs_stack_timespec_sec(s: &root_item->rtime);
2182	subvol_info->rtime.nsec = btrfs_stack_timespec_nsec(s: &root_item->rtime);
2183
2184	if (key.objectid != BTRFS_FS_TREE_OBJECTID) {
2185	/ Search root tree for ROOT_BACKREF of this subvolume /
2186	key.type = BTRFS_ROOT_BACKREF_KEY;
2187	key.offset = `0`;
2188	ret = btrfs_search_slot(NULL, root: fs_info->tree_root, key: &key, p: path, ins_len: `0`, cow: `0`);
2189	if (ret < `0`) {
2190	goto out;
2191	} else if (path->slots[`0`] >=
2192	btrfs_header_nritems(eb: path->nodes[`0`])) {
2193	ret = btrfs_next_leaf(root: fs_info->tree_root, path);
2194	if (ret < `0`) {
2195	goto out;
2196	} else if (ret > `0`) {
2197	ret = -EUCLEAN;
2198	goto out;
2199	}
2200	}
2201
2202	leaf = path->nodes[`0`];
2203	slot = path->slots[`0`];
2204	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: slot);
2205	if (key.objectid == subvol_info->treeid &&
2206	key.type == BTRFS_ROOT_BACKREF_KEY) {
2207	subvol_info->parent_id = key.offset;
2208
2209	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
2210	subvol_info->dirid = btrfs_root_ref_dirid(eb: leaf, s: rref);
2211
2212	item_off = btrfs_item_ptr_offset(leaf, slot)
2213	+ sizeof(struct btrfs_root_ref);
2214	item_len = btrfs_item_size(eb: leaf, slot)
2215	- sizeof(struct btrfs_root_ref);
2216	read_extent_buffer(eb: leaf, dst: subvol_info->name,
2217	start: item_off, len: item_len);
2218	} else {
2219	ret = -ENOENT;
2220	goto out;
2221	}
2222	}
2223
2224	btrfs_free_path(p: path);
2225	path = NULL;
2226	if (copy_to_user(to: argp, from: subvol_info, n: sizeof(*subvol_info)))
2227	ret = -EFAULT;
2228
2229	out:
2230	btrfs_put_root(root);
2231	out_free:
2232	btrfs_free_path(p: path);
2233	kfree(objp: subvol_info);
2234	return ret;
2235	}
2236
2237	/*
2238	* Return ROOT_REF information of the subvolume containing this inode
2239	* except the subvolume name.
2240	*/
2241	static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
2242	void __user *argp)
2243	{
2244	struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
2245	struct btrfs_root_ref *rref;
2246	struct btrfs_path *path;
2247	struct btrfs_key key;
2248	struct extent_buffer *leaf;
2249	u64 objectid;
2250	int slot;
2251	int ret;
2252	u8 found;
2253
2254	path = btrfs_alloc_path();
2255	if (!path)
2256	return -ENOMEM;
2257
2258	rootrefs = memdup_user(argp, sizeof(*rootrefs));
2259	if (IS_ERR(ptr: rootrefs)) {
2260	btrfs_free_path(p: path);
2261	return PTR_ERR(ptr: rootrefs);
2262	}
2263
2264	objectid = root->root_key.objectid;
2265	key.objectid = objectid;
2266	key.type = BTRFS_ROOT_REF_KEY;
2267	key.offset = rootrefs->min_treeid;
2268	found = `0`;
2269
2270	root = root->fs_info->tree_root;
2271	ret = btrfs_search_slot(NULL, root, key: &key, p: path, ins_len: `0`, cow: `0`);
2272	if (ret < `0`) {
2273	goto out;
2274	} else if (path->slots[`0`] >=
2275	btrfs_header_nritems(eb: path->nodes[`0`])) {
2276	ret = btrfs_next_leaf(root, path);
2277	if (ret < `0`) {
2278	goto out;
2279	} else if (ret > `0`) {
2280	ret = -EUCLEAN;
2281	goto out;
2282	}
2283	}
2284	while (`1`) {
2285	leaf = path->nodes[`0`];
2286	slot = path->slots[`0`];
2287
2288	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: slot);
2289	if (key.objectid != objectid \|\| key.type != BTRFS_ROOT_REF_KEY) {
2290	ret = `0`;
2291	goto out;
2292	}
2293
2294	if (found == BTRFS_MAX_ROOTREF_BUFFER_NUM) {
2295	ret = -EOVERFLOW;
2296	goto out;
2297	}
2298
2299	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
2300	rootrefs->rootref[found].treeid = key.offset;
2301	rootrefs->rootref[found].dirid =
2302	btrfs_root_ref_dirid(eb: leaf, s: rref);
2303	found++;
2304
2305	ret = btrfs_next_item(root, p: path);
2306	if (ret < `0`) {
2307	goto out;
2308	} else if (ret > `0`) {
2309	ret = -EUCLEAN;
2310	goto out;
2311	}
2312	}
2313
2314	out:
2315	btrfs_free_path(p: path);
2316
2317	if (!ret \|\| ret == -EOVERFLOW) {
2318	rootrefs->num_items = found;
2319	/ update min_treeid for next search /
2320	if (found)
2321	rootrefs->min_treeid =
2322	rootrefs->rootref[found - `1`].treeid + `1`;
2323	if (copy_to_user(to: argp, from: rootrefs, n: sizeof(*rootrefs)))
2324	ret = -EFAULT;
2325	}
2326
2327	kfree(objp: rootrefs);
2328
2329	return ret;
2330	}
2331
2332	static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2333	void __user *arg,
2334	bool destroy_v2)
2335	{
2336	struct dentry *parent = file->f_path.dentry;
2337	struct btrfs_fs_info *fs_info = btrfs_sb(sb: parent->d_sb);
2338	struct dentry *dentry;
2339	struct inode *dir = d_inode(dentry: parent);
2340	struct inode *inode;
2341	struct btrfs_root *root = BTRFS_I(inode: dir)->root;
2342	struct btrfs_root *dest = NULL;
2343	struct btrfs_ioctl_vol_args *vol_args = NULL;
2344	struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
2345	struct mnt_idmap *idmap = file_mnt_idmap(file);
2346	char subvol_name, subvol_name_ptr = NULL;
2347	int subvol_namelen;
2348	int err = `0`;
2349	bool destroy_parent = false;
2350
2351	/ We don't support snapshots with extent tree v2 yet. /
2352	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
2353	btrfs_err(fs_info,
2354	"extent tree v2 doesn't support snapshot deletion yet");
2355	return -EOPNOTSUPP;
2356	}
2357
2358	if (destroy_v2) {
2359	vol_args2 = memdup_user(arg, sizeof(*vol_args2));
2360	if (IS_ERR(ptr: vol_args2))
2361	return PTR_ERR(ptr: vol_args2);
2362
2363	if (vol_args2->flags & ~BTRFS_SUBVOL_DELETE_ARGS_MASK) {
2364	err = -EOPNOTSUPP;
2365	goto out;
2366	}
2367
2368	/*
2369	* If SPEC_BY_ID is not set, we are looking for the subvolume by
2370	* name, same as v1 currently does.
2371	*/
2372	if (!(vol_args2->flags & BTRFS_SUBVOL_SPEC_BY_ID)) {
2373	vol_args2->name[BTRFS_SUBVOL_NAME_MAX] = `0`;
2374	subvol_name = vol_args2->name;
2375
2376	err = mnt_want_write_file(file);
2377	if (err)
2378	goto out;
2379	} else {
2380	struct inode *old_dir;
2381
2382	if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) {
2383	err = -EINVAL;
2384	goto out;
2385	}
2386
2387	err = mnt_want_write_file(file);
2388	if (err)
2389	goto out;
2390
2391	dentry = btrfs_get_dentry(sb: fs_info->sb,
2392	BTRFS_FIRST_FREE_OBJECTID,
2393	root_objectid: vol_args2->subvolid, generation: `0`);
2394	if (IS_ERR(ptr: dentry)) {
2395	err = PTR_ERR(ptr: dentry);
2396	goto out_drop_write;
2397	}
2398
2399	/*
2400	* Change the default parent since the subvolume being
2401	* deleted can be outside of the current mount point.
2402	*/
2403	parent = btrfs_get_parent(child: dentry);
2404
2405	/*
2406	* At this point dentry->d_name can point to '/' if the
2407	* subvolume we want to destroy is outsite of the
2408	* current mount point, so we need to release the
2409	* current dentry and execute the lookup to return a new
2410	* one with ->d_name pointing to the
2411	* <mount point>/subvol_name.
2412	*/
2413	dput(dentry);
2414	if (IS_ERR(ptr: parent)) {
2415	err = PTR_ERR(ptr: parent);
2416	goto out_drop_write;
2417	}
2418	old_dir = dir;
2419	dir = d_inode(dentry: parent);
2420
2421	/*
2422	* If v2 was used with SPEC_BY_ID, a new parent was
2423	* allocated since the subvolume can be outside of the
2424	* current mount point. Later on we need to release this
2425	* new parent dentry.
2426	*/
2427	destroy_parent = true;
2428
2429	/*
2430	* On idmapped mounts, deletion via subvolid is
2431	* restricted to subvolumes that are immediate
2432	* ancestors of the inode referenced by the file
2433	* descriptor in the ioctl. Otherwise the idmapping
2434	* could potentially be abused to delete subvolumes
2435	* anywhere in the filesystem the user wouldn't be able
2436	* to delete without an idmapped mount.
2437	*/
2438	if (old_dir != dir && idmap != &nop_mnt_idmap) {
2439	err = -EOPNOTSUPP;
2440	goto free_parent;
2441	}
2442
2443	subvol_name_ptr = btrfs_get_subvol_name_from_objectid(
2444	fs_info, subvol_objectid: vol_args2->subvolid);
2445	if (IS_ERR(ptr: subvol_name_ptr)) {
2446	err = PTR_ERR(ptr: subvol_name_ptr);
2447	goto free_parent;
2448	}
2449	/ subvol_name_ptr is already nul terminated /
2450	subvol_name = (char *)kbasename(path: subvol_name_ptr);
2451	}
2452	} else {
2453	vol_args = memdup_user(arg, sizeof(*vol_args));
2454	if (IS_ERR(ptr: vol_args))
2455	return PTR_ERR(ptr: vol_args);
2456
2457	vol_args->name[BTRFS_PATH_NAME_MAX] = `0`;
2458	subvol_name = vol_args->name;
2459
2460	err = mnt_want_write_file(file);
2461	if (err)
2462	goto out;
2463	}
2464
2465	subvol_namelen = strlen(subvol_name);
2466
2467	if (strchr(subvol_name, `'/'`) \|\|
2468	strncmp(subvol_name, "..", subvol_namelen) == `0`) {
2469	err = -EINVAL;
2470	goto free_subvol_name;
2471	}
2472
2473	if (!S_ISDIR(dir->i_mode)) {
2474	err = -ENOTDIR;
2475	goto free_subvol_name;
2476	}
2477
2478	err = down_write_killable_nested(sem: &dir->i_rwsem, subclass: I_MUTEX_PARENT);
2479	if (err == -EINTR)
2480	goto free_subvol_name;
2481	dentry = lookup_one(idmap, subvol_name, parent, subvol_namelen);
2482	if (IS_ERR(ptr: dentry)) {
2483	err = PTR_ERR(ptr: dentry);
2484	goto out_unlock_dir;
2485	}
2486
2487	if (d_really_is_negative(dentry)) {
2488	err = -ENOENT;
2489	goto out_dput;
2490	}
2491
2492	inode = d_inode(dentry);
2493	dest = BTRFS_I(inode)->root;
2494	if (!capable(CAP_SYS_ADMIN)) {
2495	/*
2496	* Regular user. Only allow this with a special mount
2497	* option, when the user has write+exec access to the
2498	* subvol root, and when rmdir(2) would have been
2499	* allowed.
2500	*
2501	* Note that this is _not_ check that the subvol is
2502	* empty or doesn't contain data that we wouldn't
2503	* otherwise be able to delete.
2504	*
2505	* Users who want to delete empty subvols should try
2506	* rmdir(2).
2507	*/
2508	err = -EPERM;
2509	if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
2510	goto out_dput;
2511
2512	/*
2513	* Do not allow deletion if the parent dir is the same
2514	* as the dir to be deleted. That means the ioctl
2515	* must be called on the dentry referencing the root
2516	* of the subvol, not a random directory contained
2517	* within it.
2518	*/
2519	err = -EINVAL;
2520	if (root == dest)
2521	goto out_dput;
2522
2523	err = inode_permission(idmap, inode, MAY_WRITE \| MAY_EXEC);
2524	if (err)
2525	goto out_dput;
2526	}
2527
2528	/ check if subvolume may be deleted by a user /
2529	err = btrfs_may_delete(idmap, dir, victim: dentry, isdir: `1`);
2530	if (err)
2531	goto out_dput;
2532
2533	if (btrfs_ino(inode: BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
2534	err = -EINVAL;
2535	goto out_dput;
2536	}
2537
2538	btrfs_inode_lock(inode: BTRFS_I(inode), ilock_flags: `0`);
2539	err = btrfs_delete_subvolume(dir: BTRFS_I(inode: dir), dentry);
2540	btrfs_inode_unlock(inode: BTRFS_I(inode), ilock_flags: `0`);
2541	if (!err)
2542	d_delete_notify(dir, dentry);
2543
2544	out_dput:
2545	dput(dentry);
2546	out_unlock_dir:
2547	btrfs_inode_unlock(inode: BTRFS_I(inode: dir), ilock_flags: `0`);
2548	free_subvol_name:
2549	kfree(objp: subvol_name_ptr);
2550	free_parent:
2551	if (destroy_parent)
2552	dput(parent);
2553	out_drop_write:
2554	mnt_drop_write_file(file);
2555	out:
2556	kfree(objp: vol_args2);
2557	kfree(objp: vol_args);
2558	return err;
2559	}
2560
2561	static int btrfs_ioctl_defrag(struct file file, void* __user *argp)
2562	{
2563	struct inode *inode = file_inode(f: file);
2564	struct btrfs_root *root = BTRFS_I(inode)->root;
2565	struct btrfs_ioctl_defrag_range_args range = {`0`};
2566	int ret;
2567
2568	ret = mnt_want_write_file(file);
2569	if (ret)
2570	return ret;
2571
2572	if (btrfs_root_readonly(root)) {
2573	ret = -EROFS;
2574	goto out;
2575	}
2576
2577	switch (inode->i_mode & S_IFMT) {
2578	case S_IFDIR:
2579	if (!capable(CAP_SYS_ADMIN)) {
2580	ret = -EPERM;
2581	goto out;
2582	}
2583	ret = btrfs_defrag_root(root);
2584	break;
2585	case S_IFREG:
2586	/*
2587	* Note that this does not check the file descriptor for write
2588	* access. This prevents defragmenting executables that are
2589	* running and allows defrag on files open in read-only mode.
2590	*/
2591	if (!capable(CAP_SYS_ADMIN) &&
2592	inode_permission(&nop_mnt_idmap, inode, MAY_WRITE)) {
2593	ret = -EPERM;
2594	goto out;
2595	}
2596
2597	if (argp) {
2598	if (copy_from_user(to: &range, from: argp, n: sizeof(range))) {
2599	ret = -EFAULT;
2600	goto out;
2601	}
2602	/ compression requires us to start the IO /
2603	if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
2604	range.flags \|= BTRFS_DEFRAG_RANGE_START_IO;
2605	range.extent_thresh = (u32)-`1`;
2606	}
2607	} else {
2608	/ the rest are all set to zero by kzalloc /
2609	range.len = (u64)-`1`;
2610	}
2611	ret = btrfs_defrag_file(inode: file_inode(f: file), ra: &file->f_ra,
2612	range: &range, BTRFS_OLDEST_GENERATION, max_to_defrag: `0`);
2613	if (ret > `0`)
2614	ret = `0`;
2615	break;
2616	default:
2617	ret = -EINVAL;
2618	}
2619	out:
2620	mnt_drop_write_file(file);
2621	return ret;
2622	}
2623
2624	static long btrfs_ioctl_add_dev(struct btrfs_fs_info fs_info, void* __user *arg)
2625	{
2626	struct btrfs_ioctl_vol_args *vol_args;
2627	bool restore_op = false;
2628	int ret;
2629
2630	if (!capable(CAP_SYS_ADMIN))
2631	return -EPERM;
2632
2633	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
2634	btrfs_err(fs_info, "device add not supported on extent tree v2 yet");
2635	return -EINVAL;
2636	}
2637
2638	if (fs_info->fs_devices->temp_fsid) {
2639	btrfs_err(fs_info,
2640	"device add not supported on cloned temp-fsid mount");
2641	return -EINVAL;
2642	}
2643
2644	if (!btrfs_exclop_start(fs_info, type: BTRFS_EXCLOP_DEV_ADD)) {
2645	if (!btrfs_exclop_start_try_lock(fs_info, type: BTRFS_EXCLOP_DEV_ADD))
2646	return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
2647
2648	/*
2649	* We can do the device add because we have a paused balanced,
2650	* change the exclusive op type and remember we should bring
2651	* back the paused balance
2652	*/
2653	fs_info->exclusive_operation = BTRFS_EXCLOP_DEV_ADD;
2654	btrfs_exclop_start_unlock(fs_info);
2655	restore_op = true;
2656	}
2657
2658	vol_args = memdup_user(arg, sizeof(*vol_args));
2659	if (IS_ERR(ptr: vol_args)) {
2660	ret = PTR_ERR(ptr: vol_args);
2661	goto out;
2662	}
2663
2664	vol_args->name[BTRFS_PATH_NAME_MAX] = `'\0'`;
2665	ret = btrfs_init_new_device(fs_info, path: vol_args->name);
2666
2667	if (!ret)
2668	btrfs_info(fs_info, "disk added %s", vol_args->name);
2669
2670	kfree(objp: vol_args);
2671	out:
2672	if (restore_op)
2673	btrfs_exclop_balance(fs_info, op: BTRFS_EXCLOP_BALANCE_PAUSED);
2674	else
2675	btrfs_exclop_finish(fs_info);
2676	return ret;
2677	}
2678
2679	static long btrfs_ioctl_rm_dev_v2(struct file file, void* __user *arg)
2680	{
2681	BTRFS_DEV_LOOKUP_ARGS(args);
2682	struct inode *inode = file_inode(f: file);
2683	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
2684	struct btrfs_ioctl_vol_args_v2 *vol_args;
2685	struct bdev_handle *bdev_handle = NULL;
2686	int ret;
2687	bool cancel = false;
2688
2689	if (!capable(CAP_SYS_ADMIN))
2690	return -EPERM;
2691
2692	vol_args = memdup_user(arg, sizeof(*vol_args));
2693	if (IS_ERR(ptr: vol_args))
2694	return PTR_ERR(ptr: vol_args);
2695
2696	if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) {
2697	ret = -EOPNOTSUPP;
2698	goto out;
2699	}
2700
2701	vol_args->name[BTRFS_SUBVOL_NAME_MAX] = `'\0'`;
2702	if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
2703	args.devid = vol_args->devid;
2704	} else if (!strcmp("cancel", vol_args->name)) {
2705	cancel = true;
2706	} else {
2707	ret = btrfs_get_dev_args_from_path(fs_info, args: &args, path: vol_args->name);
2708	if (ret)
2709	goto out;
2710	}
2711
2712	ret = mnt_want_write_file(file);
2713	if (ret)
2714	goto out;
2715
2716	ret = exclop_start_or_cancel_reloc(fs_info, type: BTRFS_EXCLOP_DEV_REMOVE,
2717	cancel);
2718	if (ret)
2719	goto err_drop;
2720
2721	/ Exclusive operation is now claimed /
2722	ret = btrfs_rm_device(fs_info, args: &args, bdev_handle: &bdev_handle);
2723
2724	btrfs_exclop_finish(fs_info);
2725
2726	if (!ret) {
2727	if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
2728	btrfs_info(fs_info, "device deleted: id %llu",
2729	vol_args->devid);
2730	else
2731	btrfs_info(fs_info, "device deleted: %s",
2732	vol_args->name);
2733	}
2734	err_drop:
2735	mnt_drop_write_file(file);
2736	if (bdev_handle)
2737	bdev_release(handle: bdev_handle);
2738	out:
2739	btrfs_put_dev_args_from_path(args: &args);
2740	kfree(objp: vol_args);
2741	return ret;
2742	}
2743
2744	static long btrfs_ioctl_rm_dev(struct file file, void* __user *arg)
2745	{
2746	BTRFS_DEV_LOOKUP_ARGS(args);
2747	struct inode *inode = file_inode(f: file);
2748	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
2749	struct btrfs_ioctl_vol_args *vol_args;
2750	struct bdev_handle *bdev_handle = NULL;
2751	int ret;
2752	bool cancel = false;
2753
2754	if (!capable(CAP_SYS_ADMIN))
2755	return -EPERM;
2756
2757	vol_args = memdup_user(arg, sizeof(*vol_args));
2758	if (IS_ERR(ptr: vol_args))
2759	return PTR_ERR(ptr: vol_args);
2760
2761	vol_args->name[BTRFS_PATH_NAME_MAX] = `'\0'`;
2762	if (!strcmp("cancel", vol_args->name)) {
2763	cancel = true;
2764	} else {
2765	ret = btrfs_get_dev_args_from_path(fs_info, args: &args, path: vol_args->name);
2766	if (ret)
2767	goto out;
2768	}
2769
2770	ret = mnt_want_write_file(file);
2771	if (ret)
2772	goto out;
2773
2774	ret = exclop_start_or_cancel_reloc(fs_info, type: BTRFS_EXCLOP_DEV_REMOVE,
2775	cancel);
2776	if (ret == `0`) {
2777	ret = btrfs_rm_device(fs_info, args: &args, bdev_handle: &bdev_handle);
2778	if (!ret)
2779	btrfs_info(fs_info, "disk deleted %s", vol_args->name);
2780	btrfs_exclop_finish(fs_info);
2781	}
2782
2783	mnt_drop_write_file(file);
2784	if (bdev_handle)
2785	bdev_release(handle: bdev_handle);
2786	out:
2787	btrfs_put_dev_args_from_path(args: &args);
2788	kfree(objp: vol_args);
2789	return ret;
2790	}
2791
2792	static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
2793	void __user *arg)
2794	{
2795	struct btrfs_ioctl_fs_info_args *fi_args;
2796	struct btrfs_device *device;
2797	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2798	u64 flags_in;
2799	int ret = `0`;
2800
2801	fi_args = memdup_user(arg, sizeof(*fi_args));
2802	if (IS_ERR(ptr: fi_args))
2803	return PTR_ERR(ptr: fi_args);
2804
2805	flags_in = fi_args->flags;
2806	memset(fi_args, `0`, sizeof(*fi_args));
2807
2808	rcu_read_lock();
2809	fi_args->num_devices = fs_devices->num_devices;
2810
2811	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
2812	if (device->devid > fi_args->max_id)
2813	fi_args->max_id = device->devid;
2814	}
2815	rcu_read_unlock();
2816
2817	memcpy(&fi_args->fsid, fs_devices->fsid, sizeof(fi_args->fsid));
2818	fi_args->nodesize = fs_info->nodesize;
2819	fi_args->sectorsize = fs_info->sectorsize;
2820	fi_args->clone_alignment = fs_info->sectorsize;
2821
2822	if (flags_in & BTRFS_FS_INFO_FLAG_CSUM_INFO) {
2823	fi_args->csum_type = btrfs_super_csum_type(s: fs_info->super_copy);
2824	fi_args->csum_size = btrfs_super_csum_size(s: fs_info->super_copy);
2825	fi_args->flags \|= BTRFS_FS_INFO_FLAG_CSUM_INFO;
2826	}
2827
2828	if (flags_in & BTRFS_FS_INFO_FLAG_GENERATION) {
2829	fi_args->generation = btrfs_get_fs_generation(fs_info);
2830	fi_args->flags \|= BTRFS_FS_INFO_FLAG_GENERATION;
2831	}
2832
2833	if (flags_in & BTRFS_FS_INFO_FLAG_METADATA_UUID) {
2834	memcpy(&fi_args->metadata_uuid, fs_devices->metadata_uuid,
2835	sizeof(fi_args->metadata_uuid));
2836	fi_args->flags \|= BTRFS_FS_INFO_FLAG_METADATA_UUID;
2837	}
2838
2839	if (copy_to_user(to: arg, from: fi_args, n: sizeof(*fi_args)))
2840	ret = -EFAULT;
2841
2842	kfree(objp: fi_args);
2843	return ret;
2844	}
2845
2846	static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
2847	void __user *arg)
2848	{
2849	BTRFS_DEV_LOOKUP_ARGS(args);
2850	struct btrfs_ioctl_dev_info_args *di_args;
2851	struct btrfs_device *dev;
2852	int ret = `0`;
2853
2854	di_args = memdup_user(arg, sizeof(*di_args));
2855	if (IS_ERR(ptr: di_args))
2856	return PTR_ERR(ptr: di_args);
2857
2858	args.devid = di_args->devid;
2859	if (!btrfs_is_empty_uuid(uuid: di_args->uuid))
2860	args.uuid = di_args->uuid;
2861
2862	rcu_read_lock();
2863	dev = btrfs_find_device(fs_devices: fs_info->fs_devices, args: &args);
2864	if (!dev) {
2865	ret = -ENODEV;
2866	goto out;
2867	}
2868
2869	di_args->devid = dev->devid;
2870	di_args->bytes_used = btrfs_device_get_bytes_used(dev);
2871	di_args->total_bytes = btrfs_device_get_total_bytes(dev);
2872	memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2873	memcpy(di_args->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
2874	if (dev->name)
2875	strscpy(p: di_args->path, q: btrfs_dev_name(device: dev), size: sizeof(di_args->path));
2876	else
2877	di_args->path[`0`] = `'\0'`;
2878
2879	out:
2880	rcu_read_unlock();
2881	if (ret == `0` && copy_to_user(to: arg, from: di_args, n: sizeof(*di_args)))
2882	ret = -EFAULT;
2883
2884	kfree(objp: di_args);
2885	return ret;
2886	}
2887
2888	static long btrfs_ioctl_default_subvol(struct file file, void* __user *argp)
2889	{
2890	struct inode *inode = file_inode(f: file);
2891	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
2892	struct btrfs_root *root = BTRFS_I(inode)->root;
2893	struct btrfs_root *new_root;
2894	struct btrfs_dir_item *di;
2895	struct btrfs_trans_handle *trans;
2896	struct btrfs_path *path = NULL;
2897	struct btrfs_disk_key disk_key;
2898	struct fscrypt_str name = FSTR_INIT("default", `7`);
2899	u64 objectid = `0`;
2900	u64 dir_id;
2901	int ret;
2902
2903	if (!capable(CAP_SYS_ADMIN))
2904	return -EPERM;
2905
2906	ret = mnt_want_write_file(file);
2907	if (ret)
2908	return ret;
2909
2910	if (copy_from_user(to: &objectid, from: argp, n: sizeof(objectid))) {
2911	ret = -EFAULT;
2912	goto out;
2913	}
2914
2915	if (!objectid)
2916	objectid = BTRFS_FS_TREE_OBJECTID;
2917
2918	new_root = btrfs_get_fs_root(fs_info, objectid, check_ref: true);
2919	if (IS_ERR(ptr: new_root)) {
2920	ret = PTR_ERR(ptr: new_root);
2921	goto out;
2922	}
2923	if (!is_fstree(rootid: new_root->root_key.objectid)) {
2924	ret = -ENOENT;
2925	goto out_free;
2926	}
2927
2928	path = btrfs_alloc_path();
2929	if (!path) {
2930	ret = -ENOMEM;
2931	goto out_free;
2932	}
2933
2934	trans = btrfs_start_transaction(root, num_items: `1`);
2935	if (IS_ERR(ptr: trans)) {
2936	ret = PTR_ERR(ptr: trans);
2937	goto out_free;
2938	}
2939
2940	dir_id = btrfs_super_root_dir(s: fs_info->super_copy);
2941	di = btrfs_lookup_dir_item(trans, root: fs_info->tree_root, path,
2942	dir: dir_id, name: &name, mod: `1`);
2943	if (IS_ERR_OR_NULL(ptr: di)) {
2944	btrfs_release_path(p: path);
2945	btrfs_end_transaction(trans);
2946	btrfs_err(fs_info,
2947	"Umm, you don't have the default diritem, this isn't going to work");
2948	ret = -ENOENT;
2949	goto out_free;
2950	}
2951
2952	btrfs_cpu_key_to_disk(disk_key: &disk_key, cpu_key: &new_root->root_key);
2953	btrfs_set_dir_item_key(eb: path->nodes[`0`], item: di, key: &disk_key);
2954	btrfs_mark_buffer_dirty(trans, buf: path->nodes[`0`]);
2955	btrfs_release_path(p: path);
2956
2957	btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL);
2958	btrfs_end_transaction(trans);
2959	out_free:
2960	btrfs_put_root(root: new_root);
2961	btrfs_free_path(p: path);
2962	out:
2963	mnt_drop_write_file(file);
2964	return ret;
2965	}
2966
2967	static void get_block_group_info(struct list_head *groups_list,
2968	struct btrfs_ioctl_space_info *space)
2969	{
2970	struct btrfs_block_group *block_group;
2971
2972	space->total_bytes = `0`;
2973	space->used_bytes = `0`;
2974	space->flags = `0`;
2975	list_for_each_entry(block_group, groups_list, list) {
2976	space->flags = block_group->flags;
2977	space->total_bytes += block_group->length;
2978	space->used_bytes += block_group->used;
2979	}
2980	}
2981
2982	static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
2983	void __user *arg)
2984	{
2985	struct btrfs_ioctl_space_args space_args = { `0` };
2986	struct btrfs_ioctl_space_info space;
2987	struct btrfs_ioctl_space_info *dest;
2988	struct btrfs_ioctl_space_info *dest_orig;
2989	struct btrfs_ioctl_space_info __user *user_dest;
2990	struct btrfs_space_info *info;
2991	static const u64 types[] = {
2992	BTRFS_BLOCK_GROUP_DATA,
2993	BTRFS_BLOCK_GROUP_SYSTEM,
2994	BTRFS_BLOCK_GROUP_METADATA,
2995	BTRFS_BLOCK_GROUP_DATA \| BTRFS_BLOCK_GROUP_METADATA
2996	};
2997	int num_types = `4`;
2998	int alloc_size;
2999	int ret = `0`;
3000	u64 slot_count = `0`;
3001	int i, c;
3002
3003	if (copy_from_user(to: &space_args,
3004	from: (struct btrfs_ioctl_space_args __user *)arg,
3005	n: sizeof(space_args)))
3006	return -EFAULT;
3007
3008	for (i = `0`; i < num_types; i++) {
3009	struct btrfs_space_info *tmp;
3010
3011	info = NULL;
3012	list_for_each_entry(tmp, &fs_info->space_info, list) {
3013	if (tmp->flags == types[i]) {
3014	info = tmp;
3015	break;
3016	}
3017	}
3018
3019	if (!info)
3020	continue;
3021
3022	down_read(sem: &info->groups_sem);
3023	for (c = `0`; c < BTRFS_NR_RAID_TYPES; c++) {
3024	if (!list_empty(head: &info->block_groups[c]))
3025	slot_count++;
3026	}
3027	up_read(sem: &info->groups_sem);
3028	}
3029
3030	/*
3031	* Global block reserve, exported as a space_info
3032	*/
3033	slot_count++;
3034
3035	/ space_slots == 0 means they are asking for a count /
3036	if (space_args.space_slots == `0`) {
3037	space_args.total_spaces = slot_count;
3038	goto out;
3039	}
3040
3041	slot_count = min_t(u64, space_args.space_slots, slot_count);
3042
3043	alloc_size = sizeof(dest) slot_count;
3044
3045	/ we generally have at most 6 or so space infos, one for each raid*
3046	* level. So, a whole page should be more than enough for everyone
3047	*/
3048	if (alloc_size > PAGE_SIZE)
3049	return -ENOMEM;
3050
3051	space_args.total_spaces = `0`;
3052	dest = kmalloc(size: alloc_size, GFP_KERNEL);
3053	if (!dest)
3054	return -ENOMEM;
3055	dest_orig = dest;
3056
3057	/ now we have a buffer to copy into /
3058	for (i = `0`; i < num_types; i++) {
3059	struct btrfs_space_info *tmp;
3060
3061	if (!slot_count)
3062	break;
3063
3064	info = NULL;
3065	list_for_each_entry(tmp, &fs_info->space_info, list) {
3066	if (tmp->flags == types[i]) {
3067	info = tmp;
3068	break;
3069	}
3070	}
3071
3072	if (!info)
3073	continue;
3074	down_read(sem: &info->groups_sem);
3075	for (c = `0`; c < BTRFS_NR_RAID_TYPES; c++) {
3076	if (!list_empty(head: &info->block_groups[c])) {
3077	get_block_group_info(groups_list: &info->block_groups[c],
3078	space: &space);
3079	memcpy(dest, &space, sizeof(space));
3080	dest++;
3081	space_args.total_spaces++;
3082	slot_count--;
3083	}
3084	if (!slot_count)
3085	break;
3086	}
3087	up_read(sem: &info->groups_sem);
3088	}
3089
3090	/*
3091	* Add global block reserve
3092	*/
3093	if (slot_count) {
3094	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
3095
3096	spin_lock(lock: &block_rsv->lock);
3097	space.total_bytes = block_rsv->size;
3098	space.used_bytes = block_rsv->size - block_rsv->reserved;
3099	spin_unlock(lock: &block_rsv->lock);
3100	space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV;
3101	memcpy(dest, &space, sizeof(space));
3102	space_args.total_spaces++;
3103	}
3104
3105	user_dest = (struct btrfs_ioctl_space_info __user *)
3106	(arg + sizeof(struct btrfs_ioctl_space_args));
3107
3108	if (copy_to_user(to: user_dest, from: dest_orig, n: alloc_size))
3109	ret = -EFAULT;
3110
3111	kfree(objp: dest_orig);
3112	out:
3113	if (ret == `0` && copy_to_user(to: arg, from: &space_args, n: sizeof(space_args)))
3114	ret = -EFAULT;
3115
3116	return ret;
3117	}
3118
3119	static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
3120	void __user *argp)
3121	{
3122	struct btrfs_trans_handle *trans;
3123	u64 transid;
3124
3125	/*
3126	* Start orphan cleanup here for the given root in case it hasn't been
3127	* started already by other means. Errors are handled in the other
3128	* functions during transaction commit.
3129	*/
3130	btrfs_orphan_cleanup(root);
3131
3132	trans = btrfs_attach_transaction_barrier(root);
3133	if (IS_ERR(ptr: trans)) {
3134	if (PTR_ERR(ptr: trans) != -ENOENT)
3135	return PTR_ERR(ptr: trans);
3136
3137	/ No running transaction, don't bother /
3138	transid = btrfs_get_last_trans_committed(fs_info: root->fs_info);
3139	goto out;
3140	}
3141	transid = trans->transid;
3142	btrfs_commit_transaction_async(trans);
3143	out:
3144	if (argp)
3145	if (copy_to_user(to: argp, from: &transid, n: sizeof(transid)))
3146	return -EFAULT;
3147	return `0`;
3148	}
3149
3150	static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info,
3151	void __user *argp)
3152	{
3153	/ By default wait for the current transaction. /
3154	u64 transid = `0`;
3155
3156	if (argp)
3157	if (copy_from_user(to: &transid, from: argp, n: sizeof(transid)))
3158	return -EFAULT;
3159
3160	return btrfs_wait_for_commit(fs_info, transid);
3161	}
3162
3163	static long btrfs_ioctl_scrub(struct file file, void* __user *arg)
3164	{
3165	struct btrfs_fs_info *fs_info = btrfs_sb(sb: file_inode(f: file)->i_sb);
3166	struct btrfs_ioctl_scrub_args *sa;
3167	int ret;
3168
3169	if (!capable(CAP_SYS_ADMIN))
3170	return -EPERM;
3171
3172	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
3173	btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet");
3174	return -EINVAL;
3175	}
3176
3177	sa = memdup_user(arg, sizeof(*sa));
3178	if (IS_ERR(ptr: sa))
3179	return PTR_ERR(ptr: sa);
3180
3181	if (sa->flags & ~BTRFS_SCRUB_SUPPORTED_FLAGS) {
3182	ret = -EOPNOTSUPP;
3183	goto out;
3184	}
3185
3186	if (!(sa->flags & BTRFS_SCRUB_READONLY)) {
3187	ret = mnt_want_write_file(file);
3188	if (ret)
3189	goto out;
3190	}
3191
3192	ret = btrfs_scrub_dev(fs_info, devid: sa->devid, start: sa->start, end: sa->end,
3193	progress: &sa->progress, readonly: sa->flags & BTRFS_SCRUB_READONLY,
3194	is_dev_replace: `0`);
3195
3196	/*
3197	* Copy scrub args to user space even if btrfs_scrub_dev() returned an
3198	* error. This is important as it allows user space to know how much
3199	* progress scrub has done. For example, if scrub is canceled we get
3200	* -ECANCELED from btrfs_scrub_dev() and return that error back to user
3201	* space. Later user space can inspect the progress from the structure
3202	* btrfs_ioctl_scrub_args and resume scrub from where it left off
3203	* previously (btrfs-progs does this).
3204	* If we fail to copy the btrfs_ioctl_scrub_args structure to user space
3205	* then return -EFAULT to signal the structure was not copied or it may
3206	* be corrupt and unreliable due to a partial copy.
3207	*/
3208	if (copy_to_user(to: arg, from: sa, n: sizeof(*sa)))
3209	ret = -EFAULT;
3210
3211	if (!(sa->flags & BTRFS_SCRUB_READONLY))
3212	mnt_drop_write_file(file);
3213	out:
3214	kfree(objp: sa);
3215	return ret;
3216	}
3217
3218	static long btrfs_ioctl_scrub_cancel(struct btrfs_fs_info *fs_info)
3219	{
3220	if (!capable(CAP_SYS_ADMIN))
3221	return -EPERM;
3222
3223	return btrfs_scrub_cancel(info: fs_info);
3224	}
3225
3226	static long btrfs_ioctl_scrub_progress(struct btrfs_fs_info *fs_info,
3227	void __user *arg)
3228	{
3229	struct btrfs_ioctl_scrub_args *sa;
3230	int ret;
3231
3232	if (!capable(CAP_SYS_ADMIN))
3233	return -EPERM;
3234
3235	sa = memdup_user(arg, sizeof(*sa));
3236	if (IS_ERR(ptr: sa))
3237	return PTR_ERR(ptr: sa);
3238
3239	ret = btrfs_scrub_progress(fs_info, devid: sa->devid, progress: &sa->progress);
3240
3241	if (ret == `0` && copy_to_user(to: arg, from: sa, n: sizeof(*sa)))
3242	ret = -EFAULT;
3243
3244	kfree(objp: sa);
3245	return ret;
3246	}
3247
3248	static long btrfs_ioctl_get_dev_stats(struct btrfs_fs_info *fs_info,
3249	void __user *arg)
3250	{
3251	struct btrfs_ioctl_get_dev_stats *sa;
3252	int ret;
3253
3254	sa = memdup_user(arg, sizeof(*sa));
3255	if (IS_ERR(ptr: sa))
3256	return PTR_ERR(ptr: sa);
3257
3258	if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) {
3259	kfree(objp: sa);
3260	return -EPERM;
3261	}
3262
3263	ret = btrfs_get_dev_stats(fs_info, stats: sa);
3264
3265	if (ret == `0` && copy_to_user(to: arg, from: sa, n: sizeof(*sa)))
3266	ret = -EFAULT;
3267
3268	kfree(objp: sa);
3269	return ret;
3270	}
3271
3272	static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
3273	void __user *arg)
3274	{
3275	struct btrfs_ioctl_dev_replace_args *p;
3276	int ret;
3277
3278	if (!capable(CAP_SYS_ADMIN))
3279	return -EPERM;
3280
3281	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
3282	btrfs_err(fs_info, "device replace not supported on extent tree v2 yet");
3283	return -EINVAL;
3284	}
3285
3286	p = memdup_user(arg, sizeof(*p));
3287	if (IS_ERR(ptr: p))
3288	return PTR_ERR(ptr: p);
3289
3290	switch (p->cmd) {
3291	case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
3292	if (sb_rdonly(sb: fs_info->sb)) {
3293	ret = -EROFS;
3294	goto out;
3295	}
3296	if (!btrfs_exclop_start(fs_info, type: BTRFS_EXCLOP_DEV_REPLACE)) {
3297	ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
3298	} else {
3299	ret = btrfs_dev_replace_by_ioctl(fs_info, args: p);
3300	btrfs_exclop_finish(fs_info);
3301	}
3302	break;
3303	case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
3304	btrfs_dev_replace_status(fs_info, args: p);
3305	ret = `0`;
3306	break;
3307	case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL:
3308	p->result = btrfs_dev_replace_cancel(fs_info);
3309	ret = `0`;
3310	break;
3311	default:
3312	ret = -EINVAL;
3313	break;
3314	}
3315
3316	if ((ret == `0` \|\| ret == -ECANCELED) && copy_to_user(to: arg, from: p, n: sizeof(*p)))
3317	ret = -EFAULT;
3318	out:
3319	kfree(objp: p);
3320	return ret;
3321	}
3322
3323	static long btrfs_ioctl_ino_to_path(struct btrfs_root root, void* __user *arg)
3324	{
3325	int ret = `0`;
3326	int i;
3327	u64 rel_ptr;
3328	int size;
3329	struct btrfs_ioctl_ino_path_args *ipa = NULL;
3330	struct inode_fs_paths *ipath = NULL;
3331	struct btrfs_path *path;
3332
3333	if (!capable(CAP_DAC_READ_SEARCH))
3334	return -EPERM;
3335
3336	path = btrfs_alloc_path();
3337	if (!path) {
3338	ret = -ENOMEM;
3339	goto out;
3340	}
3341
3342	ipa = memdup_user(arg, sizeof(*ipa));
3343	if (IS_ERR(ptr: ipa)) {
3344	ret = PTR_ERR(ptr: ipa);
3345	ipa = NULL;
3346	goto out;
3347	}
3348
3349	size = min_t(u32, ipa->size, `4096`);
3350	ipath = init_ipath(total_bytes: size, fs_root: root, path);
3351	if (IS_ERR(ptr: ipath)) {
3352	ret = PTR_ERR(ptr: ipath);
3353	ipath = NULL;
3354	goto out;
3355	}
3356
3357	ret = paths_from_inode(inum: ipa->inum, ipath);
3358	if (ret < `0`)
3359	goto out;
3360
3361	for (i = `0`; i < ipath->fspath->elem_cnt; ++i) {
3362	rel_ptr = ipath->fspath->val[i] -
3363	(u64)(unsigned long)ipath->fspath->val;
3364	ipath->fspath->val[i] = rel_ptr;
3365	}
3366
3367	btrfs_free_path(p: path);
3368	path = NULL;
3369	ret = copy_to_user(to: (void __user )(unsigned* long)ipa->fspath,
3370	from: ipath->fspath, n: size);
3371	if (ret) {
3372	ret = -EFAULT;
3373	goto out;
3374	}
3375
3376	out:
3377	btrfs_free_path(p: path);
3378	free_ipath(ipath);
3379	kfree(objp: ipa);
3380
3381	return ret;
3382	}
3383
3384	static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
3385	void __user arg, int* version)
3386	{
3387	int ret = `0`;
3388	int size;
3389	struct btrfs_ioctl_logical_ino_args *loi;
3390	struct btrfs_data_container *inodes = NULL;
3391	struct btrfs_path *path = NULL;
3392	bool ignore_offset;
3393
3394	if (!capable(CAP_SYS_ADMIN))
3395	return -EPERM;
3396
3397	loi = memdup_user(arg, sizeof(*loi));
3398	if (IS_ERR(ptr: loi))
3399	return PTR_ERR(ptr: loi);
3400
3401	if (version == `1`) {
3402	ignore_offset = false;
3403	size = min_t(u32, loi->size, SZ_64K);
3404	} else {
3405	/ All reserved bits must be 0 for now /
3406	if (memchr_inv(p: loi->reserved, c: `0`, size: sizeof(loi->reserved))) {
3407	ret = -EINVAL;
3408	goto out_loi;
3409	}
3410	/ Only accept flags we have defined so far /
3411	if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) {
3412	ret = -EINVAL;
3413	goto out_loi;
3414	}
3415	ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
3416	size = min_t(u32, loi->size, SZ_16M);
3417	}
3418
3419	inodes = init_data_container(total_bytes: size);
3420	if (IS_ERR(ptr: inodes)) {
3421	ret = PTR_ERR(ptr: inodes);
3422	goto out_loi;
3423	}
3424
3425	path = btrfs_alloc_path();
3426	if (!path) {
3427	ret = -ENOMEM;
3428	goto out;
3429	}
3430	ret = iterate_inodes_from_logical(logical: loi->logical, fs_info, path,
3431	ctx: inodes, ignore_offset);
3432	btrfs_free_path(p: path);
3433	if (ret == -EINVAL)
3434	ret = -ENOENT;
3435	if (ret < `0`)
3436	goto out;
3437
3438	ret = copy_to_user(to: (void __user )(unsigned* long)loi->inodes, from: inodes,
3439	n: size);
3440	if (ret)
3441	ret = -EFAULT;
3442
3443	out:
3444	kvfree(addr: inodes);
3445	out_loi:
3446	kfree(objp: loi);
3447
3448	return ret;
3449	}
3450
3451	void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
3452	struct btrfs_ioctl_balance_args *bargs)
3453	{
3454	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3455
3456	bargs->flags = bctl->flags;
3457
3458	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags))
3459	bargs->state \|= BTRFS_BALANCE_STATE_RUNNING;
3460	if (atomic_read(v: &fs_info->balance_pause_req))
3461	bargs->state \|= BTRFS_BALANCE_STATE_PAUSE_REQ;
3462	if (atomic_read(v: &fs_info->balance_cancel_req))
3463	bargs->state \|= BTRFS_BALANCE_STATE_CANCEL_REQ;
3464
3465	memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
3466	memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
3467	memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
3468
3469	spin_lock(lock: &fs_info->balance_lock);
3470	memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
3471	spin_unlock(lock: &fs_info->balance_lock);
3472	}
3473
3474	/*
3475	* Try to acquire fs_info::balance_mutex as well as set BTRFS_EXLCOP_BALANCE as
3476	* required.
3477	*
3478	* @fs_info: the filesystem
3479	* @excl_acquired: ptr to boolean value which is set to false in case balance
3480	* is being resumed
3481	*
3482	* Return 0 on success in which case both fs_info::balance is acquired as well
3483	* as exclusive ops are blocked. In case of failure return an error code.
3484	*/
3485	static int btrfs_try_lock_balance(struct btrfs_fs_info fs_info, bool excl_acquired)
3486	{
3487	int ret;
3488
3489	/*
3490	* Exclusive operation is locked. Three possibilities:
3491	* (1) some other op is running
3492	* (2) balance is running
3493	* (3) balance is paused -- special case (think resume)
3494	*/
3495	while (`1`) {
3496	if (btrfs_exclop_start(fs_info, type: BTRFS_EXCLOP_BALANCE)) {
3497	*excl_acquired = true;
3498	mutex_lock(&fs_info->balance_mutex);
3499	return `0`;
3500	}
3501
3502	mutex_lock(&fs_info->balance_mutex);
3503	if (fs_info->balance_ctl) {
3504	/ This is either (2) or (3) /
3505	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
3506	/ This is (2) /
3507	ret = -EINPROGRESS;
3508	goto out_failure;
3509
3510	} else {
3511	mutex_unlock(lock: &fs_info->balance_mutex);
3512	/*
3513	* Lock released to allow other waiters to
3514	* continue, we'll reexamine the status again.
3515	*/
3516	mutex_lock(&fs_info->balance_mutex);
3517
3518	if (fs_info->balance_ctl &&
3519	!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
3520	/ This is (3) /
3521	*excl_acquired = false;
3522	return `0`;
3523	}
3524	}
3525	} else {
3526	/ This is (1) /
3527	ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
3528	goto out_failure;
3529	}
3530
3531	mutex_unlock(lock: &fs_info->balance_mutex);
3532	}
3533
3534	out_failure:
3535	mutex_unlock(lock: &fs_info->balance_mutex);
3536	*excl_acquired = false;
3537	return ret;
3538	}
3539
3540	static long btrfs_ioctl_balance(struct file file, void* __user *arg)
3541	{
3542	struct btrfs_root *root = BTRFS_I(inode: file_inode(f: file))->root;
3543	struct btrfs_fs_info *fs_info = root->fs_info;
3544	struct btrfs_ioctl_balance_args *bargs;
3545	struct btrfs_balance_control *bctl;
3546	bool need_unlock = true;
3547	int ret;
3548
3549	if (!capable(CAP_SYS_ADMIN))
3550	return -EPERM;
3551
3552	ret = mnt_want_write_file(file);
3553	if (ret)
3554	return ret;
3555
3556	bargs = memdup_user(arg, sizeof(*bargs));
3557	if (IS_ERR(ptr: bargs)) {
3558	ret = PTR_ERR(ptr: bargs);
3559	bargs = NULL;
3560	goto out;
3561	}
3562
3563	ret = btrfs_try_lock_balance(fs_info, excl_acquired: &need_unlock);
3564	if (ret)
3565	goto out;
3566
3567	lockdep_assert_held(&fs_info->balance_mutex);
3568
3569	if (bargs->flags & BTRFS_BALANCE_RESUME) {
3570	if (!fs_info->balance_ctl) {
3571	ret = -ENOTCONN;
3572	goto out_unlock;
3573	}
3574
3575	bctl = fs_info->balance_ctl;
3576	spin_lock(lock: &fs_info->balance_lock);
3577	bctl->flags \|= BTRFS_BALANCE_RESUME;
3578	spin_unlock(lock: &fs_info->balance_lock);
3579	btrfs_exclop_balance(fs_info, op: BTRFS_EXCLOP_BALANCE);
3580
3581	goto do_balance;
3582	}
3583
3584	if (bargs->flags & ~(BTRFS_BALANCE_ARGS_MASK \| BTRFS_BALANCE_TYPE_MASK)) {
3585	ret = -EINVAL;
3586	goto out_unlock;
3587	}
3588
3589	if (fs_info->balance_ctl) {
3590	ret = -EINPROGRESS;
3591	goto out_unlock;
3592	}
3593
3594	bctl = kzalloc(size: sizeof(*bctl), GFP_KERNEL);
3595	if (!bctl) {
3596	ret = -ENOMEM;
3597	goto out_unlock;
3598	}
3599
3600	memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
3601	memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
3602	memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
3603
3604	bctl->flags = bargs->flags;
3605	do_balance:
3606	/*
3607	* Ownership of bctl and exclusive operation goes to btrfs_balance.
3608	* bctl is freed in reset_balance_state, or, if restriper was paused
3609	* all the way until unmount, in free_fs_info. The flag should be
3610	* cleared after reset_balance_state.
3611	*/
3612	need_unlock = false;
3613
3614	ret = btrfs_balance(fs_info, bctl, bargs);
3615	bctl = NULL;
3616
3617	if (ret == `0` \|\| ret == -ECANCELED) {
3618	if (copy_to_user(to: arg, from: bargs, n: sizeof(*bargs)))
3619	ret = -EFAULT;
3620	}
3621
3622	kfree(objp: bctl);
3623	out_unlock:
3624	mutex_unlock(lock: &fs_info->balance_mutex);
3625	if (need_unlock)
3626	btrfs_exclop_finish(fs_info);
3627	out:
3628	mnt_drop_write_file(file);
3629	kfree(objp: bargs);
3630	return ret;
3631	}
3632
3633	static long btrfs_ioctl_balance_ctl(struct btrfs_fs_info fs_info, int* cmd)
3634	{
3635	if (!capable(CAP_SYS_ADMIN))
3636	return -EPERM;
3637
3638	switch (cmd) {
3639	case BTRFS_BALANCE_CTL_PAUSE:
3640	return btrfs_pause_balance(fs_info);
3641	case BTRFS_BALANCE_CTL_CANCEL:
3642	return btrfs_cancel_balance(fs_info);
3643	}
3644
3645	return -EINVAL;
3646	}
3647
3648	static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,
3649	void __user *arg)
3650	{
3651	struct btrfs_ioctl_balance_args *bargs;
3652	int ret = `0`;
3653
3654	if (!capable(CAP_SYS_ADMIN))
3655	return -EPERM;
3656
3657	mutex_lock(&fs_info->balance_mutex);
3658	if (!fs_info->balance_ctl) {
3659	ret = -ENOTCONN;
3660	goto out;
3661	}
3662
3663	bargs = kzalloc(size: sizeof(*bargs), GFP_KERNEL);
3664	if (!bargs) {
3665	ret = -ENOMEM;
3666	goto out;
3667	}
3668
3669	btrfs_update_ioctl_balance_args(fs_info, bargs);
3670
3671	if (copy_to_user(to: arg, from: bargs, n: sizeof(*bargs)))
3672	ret = -EFAULT;
3673
3674	kfree(objp: bargs);
3675	out:
3676	mutex_unlock(lock: &fs_info->balance_mutex);
3677	return ret;
3678	}
3679
3680	static long btrfs_ioctl_quota_ctl(struct file file, void* __user *arg)
3681	{
3682	struct inode *inode = file_inode(f: file);
3683	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
3684	struct btrfs_ioctl_quota_ctl_args *sa;
3685	int ret;
3686
3687	if (!capable(CAP_SYS_ADMIN))
3688	return -EPERM;
3689
3690	ret = mnt_want_write_file(file);
3691	if (ret)
3692	return ret;
3693
3694	sa = memdup_user(arg, sizeof(*sa));
3695	if (IS_ERR(ptr: sa)) {
3696	ret = PTR_ERR(ptr: sa);
3697	goto drop_write;
3698	}
3699
3700	down_write(sem: &fs_info->subvol_sem);
3701
3702	switch (sa->cmd) {
3703	case BTRFS_QUOTA_CTL_ENABLE:
3704	case BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA:
3705	ret = btrfs_quota_enable(fs_info, quota_ctl_args: sa);
3706	break;
3707	case BTRFS_QUOTA_CTL_DISABLE:
3708	ret = btrfs_quota_disable(fs_info);
3709	break;
3710	default:
3711	ret = -EINVAL;
3712	break;
3713	}
3714
3715	kfree(objp: sa);
3716	up_write(sem: &fs_info->subvol_sem);
3717	drop_write:
3718	mnt_drop_write_file(file);
3719	return ret;
3720	}
3721
3722	static long btrfs_ioctl_qgroup_assign(struct file file, void* __user *arg)
3723	{
3724	struct inode *inode = file_inode(f: file);
3725	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
3726	struct btrfs_root *root = BTRFS_I(inode)->root;
3727	struct btrfs_ioctl_qgroup_assign_args *sa;
3728	struct btrfs_trans_handle *trans;
3729	int ret;
3730	int err;
3731
3732	if (!capable(CAP_SYS_ADMIN))
3733	return -EPERM;
3734
3735	ret = mnt_want_write_file(file);
3736	if (ret)
3737	return ret;
3738
3739	sa = memdup_user(arg, sizeof(*sa));
3740	if (IS_ERR(ptr: sa)) {
3741	ret = PTR_ERR(ptr: sa);
3742	goto drop_write;
3743	}
3744
3745	trans = btrfs_join_transaction(root);
3746	if (IS_ERR(ptr: trans)) {
3747	ret = PTR_ERR(ptr: trans);
3748	goto out;
3749	}
3750
3751	if (sa->assign) {
3752	ret = btrfs_add_qgroup_relation(trans, src: sa->src, dst: sa->dst);
3753	} else {
3754	ret = btrfs_del_qgroup_relation(trans, src: sa->src, dst: sa->dst);
3755	}
3756
3757	/ update qgroup status and info /
3758	mutex_lock(&fs_info->qgroup_ioctl_lock);
3759	err = btrfs_run_qgroups(trans);
3760	mutex_unlock(lock: &fs_info->qgroup_ioctl_lock);
3761	if (err < `0`)
3762	btrfs_handle_fs_error(fs_info, err,
3763	"failed to update qgroup status and info");
3764	err = btrfs_end_transaction(trans);
3765	if (err && !ret)
3766	ret = err;
3767
3768	out:
3769	kfree(objp: sa);
3770	drop_write:
3771	mnt_drop_write_file(file);
3772	return ret;
3773	}
3774
3775	static long btrfs_ioctl_qgroup_create(struct file file, void* __user *arg)
3776	{
3777	struct inode *inode = file_inode(f: file);
3778	struct btrfs_root *root = BTRFS_I(inode)->root;
3779	struct btrfs_ioctl_qgroup_create_args *sa;
3780	struct btrfs_trans_handle *trans;
3781	int ret;
3782	int err;
3783
3784	if (!capable(CAP_SYS_ADMIN))
3785	return -EPERM;
3786
3787	ret = mnt_want_write_file(file);
3788	if (ret)
3789	return ret;
3790
3791	sa = memdup_user(arg, sizeof(*sa));
3792	if (IS_ERR(ptr: sa)) {
3793	ret = PTR_ERR(ptr: sa);
3794	goto drop_write;
3795	}
3796
3797	if (!sa->qgroupid) {
3798	ret = -EINVAL;
3799	goto out;
3800	}
3801
3802	trans = btrfs_join_transaction(root);
3803	if (IS_ERR(ptr: trans)) {
3804	ret = PTR_ERR(ptr: trans);
3805	goto out;
3806	}
3807
3808	if (sa->create) {
3809	ret = btrfs_create_qgroup(trans, qgroupid: sa->qgroupid);
3810	} else {
3811	ret = btrfs_remove_qgroup(trans, qgroupid: sa->qgroupid);
3812	}
3813
3814	err = btrfs_end_transaction(trans);
3815	if (err && !ret)
3816	ret = err;
3817
3818	out:
3819	kfree(objp: sa);
3820	drop_write:
3821	mnt_drop_write_file(file);
3822	return ret;
3823	}
3824
3825	static long btrfs_ioctl_qgroup_limit(struct file file, void* __user *arg)
3826	{
3827	struct inode *inode = file_inode(f: file);
3828	struct btrfs_root *root = BTRFS_I(inode)->root;
3829	struct btrfs_ioctl_qgroup_limit_args *sa;
3830	struct btrfs_trans_handle *trans;
3831	int ret;
3832	int err;
3833	u64 qgroupid;
3834
3835	if (!capable(CAP_SYS_ADMIN))
3836	return -EPERM;
3837
3838	ret = mnt_want_write_file(file);
3839	if (ret)
3840	return ret;
3841
3842	sa = memdup_user(arg, sizeof(*sa));
3843	if (IS_ERR(ptr: sa)) {
3844	ret = PTR_ERR(ptr: sa);
3845	goto drop_write;
3846	}
3847
3848	trans = btrfs_join_transaction(root);
3849	if (IS_ERR(ptr: trans)) {
3850	ret = PTR_ERR(ptr: trans);
3851	goto out;
3852	}
3853
3854	qgroupid = sa->qgroupid;
3855	if (!qgroupid) {
3856	/ take the current subvol as qgroup /
3857	qgroupid = root->root_key.objectid;
3858	}
3859
3860	ret = btrfs_limit_qgroup(trans, qgroupid, limit: &sa->lim);
3861
3862	err = btrfs_end_transaction(trans);
3863	if (err && !ret)
3864	ret = err;
3865
3866	out:
3867	kfree(objp: sa);
3868	drop_write:
3869	mnt_drop_write_file(file);
3870	return ret;
3871	}
3872
3873	static long btrfs_ioctl_quota_rescan(struct file file, void* __user *arg)
3874	{
3875	struct inode *inode = file_inode(f: file);
3876	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
3877	struct btrfs_ioctl_quota_rescan_args *qsa;
3878	int ret;
3879
3880	if (!capable(CAP_SYS_ADMIN))
3881	return -EPERM;
3882
3883	ret = mnt_want_write_file(file);
3884	if (ret)
3885	return ret;
3886
3887	qsa = memdup_user(arg, sizeof(*qsa));
3888	if (IS_ERR(ptr: qsa)) {
3889	ret = PTR_ERR(ptr: qsa);
3890	goto drop_write;
3891	}
3892
3893	if (qsa->flags) {
3894	ret = -EINVAL;
3895	goto out;
3896	}
3897
3898	ret = btrfs_qgroup_rescan(fs_info);
3899
3900	out:
3901	kfree(objp: qsa);
3902	drop_write:
3903	mnt_drop_write_file(file);
3904	return ret;
3905	}
3906
3907	static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
3908	void __user *arg)
3909	{
3910	struct btrfs_ioctl_quota_rescan_args qsa = {`0`};
3911
3912	if (!capable(CAP_SYS_ADMIN))
3913	return -EPERM;
3914
3915	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
3916	qsa.flags = `1`;
3917	qsa.progress = fs_info->qgroup_rescan_progress.objectid;
3918	}
3919
3920	if (copy_to_user(to: arg, from: &qsa, n: sizeof(qsa)))
3921	return -EFAULT;
3922
3923	return `0`;
3924	}
3925
3926	static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info,
3927	void __user *arg)
3928	{
3929	if (!capable(CAP_SYS_ADMIN))
3930	return -EPERM;
3931
3932	return btrfs_qgroup_wait_for_completion(fs_info, interruptible: true);
3933	}
3934
3935	static long _btrfs_ioctl_set_received_subvol(struct file *file,
3936	struct mnt_idmap *idmap,
3937	struct btrfs_ioctl_received_subvol_args *sa)
3938	{
3939	struct inode *inode = file_inode(f: file);
3940	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
3941	struct btrfs_root *root = BTRFS_I(inode)->root;
3942	struct btrfs_root_item *root_item = &root->root_item;
3943	struct btrfs_trans_handle *trans;
3944	struct timespec64 ct = current_time(inode);
3945	int ret = `0`;
3946	int received_uuid_changed;
3947
3948	if (!inode_owner_or_capable(idmap, inode))
3949	return -EPERM;
3950
3951	ret = mnt_want_write_file(file);
3952	if (ret < `0`)
3953	return ret;
3954
3955	down_write(sem: &fs_info->subvol_sem);
3956
3957	if (btrfs_ino(inode: BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
3958	ret = -EINVAL;
3959	goto out;
3960	}
3961
3962	if (btrfs_root_readonly(root)) {
3963	ret = -EROFS;
3964	goto out;
3965	}
3966
3967	/*
3968	* 1 - root item
3969	* 2 - uuid items (received uuid + subvol uuid)
3970	*/
3971	trans = btrfs_start_transaction(root, num_items: `3`);
3972	if (IS_ERR(ptr: trans)) {
3973	ret = PTR_ERR(ptr: trans);
3974	trans = NULL;
3975	goto out;
3976	}
3977
3978	sa->rtransid = trans->transid;
3979	sa->rtime.sec = ct.tv_sec;
3980	sa->rtime.nsec = ct.tv_nsec;
3981
3982	received_uuid_changed = memcmp(p: root_item->received_uuid, q: sa->uuid,
3983	BTRFS_UUID_SIZE);
3984	if (received_uuid_changed &&
3985	!btrfs_is_empty_uuid(uuid: root_item->received_uuid)) {
3986	ret = btrfs_uuid_tree_remove(trans, uuid: root_item->received_uuid,
3987	BTRFS_UUID_KEY_RECEIVED_SUBVOL,
3988	subid: root->root_key.objectid);
3989	if (ret && ret != -ENOENT) {
3990	btrfs_abort_transaction(trans, ret);
3991	btrfs_end_transaction(trans);
3992	goto out;
3993	}
3994	}
3995	memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
3996	btrfs_set_root_stransid(s: root_item, val: sa->stransid);
3997	btrfs_set_root_rtransid(s: root_item, val: sa->rtransid);
3998	btrfs_set_stack_timespec_sec(s: &root_item->stime, val: sa->stime.sec);
3999	btrfs_set_stack_timespec_nsec(s: &root_item->stime, val: sa->stime.nsec);
4000	btrfs_set_stack_timespec_sec(s: &root_item->rtime, val: sa->rtime.sec);
4001	btrfs_set_stack_timespec_nsec(s: &root_item->rtime, val: sa->rtime.nsec);
4002
4003	ret = btrfs_update_root(trans, root: fs_info->tree_root,
4004	key: &root->root_key, item: &root->root_item);
4005	if (ret < `0`) {
4006	btrfs_end_transaction(trans);
4007	goto out;
4008	}
4009	if (received_uuid_changed && !btrfs_is_empty_uuid(uuid: sa->uuid)) {
4010	ret = btrfs_uuid_tree_add(trans, uuid: sa->uuid,
4011	BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4012	subid: root->root_key.objectid);
4013	if (ret < `0` && ret != -EEXIST) {
4014	btrfs_abort_transaction(trans, ret);
4015	btrfs_end_transaction(trans);
4016	goto out;
4017	}
4018	}
4019	ret = btrfs_commit_transaction(trans);
4020	out:
4021	up_write(sem: &fs_info->subvol_sem);
4022	mnt_drop_write_file(file);
4023	return ret;
4024	}
4025
#ifdef CONFIG_64BIT
/*
 * Entry point for the set-received-subvol ioctl using the 32-bit argument
 * layout.
 *
 * The 32-bit structure is copied in and converted field by field into the
 * native structure, which is passed to _btrfs_ioctl_set_received_subvol().
 * On success the fields are converted back and copied out to @arg.
 *
 * Return: 0 on success, the memdup_user() error, -ENOMEM, the error from
 * _btrfs_ioctl_set_received_subvol(), or -EFAULT if the copy-out failed.
 */
static long btrfs_ioctl_set_received_subvol_32(struct file *file,
						void __user *arg)
{
	struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL;
	struct btrfs_ioctl_received_subvol_args *args64 = NULL;
	int ret = 0;

	args32 = memdup_user(arg, sizeof(*args32));
	if (IS_ERR(args32))
		return PTR_ERR(args32);

	args64 = kmalloc(sizeof(*args64), GFP_KERNEL);
	if (!args64) {
		ret = -ENOMEM;
		goto out;
	}

	memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE);
	args64->stransid = args32->stransid;
	args64->rtransid = args32->rtransid;
	args64->stime.sec = args32->stime.sec;
	args64->stime.nsec = args32->stime.nsec;
	args64->rtime.sec = args32->rtime.sec;
	args64->rtime.nsec = args32->rtime.nsec;
	args64->flags = args32->flags;

	ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_idmap(file), args64);
	if (ret)
		goto out;

	/* Mirror the values the helper filled in back into the 32-bit copy. */
	memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE);
	args32->stransid = args64->stransid;
	args32->rtransid = args64->rtransid;
	args32->stime.sec = args64->stime.sec;
	args32->stime.nsec = args64->stime.nsec;
	args32->rtime.sec = args64->rtime.sec;
	args32->rtime.nsec = args64->rtime.nsec;
	args32->flags = args64->flags;

	ret = copy_to_user(arg, args32, sizeof(*args32));
	if (ret)
		ret = -EFAULT;

out:
	kfree(args32);
	kfree(args64);
	return ret;
}
#endif
4076
4077	static long btrfs_ioctl_set_received_subvol(struct file *file,
4078	void __user *arg)
4079	{
4080	struct btrfs_ioctl_received_subvol_args *sa = NULL;
4081	int ret = `0`;
4082
4083	sa = memdup_user(arg, sizeof(*sa));
4084	if (IS_ERR(ptr: sa))
4085	return PTR_ERR(ptr: sa);
4086
4087	ret = _btrfs_ioctl_set_received_subvol(file, idmap: file_mnt_idmap(file), sa);
4088
4089	if (ret)
4090	goto out;
4091
4092	ret = copy_to_user(to: arg, from: sa, n: sizeof(*sa));
4093	if (ret)
4094	ret = -EFAULT;
4095
4096	out:
4097	kfree(objp: sa);
4098	return ret;
4099	}
4100
4101	static int btrfs_ioctl_get_fslabel(struct btrfs_fs_info *fs_info,
4102	void __user *arg)
4103	{
4104	size_t len;
4105	int ret;
4106	char label[BTRFS_LABEL_SIZE];
4107
4108	spin_lock(lock: &fs_info->super_lock);
4109	memcpy(label, fs_info->super_copy->label, BTRFS_LABEL_SIZE);
4110	spin_unlock(lock: &fs_info->super_lock);
4111
4112	len = strnlen(p: label, BTRFS_LABEL_SIZE);
4113
4114	if (len == BTRFS_LABEL_SIZE) {
4115	btrfs_warn(fs_info,
4116	"label is too long, return the first %zu bytes",
4117	--len);
4118	}
4119
4120	ret = copy_to_user(to: arg, from: label, n: len);
4121
4122	return ret ? -EFAULT : `0`;
4123	}
4124
4125	static int btrfs_ioctl_set_fslabel(struct file file, void* __user *arg)
4126	{
4127	struct inode *inode = file_inode(f: file);
4128	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
4129	struct btrfs_root *root = BTRFS_I(inode)->root;
4130	struct btrfs_super_block *super_block = fs_info->super_copy;
4131	struct btrfs_trans_handle *trans;
4132	char label[BTRFS_LABEL_SIZE];
4133	int ret;
4134
4135	if (!capable(CAP_SYS_ADMIN))
4136	return -EPERM;
4137
4138	if (copy_from_user(to: label, from: arg, n: sizeof(label)))
4139	return -EFAULT;
4140
4141	if (strnlen(p: label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
4142	btrfs_err(fs_info,
4143	"unable to set label with more than %d bytes",
4144	BTRFS_LABEL_SIZE - `1`);
4145	return -EINVAL;
4146	}
4147
4148	ret = mnt_want_write_file(file);
4149	if (ret)
4150	return ret;
4151
4152	trans = btrfs_start_transaction(root, num_items: `0`);
4153	if (IS_ERR(ptr: trans)) {
4154	ret = PTR_ERR(ptr: trans);
4155	goto out_unlock;
4156	}
4157
4158	spin_lock(lock: &fs_info->super_lock);
4159	strcpy(p: super_block->label, q: label);
4160	spin_unlock(lock: &fs_info->super_lock);
4161	ret = btrfs_commit_transaction(trans);
4162
4163	out_unlock:
4164	mnt_drop_write_file(file);
4165	return ret;
4166	}
4167
4168	#define INIT_FEATURE_FLAGS(suffix) \
4169	{ .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \
4170	.compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
4171	.incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }
4172
4173	int btrfs_ioctl_get_supported_features(void __user *arg)
4174	{
4175	static const struct btrfs_ioctl_feature_flags features[`3`] = {
4176	INIT_FEATURE_FLAGS(SUPP),
4177	INIT_FEATURE_FLAGS(SAFE_SET),
4178	INIT_FEATURE_FLAGS(SAFE_CLEAR)
4179	};
4180
4181	if (copy_to_user(to: arg, from: &features, n: sizeof(features)))
4182	return -EFAULT;
4183
4184	return `0`;
4185	}
4186
4187	static int btrfs_ioctl_get_features(struct btrfs_fs_info *fs_info,
4188	void __user *arg)
4189	{
4190	struct btrfs_super_block *super_block = fs_info->super_copy;
4191	struct btrfs_ioctl_feature_flags features;
4192
4193	features.compat_flags = btrfs_super_compat_flags(s: super_block);
4194	features.compat_ro_flags = btrfs_super_compat_ro_flags(s: super_block);
4195	features.incompat_flags = btrfs_super_incompat_flags(s: super_block);
4196
4197	if (copy_to_user(to: arg, from: &features, n: sizeof(features)))
4198	return -EFAULT;
4199
4200	return `0`;
4201	}
4202
4203	static int check_feature_bits(struct btrfs_fs_info *fs_info,
4204	enum btrfs_feature_set set,
4205	u64 change_mask, u64 flags, u64 supported_flags,
4206	u64 safe_set, u64 safe_clear)
4207	{
4208	const char *type = btrfs_feature_set_name(set);
4209	char *names;
4210	u64 disallowed, unsupported;
4211	u64 set_mask = flags & change_mask;
4212	u64 clear_mask = ~flags & change_mask;
4213
4214	unsupported = set_mask & ~supported_flags;
4215	if (unsupported) {
4216	names = btrfs_printable_features(set, flags: unsupported);
4217	if (names) {
4218	btrfs_warn(fs_info,
4219	"this kernel does not support the %s feature bit%s",
4220	names, strchr(names, `','`) ? "s" : "");
4221	kfree(objp: names);
4222	} else
4223	btrfs_warn(fs_info,
4224	"this kernel does not support %s bits 0x%llx",
4225	type, unsupported);
4226	return -EOPNOTSUPP;
4227	}
4228
4229	disallowed = set_mask & ~safe_set;
4230	if (disallowed) {
4231	names = btrfs_printable_features(set, flags: disallowed);
4232	if (names) {
4233	btrfs_warn(fs_info,
4234	"can't set the %s feature bit%s while mounted",
4235	names, strchr(names, `','`) ? "s" : "");
4236	kfree(objp: names);
4237	} else
4238	btrfs_warn(fs_info,
4239	"can't set %s bits 0x%llx while mounted",
4240	type, disallowed);
4241	return -EPERM;
4242	}
4243
4244	disallowed = clear_mask & ~safe_clear;
4245	if (disallowed) {
4246	names = btrfs_printable_features(set, flags: disallowed);
4247	if (names) {
4248	btrfs_warn(fs_info,
4249	"can't clear the %s feature bit%s while mounted",
4250	names, strchr(names, `','`) ? "s" : "");
4251	kfree(objp: names);
4252	} else
4253	btrfs_warn(fs_info,
4254	"can't clear %s bits 0x%llx while mounted",
4255	type, disallowed);
4256	return -EPERM;
4257	}
4258
4259	return `0`;
4260	}
4261
4262	#define check_feature(fs_info, change_mask, flags, mask_base) \
4263	check_feature_bits(fs_info, FEAT_##mask_base, change_mask, flags, \
4264	BTRFS_FEATURE_ ## mask_base ## _SUPP, \
4265	BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \
4266	BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR)
4267
4268	static int btrfs_ioctl_set_features(struct file file, void* __user *arg)
4269	{
4270	struct inode *inode = file_inode(f: file);
4271	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
4272	struct btrfs_root *root = BTRFS_I(inode)->root;
4273	struct btrfs_super_block *super_block = fs_info->super_copy;
4274	struct btrfs_ioctl_feature_flags flags[`2`];
4275	struct btrfs_trans_handle *trans;
4276	u64 newflags;
4277	int ret;
4278
4279	if (!capable(CAP_SYS_ADMIN))
4280	return -EPERM;
4281
4282	if (copy_from_user(to: flags, from: arg, n: sizeof(flags)))
4283	return -EFAULT;
4284
4285	/ Nothing to do /
4286	if (!flags[`0`].compat_flags && !flags[`0`].compat_ro_flags &&
4287	!flags[`0`].incompat_flags)
4288	return `0`;
4289
4290	ret = check_feature(fs_info, flags[`0`].compat_flags,
4291	flags[`1`].compat_flags, COMPAT);
4292	if (ret)
4293	return ret;
4294
4295	ret = check_feature(fs_info, flags[`0`].compat_ro_flags,
4296	flags[`1`].compat_ro_flags, COMPAT_RO);
4297	if (ret)
4298	return ret;
4299
4300	ret = check_feature(fs_info, flags[`0`].incompat_flags,
4301	flags[`1`].incompat_flags, INCOMPAT);
4302	if (ret)
4303	return ret;
4304
4305	ret = mnt_want_write_file(file);
4306	if (ret)
4307	return ret;
4308
4309	trans = btrfs_start_transaction(root, num_items: `0`);
4310	if (IS_ERR(ptr: trans)) {
4311	ret = PTR_ERR(ptr: trans);
4312	goto out_drop_write;
4313	}
4314
4315	spin_lock(lock: &fs_info->super_lock);
4316	newflags = btrfs_super_compat_flags(s: super_block);
4317	newflags \|= flags[`0`].compat_flags & flags[`1`].compat_flags;
4318	newflags &= ~(flags[`0`].compat_flags & ~flags[`1`].compat_flags);
4319	btrfs_set_super_compat_flags(s: super_block, val: newflags);
4320
4321	newflags = btrfs_super_compat_ro_flags(s: super_block);
4322	newflags \|= flags[`0`].compat_ro_flags & flags[`1`].compat_ro_flags;
4323	newflags &= ~(flags[`0`].compat_ro_flags & ~flags[`1`].compat_ro_flags);
4324	btrfs_set_super_compat_ro_flags(s: super_block, val: newflags);
4325
4326	newflags = btrfs_super_incompat_flags(s: super_block);
4327	newflags \|= flags[`0`].incompat_flags & flags[`1`].incompat_flags;
4328	newflags &= ~(flags[`0`].incompat_flags & ~flags[`1`].incompat_flags);
4329	btrfs_set_super_incompat_flags(s: super_block, val: newflags);
4330	spin_unlock(lock: &fs_info->super_lock);
4331
4332	ret = btrfs_commit_transaction(trans);
4333	out_drop_write:
4334	mnt_drop_write_file(file);
4335
4336	return ret;
4337	}
4338
4339	static int _btrfs_ioctl_send(struct inode inode, void* __user *argp, bool compat)
4340	{
4341	struct btrfs_ioctl_send_args *arg;
4342	int ret;
4343
4344	if (compat) {
4345	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4346	struct btrfs_ioctl_send_args_32 args32 = { `0` };
4347
4348	ret = copy_from_user(to: &args32, from: argp, n: sizeof(args32));
4349	if (ret)
4350	return -EFAULT;
4351	arg = kzalloc(size: sizeof(*arg), GFP_KERNEL);
4352	if (!arg)
4353	return -ENOMEM;
4354	arg->send_fd = args32.send_fd;
4355	arg->clone_sources_count = args32.clone_sources_count;
4356	arg->clone_sources = compat_ptr(uptr: args32.clone_sources);
4357	arg->parent_root = args32.parent_root;
4358	arg->flags = args32.flags;
4359	memcpy(arg->reserved, args32.reserved,
4360	sizeof(args32.reserved));
4361	#else
4362	return -ENOTTY;
4363	#endif
4364	} else {
4365	arg = memdup_user(argp, sizeof(*arg));
4366	if (IS_ERR(ptr: arg))
4367	return PTR_ERR(ptr: arg);
4368	}
4369	ret = btrfs_ioctl_send(inode, arg);
4370	kfree(objp: arg);
4371	return ret;
4372	}
4373
4374	static int btrfs_ioctl_encoded_read(struct file file, void* __user *argp,
4375	bool compat)
4376	{
4377	struct btrfs_ioctl_encoded_io_args args = { `0` };
4378	size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
4379	flags);
4380	size_t copy_end;
4381	struct iovec iovstack[UIO_FASTIOV];
4382	struct iovec *iov = iovstack;
4383	struct iov_iter iter;
4384	loff_t pos;
4385	struct kiocb kiocb;
4386	ssize_t ret;
4387
4388	if (!capable(CAP_SYS_ADMIN)) {
4389	ret = -EPERM;
4390	goto out_acct;
4391	}
4392
4393	if (compat) {
4394	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4395	struct btrfs_ioctl_encoded_io_args_32 args32;
4396
4397	copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
4398	flags);
4399	if (copy_from_user(to: &args32, from: argp, n: copy_end)) {
4400	ret = -EFAULT;
4401	goto out_acct;
4402	}
4403	args.iov = compat_ptr(uptr: args32.iov);
4404	args.iovcnt = args32.iovcnt;
4405	args.offset = args32.offset;
4406	args.flags = args32.flags;
4407	#else
4408	return -ENOTTY;
4409	#endif
4410	} else {
4411	copy_end = copy_end_kernel;
4412	if (copy_from_user(to: &args, from: argp, n: copy_end)) {
4413	ret = -EFAULT;
4414	goto out_acct;
4415	}
4416	}
4417	if (args.flags != `0`) {
4418	ret = -EINVAL;
4419	goto out_acct;
4420	}
4421
4422	ret = import_iovec(ITER_DEST, uvec: args.iov, nr_segs: args.iovcnt, ARRAY_SIZE(iovstack),
4423	iovp: &iov, i: &iter);
4424	if (ret < `0`)
4425	goto out_acct;
4426
4427	if (iov_iter_count(i: &iter) == `0`) {
4428	ret = `0`;
4429	goto out_iov;
4430	}
4431	pos = args.offset;
4432	ret = rw_verify_area(READ, file, &pos, args.len);
4433	if (ret < `0`)
4434	goto out_iov;
4435
4436	init_sync_kiocb(kiocb: &kiocb, filp: file);
4437	kiocb.ki_pos = pos;
4438
4439	ret = btrfs_encoded_read(iocb: &kiocb, iter: &iter, encoded: &args);
4440	if (ret >= `0`) {
4441	fsnotify_access(file);
4442	if (copy_to_user(to: argp + copy_end,
4443	from: (char *)&args + copy_end_kernel,
4444	n: sizeof(args) - copy_end_kernel))
4445	ret = -EFAULT;
4446	}
4447
4448	out_iov:
4449	kfree(objp: iov);
4450	out_acct:
4451	if (ret > `0`)
4452	add_rchar(current, amt: ret);
4453	inc_syscr(current);
4454	return ret;
4455	}
4456
4457	static int btrfs_ioctl_encoded_write(struct file file, void* __user *argp, bool compat)
4458	{
4459	struct btrfs_ioctl_encoded_io_args args;
4460	struct iovec iovstack[UIO_FASTIOV];
4461	struct iovec *iov = iovstack;
4462	struct iov_iter iter;
4463	loff_t pos;
4464	struct kiocb kiocb;
4465	ssize_t ret;
4466
4467	if (!capable(CAP_SYS_ADMIN)) {
4468	ret = -EPERM;
4469	goto out_acct;
4470	}
4471
4472	if (!(file->f_mode & FMODE_WRITE)) {
4473	ret = -EBADF;
4474	goto out_acct;
4475	}
4476
4477	if (compat) {
4478	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4479	struct btrfs_ioctl_encoded_io_args_32 args32;
4480
4481	if (copy_from_user(to: &args32, from: argp, n: sizeof(args32))) {
4482	ret = -EFAULT;
4483	goto out_acct;
4484	}
4485	args.iov = compat_ptr(uptr: args32.iov);
4486	args.iovcnt = args32.iovcnt;
4487	args.offset = args32.offset;
4488	args.flags = args32.flags;
4489	args.len = args32.len;
4490	args.unencoded_len = args32.unencoded_len;
4491	args.unencoded_offset = args32.unencoded_offset;
4492	args.compression = args32.compression;
4493	args.encryption = args32.encryption;
4494	memcpy(args.reserved, args32.reserved, sizeof(args.reserved));
4495	#else
4496	return -ENOTTY;
4497	#endif
4498	} else {
4499	if (copy_from_user(to: &args, from: argp, n: sizeof(args))) {
4500	ret = -EFAULT;
4501	goto out_acct;
4502	}
4503	}
4504
4505	ret = -EINVAL;
4506	if (args.flags != `0`)
4507	goto out_acct;
4508	if (memchr_inv(p: args.reserved, c: `0`, size: sizeof(args.reserved)))
4509	goto out_acct;
4510	if (args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
4511	args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
4512	goto out_acct;
4513	if (args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES \|\|
4514	args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
4515	goto out_acct;
4516	if (args.unencoded_offset > args.unencoded_len)
4517	goto out_acct;
4518	if (args.len > args.unencoded_len - args.unencoded_offset)
4519	goto out_acct;
4520
4521	ret = import_iovec(ITER_SOURCE, uvec: args.iov, nr_segs: args.iovcnt, ARRAY_SIZE(iovstack),
4522	iovp: &iov, i: &iter);
4523	if (ret < `0`)
4524	goto out_acct;
4525
4526	file_start_write(file);
4527
4528	if (iov_iter_count(i: &iter) == `0`) {
4529	ret = `0`;
4530	goto out_end_write;
4531	}
4532	pos = args.offset;
4533	ret = rw_verify_area(WRITE, file, &pos, args.len);
4534	if (ret < `0`)
4535	goto out_end_write;
4536
4537	init_sync_kiocb(kiocb: &kiocb, filp: file);
4538	ret = kiocb_set_rw_flags(ki: &kiocb, flags: `0`);
4539	if (ret)
4540	goto out_end_write;
4541	kiocb.ki_pos = pos;
4542
4543	ret = btrfs_do_write_iter(iocb: &kiocb, from: &iter, encoded: &args);
4544	if (ret > `0`)
4545	fsnotify_modify(file);
4546
4547	out_end_write:
4548	file_end_write(file);
4549	kfree(objp: iov);
4550	out_acct:
4551	if (ret > `0`)
4552	add_wchar(current, amt: ret);
4553	inc_syscw(current);
4554	return ret;
4555	}
4556
4557	long btrfs_ioctl(struct file file, unsigned* int
4558	cmd, unsigned long arg)
4559	{
4560	struct inode *inode = file_inode(f: file);
4561	struct btrfs_fs_info *fs_info = btrfs_sb(sb: inode->i_sb);
4562	struct btrfs_root *root = BTRFS_I(inode)->root;
4563	void __user argp = (void* __user *)arg;
4564
4565	switch (cmd) {
4566	case FS_IOC_GETVERSION:
4567	return btrfs_ioctl_getversion(inode, arg: argp);
4568	case FS_IOC_GETFSLABEL:
4569	return btrfs_ioctl_get_fslabel(fs_info, arg: argp);
4570	case FS_IOC_SETFSLABEL:
4571	return btrfs_ioctl_set_fslabel(file, arg: argp);
4572	case FITRIM:
4573	return btrfs_ioctl_fitrim(fs_info, arg: argp);
4574	case BTRFS_IOC_SNAP_CREATE:
4575	return btrfs_ioctl_snap_create(file, arg: argp, subvol: `0`);
4576	case BTRFS_IOC_SNAP_CREATE_V2:
4577	return btrfs_ioctl_snap_create_v2(file, arg: argp, subvol: `0`);
4578	case BTRFS_IOC_SUBVOL_CREATE:
4579	return btrfs_ioctl_snap_create(file, arg: argp, subvol: `1`);
4580	case BTRFS_IOC_SUBVOL_CREATE_V2:
4581	return btrfs_ioctl_snap_create_v2(file, arg: argp, subvol: `1`);
4582	case BTRFS_IOC_SNAP_DESTROY:
4583	return btrfs_ioctl_snap_destroy(file, arg: argp, destroy_v2: false);
4584	case BTRFS_IOC_SNAP_DESTROY_V2:
4585	return btrfs_ioctl_snap_destroy(file, arg: argp, destroy_v2: true);
4586	case BTRFS_IOC_SUBVOL_GETFLAGS:
4587	return btrfs_ioctl_subvol_getflags(inode, arg: argp);
4588	case BTRFS_IOC_SUBVOL_SETFLAGS:
4589	return btrfs_ioctl_subvol_setflags(file, arg: argp);
4590	case BTRFS_IOC_DEFAULT_SUBVOL:
4591	return btrfs_ioctl_default_subvol(file, argp);
4592	case BTRFS_IOC_DEFRAG:
4593	return btrfs_ioctl_defrag(file, NULL);
4594	case BTRFS_IOC_DEFRAG_RANGE:
4595	return btrfs_ioctl_defrag(file, argp);
4596	case BTRFS_IOC_RESIZE:
4597	return btrfs_ioctl_resize(file, arg: argp);
4598	case BTRFS_IOC_ADD_DEV:
4599	return btrfs_ioctl_add_dev(fs_info, arg: argp);
4600	case BTRFS_IOC_RM_DEV:
4601	return btrfs_ioctl_rm_dev(file, arg: argp);
4602	case BTRFS_IOC_RM_DEV_V2:
4603	return btrfs_ioctl_rm_dev_v2(file, arg: argp);
4604	case BTRFS_IOC_FS_INFO:
4605	return btrfs_ioctl_fs_info(fs_info, arg: argp);
4606	case BTRFS_IOC_DEV_INFO:
4607	return btrfs_ioctl_dev_info(fs_info, arg: argp);
4608	case BTRFS_IOC_TREE_SEARCH:
4609	return btrfs_ioctl_tree_search(inode, argp);
4610	case BTRFS_IOC_TREE_SEARCH_V2:
4611	return btrfs_ioctl_tree_search_v2(inode, argp);
4612	case BTRFS_IOC_INO_LOOKUP:
4613	return btrfs_ioctl_ino_lookup(root, argp);
4614	case BTRFS_IOC_INO_PATHS:
4615	return btrfs_ioctl_ino_to_path(root, arg: argp);
4616	case BTRFS_IOC_LOGICAL_INO:
4617	return btrfs_ioctl_logical_to_ino(fs_info, arg: argp, version: `1`);
4618	case BTRFS_IOC_LOGICAL_INO_V2:
4619	return btrfs_ioctl_logical_to_ino(fs_info, arg: argp, version: `2`);
4620	case BTRFS_IOC_SPACE_INFO:
4621	return btrfs_ioctl_space_info(fs_info, arg: argp);
4622	case BTRFS_IOC_SYNC: {
4623	int ret;
4624
4625	ret = btrfs_start_delalloc_roots(fs_info, LONG_MAX, in_reclaim_context: false);
4626	if (ret)
4627	return ret;
4628	ret = btrfs_sync_fs(sb: inode->i_sb, wait: `1`);
4629	/*
4630	* The transaction thread may want to do more work,
4631	* namely it pokes the cleaner kthread that will start
4632	* processing uncleaned subvols.
4633	*/
4634	wake_up_process(tsk: fs_info->transaction_kthread);
4635	return ret;
4636	}
4637	case BTRFS_IOC_START_SYNC:
4638	return btrfs_ioctl_start_sync(root, argp);
4639	case BTRFS_IOC_WAIT_SYNC:
4640	return btrfs_ioctl_wait_sync(fs_info, argp);
4641	case BTRFS_IOC_SCRUB:
4642	return btrfs_ioctl_scrub(file, arg: argp);
4643	case BTRFS_IOC_SCRUB_CANCEL:
4644	return btrfs_ioctl_scrub_cancel(fs_info);
4645	case BTRFS_IOC_SCRUB_PROGRESS:
4646	return btrfs_ioctl_scrub_progress(fs_info, arg: argp);
4647	case BTRFS_IOC_BALANCE_V2:
4648	return btrfs_ioctl_balance(file, arg: argp);
4649	case BTRFS_IOC_BALANCE_CTL:
4650	return btrfs_ioctl_balance_ctl(fs_info, cmd: arg);
4651	case BTRFS_IOC_BALANCE_PROGRESS:
4652	return btrfs_ioctl_balance_progress(fs_info, arg: argp);
4653	case BTRFS_IOC_SET_RECEIVED_SUBVOL:
4654	return btrfs_ioctl_set_received_subvol(file, arg: argp);
4655	#ifdef CONFIG_64BIT
4656	case BTRFS_IOC_SET_RECEIVED_SUBVOL_32:
4657	return btrfs_ioctl_set_received_subvol_32(file, arg: argp);
4658	#endif
4659	case BTRFS_IOC_SEND:
4660	return _btrfs_ioctl_send(inode, argp, compat: false);
4661	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4662	case BTRFS_IOC_SEND_32:
4663	return _btrfs_ioctl_send(inode, argp, compat: true);
4664	#endif
4665	case BTRFS_IOC_GET_DEV_STATS:
4666	return btrfs_ioctl_get_dev_stats(fs_info, arg: argp);
4667	case BTRFS_IOC_QUOTA_CTL:
4668	return btrfs_ioctl_quota_ctl(file, arg: argp);
4669	case BTRFS_IOC_QGROUP_ASSIGN:
4670	return btrfs_ioctl_qgroup_assign(file, arg: argp);
4671	case BTRFS_IOC_QGROUP_CREATE:
4672	return btrfs_ioctl_qgroup_create(file, arg: argp);
4673	case BTRFS_IOC_QGROUP_LIMIT:
4674	return btrfs_ioctl_qgroup_limit(file, arg: argp);
4675	case BTRFS_IOC_QUOTA_RESCAN:
4676	return btrfs_ioctl_quota_rescan(file, arg: argp);
4677	case BTRFS_IOC_QUOTA_RESCAN_STATUS:
4678	return btrfs_ioctl_quota_rescan_status(fs_info, arg: argp);
4679	case BTRFS_IOC_QUOTA_RESCAN_WAIT:
4680	return btrfs_ioctl_quota_rescan_wait(fs_info, arg: argp);
4681	case BTRFS_IOC_DEV_REPLACE:
4682	return btrfs_ioctl_dev_replace(fs_info, arg: argp);
4683	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
4684	return btrfs_ioctl_get_supported_features(arg: argp);
4685	case BTRFS_IOC_GET_FEATURES:
4686	return btrfs_ioctl_get_features(fs_info, arg: argp);
4687	case BTRFS_IOC_SET_FEATURES:
4688	return btrfs_ioctl_set_features(file, arg: argp);
4689	case BTRFS_IOC_GET_SUBVOL_INFO:
4690	return btrfs_ioctl_get_subvol_info(inode, argp);
4691	case BTRFS_IOC_GET_SUBVOL_ROOTREF:
4692	return btrfs_ioctl_get_subvol_rootref(root, argp);
4693	case BTRFS_IOC_INO_LOOKUP_USER:
4694	return btrfs_ioctl_ino_lookup_user(file, argp);
4695	case FS_IOC_ENABLE_VERITY:
4696	return fsverity_ioctl_enable(filp: file, arg: (const void __user *)argp);
4697	case FS_IOC_MEASURE_VERITY:
4698	return fsverity_ioctl_measure(filp: file, arg: argp);
4699	case BTRFS_IOC_ENCODED_READ:
4700	return btrfs_ioctl_encoded_read(file, argp, compat: false);
4701	case BTRFS_IOC_ENCODED_WRITE:
4702	return btrfs_ioctl_encoded_write(file, argp, compat: false);
4703	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4704	case BTRFS_IOC_ENCODED_READ_32:
4705	return btrfs_ioctl_encoded_read(file, argp, compat: true);
4706	case BTRFS_IOC_ENCODED_WRITE_32:
4707	return btrfs_ioctl_encoded_write(file, argp, compat: true);
4708	#endif
4709	}
4710
4711	return -ENOTTY;
4712	}
4713
#ifdef CONFIG_COMPAT
/*
 * ioctl entry point for 32-bit callers.
 *
 * @file: file the ioctl was issued on
 * @cmd:  ioctl command number as seen by the 32-bit caller
 * @arg:  raw ioctl argument from the 32-bit caller
 *
 * FS_IOC32_GETVERSION is rewritten to FS_IOC_GETVERSION; every other command
 * number is passed through unchanged. The argument is converted with
 * compat_ptr() and the call is then handled by btrfs_ioctl().
 *
 * Return: whatever btrfs_ioctl() returns.
 */
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	/*
	 * These all access 32-bit values anyway so no further
	 * handling is necessary.
	 */
	switch (cmd) {
	case FS_IOC32_GETVERSION:
		cmd = FS_IOC_GETVERSION;
		break;
	}

	return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
4730

source code of linux/fs/btrfs/ioctl.c