1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #ifndef NO_BCACHEFS_FS |
3 | |
4 | #include "bcachefs.h" |
5 | #include "acl.h" |
6 | #include "bkey_buf.h" |
7 | #include "btree_update.h" |
8 | #include "buckets.h" |
9 | #include "chardev.h" |
10 | #include "dirent.h" |
11 | #include "errcode.h" |
12 | #include "extents.h" |
13 | #include "fs.h" |
14 | #include "fs-common.h" |
15 | #include "fs-io.h" |
16 | #include "fs-ioctl.h" |
17 | #include "fs-io-buffered.h" |
18 | #include "fs-io-direct.h" |
19 | #include "fs-io-pagecache.h" |
20 | #include "fsck.h" |
21 | #include "inode.h" |
22 | #include "io_read.h" |
23 | #include "journal.h" |
24 | #include "keylist.h" |
25 | #include "quota.h" |
26 | #include "snapshot.h" |
27 | #include "super.h" |
28 | #include "xattr.h" |
29 | |
30 | #include <linux/aio.h> |
31 | #include <linux/backing-dev.h> |
32 | #include <linux/exportfs.h> |
33 | #include <linux/fiemap.h> |
34 | #include <linux/module.h> |
35 | #include <linux/pagemap.h> |
36 | #include <linux/posix_acl.h> |
37 | #include <linux/random.h> |
38 | #include <linux/seq_file.h> |
39 | #include <linux/statfs.h> |
40 | #include <linux/string.h> |
41 | #include <linux/xattr.h> |
42 | |
43 | static struct kmem_cache *bch2_inode_cache; |
44 | |
45 | static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum, |
46 | struct bch_inode_info *, |
47 | struct bch_inode_unpacked *, |
48 | struct bch_subvolume *); |
49 | |
50 | void bch2_inode_update_after_write(struct btree_trans *trans, |
51 | struct bch_inode_info *inode, |
52 | struct bch_inode_unpacked *bi, |
53 | unsigned fields) |
54 | { |
55 | struct bch_fs *c = trans->c; |
56 | |
57 | BUG_ON(bi->bi_inum != inode->v.i_ino); |
58 | |
59 | bch2_assert_pos_locked(trans, BTREE_ID_inodes, |
60 | POS(0, bi->bi_inum), |
61 | c->opts.inodes_use_key_cache); |
62 | |
63 | set_nlink(inode: &inode->v, nlink: bch2_inode_nlink_get(bi)); |
64 | i_uid_write(inode: &inode->v, uid: bi->bi_uid); |
65 | i_gid_write(inode: &inode->v, gid: bi->bi_gid); |
66 | inode->v.i_mode = bi->bi_mode; |
67 | |
68 | if (fields & ATTR_ATIME) |
69 | inode_set_atime_to_ts(inode: &inode->v, ts: bch2_time_to_timespec(c, time: bi->bi_atime)); |
70 | if (fields & ATTR_MTIME) |
71 | inode_set_mtime_to_ts(inode: &inode->v, ts: bch2_time_to_timespec(c, time: bi->bi_mtime)); |
72 | if (fields & ATTR_CTIME) |
73 | inode_set_ctime_to_ts(inode: &inode->v, ts: bch2_time_to_timespec(c, time: bi->bi_ctime)); |
74 | |
75 | inode->ei_inode = *bi; |
76 | |
77 | bch2_inode_flags_to_vfs(inode); |
78 | } |
79 | |
80 | int __must_check bch2_write_inode(struct bch_fs *c, |
81 | struct bch_inode_info *inode, |
82 | inode_set_fn set, |
83 | void *p, unsigned fields) |
84 | { |
85 | struct btree_trans *trans = bch2_trans_get(c); |
86 | struct btree_iter iter = { NULL }; |
87 | struct bch_inode_unpacked inode_u; |
88 | int ret; |
89 | retry: |
90 | bch2_trans_begin(trans); |
91 | |
92 | ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), |
93 | BTREE_ITER_INTENT) ?: |
94 | (set ? set(trans, inode, &inode_u, p) : 0) ?: |
95 | bch2_inode_write(trans, iter: &iter, inode: &inode_u) ?: |
96 | bch2_trans_commit(trans, NULL, NULL, flags: BCH_TRANS_COMMIT_no_enospc); |
97 | |
98 | /* |
99 | * the btree node lock protects inode->ei_inode, not ei_update_lock; |
100 | * this is important for inode updates via bchfs_write_index_update |
101 | */ |
102 | if (!ret) |
103 | bch2_inode_update_after_write(trans, inode, bi: &inode_u, fields); |
104 | |
105 | bch2_trans_iter_exit(trans, &iter); |
106 | |
107 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
108 | goto retry; |
109 | |
110 | bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c, |
111 | "%s: inode %u:%llu not found when updating" , |
112 | bch2_err_str(ret), |
113 | inode_inum(inode).subvol, |
114 | inode_inum(inode).inum); |
115 | |
116 | bch2_trans_put(trans); |
117 | return ret < 0 ? ret : 0; |
118 | } |
119 | |
120 | int bch2_fs_quota_transfer(struct bch_fs *c, |
121 | struct bch_inode_info *inode, |
122 | struct bch_qid new_qid, |
123 | unsigned qtypes, |
124 | enum quota_acct_mode mode) |
125 | { |
126 | unsigned i; |
127 | int ret; |
128 | |
129 | qtypes &= enabled_qtypes(c); |
130 | |
131 | for (i = 0; i < QTYP_NR; i++) |
132 | if (new_qid.q[i] == inode->ei_qid.q[i]) |
133 | qtypes &= ~(1U << i); |
134 | |
135 | if (!qtypes) |
136 | return 0; |
137 | |
138 | mutex_lock(&inode->ei_quota_lock); |
139 | |
140 | ret = bch2_quota_transfer(c, qtypes, new_qid, |
141 | inode->ei_qid, |
142 | inode->v.i_blocks + |
143 | inode->ei_quota_reserved, |
144 | mode); |
145 | if (!ret) |
146 | for (i = 0; i < QTYP_NR; i++) |
147 | if (qtypes & (1 << i)) |
148 | inode->ei_qid.q[i] = new_qid.q[i]; |
149 | |
150 | mutex_unlock(lock: &inode->ei_quota_lock); |
151 | |
152 | return ret; |
153 | } |
154 | |
155 | static int bch2_iget5_test(struct inode *vinode, void *p) |
156 | { |
157 | struct bch_inode_info *inode = to_bch_ei(vinode); |
158 | subvol_inum *inum = p; |
159 | |
160 | return inode->ei_subvol == inum->subvol && |
161 | inode->ei_inode.bi_inum == inum->inum; |
162 | } |
163 | |
164 | static int bch2_iget5_set(struct inode *vinode, void *p) |
165 | { |
166 | struct bch_inode_info *inode = to_bch_ei(vinode); |
167 | subvol_inum *inum = p; |
168 | |
169 | inode->v.i_ino = inum->inum; |
170 | inode->ei_subvol = inum->subvol; |
171 | inode->ei_inode.bi_inum = inum->inum; |
172 | return 0; |
173 | } |
174 | |
175 | static unsigned bch2_inode_hash(subvol_inum inum) |
176 | { |
177 | return jhash_3words(a: inum.subvol, b: inum.inum >> 32, c: inum.inum, JHASH_INITVAL); |
178 | } |
179 | |
180 | static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_inode_info *inode) |
181 | { |
182 | subvol_inum inum = inode_inum(inode); |
183 | struct bch_inode_info *old = to_bch_ei(inode_insert5(&inode->v, |
184 | bch2_inode_hash(inum), |
185 | bch2_iget5_test, |
186 | bch2_iget5_set, |
187 | &inum)); |
188 | BUG_ON(!old); |
189 | |
190 | if (unlikely(old != inode)) { |
191 | __destroy_inode(&inode->v); |
192 | kmem_cache_free(s: bch2_inode_cache, objp: inode); |
193 | inode = old; |
194 | } else { |
195 | mutex_lock(&c->vfs_inodes_lock); |
196 | list_add(new: &inode->ei_vfs_inode_list, head: &c->vfs_inodes_list); |
197 | mutex_unlock(lock: &c->vfs_inodes_lock); |
198 | /* |
199 | * we really don't want insert_inode_locked2() to be setting |
200 | * I_NEW... |
201 | */ |
202 | unlock_new_inode(&inode->v); |
203 | } |
204 | |
205 | return inode; |
206 | } |
207 | |
/*
 * Evaluate the expression @_do between memalloc_flags_save(@_flags) and the
 * matching restore; the macro yields the value of @_do.
 */
#define memalloc_flags_do(_flags, _do)						\
({										\
	unsigned _memalloc_saved = memalloc_flags_save(_flags);			\
	typeof(_do) _memalloc_result = (_do);					\
										\
	memalloc_noreclaim_restore(_memalloc_saved);				\
	_memalloc_result;							\
})
215 | |
216 | /* |
217 | * Allocate a new inode, dropping/retaking btree locks if necessary: |
218 | */ |
219 | static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans) |
220 | { |
221 | struct bch_fs *c = trans->c; |
222 | |
223 | struct bch_inode_info *inode = |
224 | memalloc_flags_do(PF_MEMALLOC_NORECLAIM|PF_MEMALLOC_NOWARN, |
225 | to_bch_ei(new_inode(c->vfs_sb))); |
226 | |
227 | if (unlikely(!inode)) { |
228 | int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 0 : -ENOMEM); |
229 | if (ret && inode) { |
230 | __destroy_inode(&inode->v); |
231 | kmem_cache_free(s: bch2_inode_cache, objp: inode); |
232 | } |
233 | if (ret) |
234 | return ERR_PTR(error: ret); |
235 | } |
236 | |
237 | return inode; |
238 | } |
239 | |
240 | struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) |
241 | { |
242 | struct bch_inode_info *inode = |
243 | to_bch_ei(ilookup5_nowait(c->vfs_sb, |
244 | bch2_inode_hash(inum), |
245 | bch2_iget5_test, |
246 | &inum)); |
247 | if (inode) |
248 | return &inode->v; |
249 | |
250 | struct btree_trans *trans = bch2_trans_get(c); |
251 | |
252 | struct bch_inode_unpacked inode_u; |
253 | struct bch_subvolume subvol; |
254 | int ret = lockrestart_do(trans, |
255 | bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: |
256 | bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?: |
257 | PTR_ERR_OR_ZERO(ptr: inode = bch2_new_inode(trans)); |
258 | if (!ret) { |
259 | bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol); |
260 | inode = bch2_inode_insert(c, inode); |
261 | } |
262 | bch2_trans_put(trans); |
263 | |
264 | return ret ? ERR_PTR(error: ret) : &inode->v; |
265 | } |
266 | |
267 | struct bch_inode_info * |
268 | __bch2_create(struct mnt_idmap *idmap, |
269 | struct bch_inode_info *dir, struct dentry *dentry, |
270 | umode_t mode, dev_t rdev, subvol_inum snapshot_src, |
271 | unsigned flags) |
272 | { |
273 | struct bch_fs *c = dir->v.i_sb->s_fs_info; |
274 | struct btree_trans *trans; |
275 | struct bch_inode_unpacked dir_u; |
276 | struct bch_inode_info *inode; |
277 | struct bch_inode_unpacked inode_u; |
278 | struct posix_acl *default_acl = NULL, *acl = NULL; |
279 | subvol_inum inum; |
280 | struct bch_subvolume subvol; |
281 | u64 journal_seq = 0; |
282 | int ret; |
283 | |
284 | /* |
285 | * preallocate acls + vfs inode before btree transaction, so that |
286 | * nothing can fail after the transaction succeeds: |
287 | */ |
288 | #ifdef CONFIG_BCACHEFS_POSIX_ACL |
289 | ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl); |
290 | if (ret) |
291 | return ERR_PTR(error: ret); |
292 | #endif |
293 | inode = to_bch_ei(new_inode(c->vfs_sb)); |
294 | if (unlikely(!inode)) { |
295 | inode = ERR_PTR(error: -ENOMEM); |
296 | goto err; |
297 | } |
298 | |
299 | bch2_inode_init_early(c, &inode_u); |
300 | |
301 | if (!(flags & BCH_CREATE_TMPFILE)) |
302 | mutex_lock(&dir->ei_update_lock); |
303 | |
304 | trans = bch2_trans_get(c); |
305 | retry: |
306 | bch2_trans_begin(trans); |
307 | |
308 | ret = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?: |
309 | bch2_create_trans(trans, |
310 | inode_inum(inode: dir), &dir_u, &inode_u, |
311 | !(flags & BCH_CREATE_TMPFILE) |
312 | ? &dentry->d_name : NULL, |
313 | from_kuid(to: i_user_ns(inode: &dir->v), current_fsuid()), |
314 | from_kgid(to: i_user_ns(inode: &dir->v), current_fsgid()), |
315 | mode, rdev, |
316 | default_acl, acl, snapshot_src, flags) ?: |
317 | bch2_quota_acct(c, bch_qid(u: &inode_u), Q_INO, 1, |
318 | KEY_TYPE_QUOTA_PREALLOC); |
319 | if (unlikely(ret)) |
320 | goto err_before_quota; |
321 | |
322 | inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol; |
323 | inum.inum = inode_u.bi_inum; |
324 | |
325 | ret = bch2_subvolume_get(trans, inum.subvol, true, |
326 | BTREE_ITER_WITH_UPDATES, &subvol) ?: |
327 | bch2_trans_commit(trans, NULL, journal_seq: &journal_seq, flags: 0); |
328 | if (unlikely(ret)) { |
329 | bch2_quota_acct(c, bch_qid(u: &inode_u), Q_INO, -1, |
330 | KEY_TYPE_QUOTA_WARN); |
331 | err_before_quota: |
332 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
333 | goto retry; |
334 | goto err_trans; |
335 | } |
336 | |
337 | if (!(flags & BCH_CREATE_TMPFILE)) { |
338 | bch2_inode_update_after_write(trans, inode: dir, bi: &dir_u, |
339 | ATTR_MTIME|ATTR_CTIME); |
340 | mutex_unlock(lock: &dir->ei_update_lock); |
341 | } |
342 | |
343 | bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol); |
344 | |
345 | set_cached_acl(inode: &inode->v, ACL_TYPE_ACCESS, acl); |
346 | set_cached_acl(inode: &inode->v, ACL_TYPE_DEFAULT, acl: default_acl); |
347 | |
348 | /* |
349 | * we must insert the new inode into the inode cache before calling |
350 | * bch2_trans_exit() and dropping locks, else we could race with another |
351 | * thread pulling the inode in and modifying it: |
352 | */ |
353 | inode = bch2_inode_insert(c, inode); |
354 | bch2_trans_put(trans); |
355 | err: |
356 | posix_acl_release(acl: default_acl); |
357 | posix_acl_release(acl); |
358 | return inode; |
359 | err_trans: |
360 | if (!(flags & BCH_CREATE_TMPFILE)) |
361 | mutex_unlock(lock: &dir->ei_update_lock); |
362 | |
363 | bch2_trans_put(trans); |
364 | make_bad_inode(&inode->v); |
365 | iput(&inode->v); |
366 | inode = ERR_PTR(error: ret); |
367 | goto err; |
368 | } |
369 | |
370 | /* methods */ |
371 | |
372 | static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, |
373 | subvol_inum dir, struct bch_hash_info *dir_hash_info, |
374 | const struct qstr *name) |
375 | { |
376 | struct bch_fs *c = trans->c; |
377 | struct btree_iter dirent_iter = {}; |
378 | subvol_inum inum = {}; |
379 | |
380 | int ret = bch2_hash_lookup(trans, iter: &dirent_iter, desc: bch2_dirent_hash_desc, |
381 | info: dir_hash_info, inum: dir, key: name, flags: 0); |
382 | if (ret) |
383 | return ERR_PTR(error: ret); |
384 | |
385 | struct bkey_s_c k = bch2_btree_iter_peek_slot(&dirent_iter); |
386 | ret = bkey_err(k); |
387 | if (ret) |
388 | goto err; |
389 | |
390 | ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), &inum); |
391 | if (ret > 0) |
392 | ret = -ENOENT; |
393 | if (ret) |
394 | goto err; |
395 | |
396 | struct bch_inode_info *inode = |
397 | to_bch_ei(ilookup5_nowait(c->vfs_sb, |
398 | bch2_inode_hash(inum), |
399 | bch2_iget5_test, |
400 | &inum)); |
401 | if (inode) |
402 | goto out; |
403 | |
404 | struct bch_subvolume subvol; |
405 | struct bch_inode_unpacked inode_u; |
406 | ret = bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: |
407 | bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?: |
408 | PTR_ERR_OR_ZERO(ptr: inode = bch2_new_inode(trans)); |
409 | if (bch2_err_matches(ret, ENOENT)) { |
410 | struct printbuf buf = PRINTBUF; |
411 | |
412 | bch2_bkey_val_to_text(&buf, c, k); |
413 | bch_err(c, "%s points to missing inode" , buf.buf); |
414 | printbuf_exit(&buf); |
415 | } |
416 | if (ret) |
417 | goto err; |
418 | |
419 | bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol); |
420 | inode = bch2_inode_insert(c, inode); |
421 | out: |
422 | bch2_trans_iter_exit(trans, &dirent_iter); |
423 | return inode; |
424 | err: |
425 | inode = ERR_PTR(error: ret); |
426 | goto out; |
427 | } |
428 | |
429 | static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, |
430 | unsigned int flags) |
431 | { |
432 | struct bch_fs *c = vdir->i_sb->s_fs_info; |
433 | struct bch_inode_info *dir = to_bch_ei(vdir); |
434 | struct bch_hash_info hash = bch2_hash_info_init(c, bi: &dir->ei_inode); |
435 | |
436 | struct bch_inode_info *inode; |
437 | bch2_trans_do(c, NULL, NULL, 0, |
438 | PTR_ERR_OR_ZERO(inode = bch2_lookup_trans(trans, inode_inum(dir), |
439 | &hash, &dentry->d_name))); |
440 | if (IS_ERR(ptr: inode)) |
441 | inode = NULL; |
442 | |
443 | return d_splice_alias(&inode->v, dentry); |
444 | } |
445 | |
446 | static int bch2_mknod(struct mnt_idmap *idmap, |
447 | struct inode *vdir, struct dentry *dentry, |
448 | umode_t mode, dev_t rdev) |
449 | { |
450 | struct bch_inode_info *inode = |
451 | __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev, |
452 | snapshot_src: (subvol_inum) { 0 }, flags: 0); |
453 | |
454 | if (IS_ERR(ptr: inode)) |
455 | return bch2_err_class(err: PTR_ERR(ptr: inode)); |
456 | |
457 | d_instantiate(dentry, &inode->v); |
458 | return 0; |
459 | } |
460 | |
461 | static int bch2_create(struct mnt_idmap *idmap, |
462 | struct inode *vdir, struct dentry *dentry, |
463 | umode_t mode, bool excl) |
464 | { |
465 | return bch2_mknod(idmap, vdir, dentry, mode: mode|S_IFREG, rdev: 0); |
466 | } |
467 | |
468 | static int __bch2_link(struct bch_fs *c, |
469 | struct bch_inode_info *inode, |
470 | struct bch_inode_info *dir, |
471 | struct dentry *dentry) |
472 | { |
473 | struct btree_trans *trans = bch2_trans_get(c); |
474 | struct bch_inode_unpacked dir_u, inode_u; |
475 | int ret; |
476 | |
477 | mutex_lock(&inode->ei_update_lock); |
478 | |
479 | ret = commit_do(trans, NULL, NULL, 0, |
480 | bch2_link_trans(trans, |
481 | inode_inum(dir), &dir_u, |
482 | inode_inum(inode), &inode_u, |
483 | &dentry->d_name)); |
484 | |
485 | if (likely(!ret)) { |
486 | bch2_inode_update_after_write(trans, inode: dir, bi: &dir_u, |
487 | ATTR_MTIME|ATTR_CTIME); |
488 | bch2_inode_update_after_write(trans, inode, bi: &inode_u, ATTR_CTIME); |
489 | } |
490 | |
491 | bch2_trans_put(trans); |
492 | mutex_unlock(lock: &inode->ei_update_lock); |
493 | return ret; |
494 | } |
495 | |
496 | static int bch2_link(struct dentry *old_dentry, struct inode *vdir, |
497 | struct dentry *dentry) |
498 | { |
499 | struct bch_fs *c = vdir->i_sb->s_fs_info; |
500 | struct bch_inode_info *dir = to_bch_ei(vdir); |
501 | struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode); |
502 | int ret; |
503 | |
504 | lockdep_assert_held(&inode->v.i_rwsem); |
505 | |
506 | ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?: |
507 | bch2_subvol_is_ro(c, inode->ei_subvol) ?: |
508 | __bch2_link(c, inode, dir, dentry); |
509 | if (unlikely(ret)) |
510 | return bch2_err_class(err: ret); |
511 | |
512 | ihold(inode: &inode->v); |
513 | d_instantiate(dentry, &inode->v); |
514 | return 0; |
515 | } |
516 | |
517 | int __bch2_unlink(struct inode *vdir, struct dentry *dentry, |
518 | bool deleting_snapshot) |
519 | { |
520 | struct bch_fs *c = vdir->i_sb->s_fs_info; |
521 | struct bch_inode_info *dir = to_bch_ei(vdir); |
522 | struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); |
523 | struct bch_inode_unpacked dir_u, inode_u; |
524 | struct btree_trans *trans = bch2_trans_get(c); |
525 | int ret; |
526 | |
527 | bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); |
528 | |
529 | ret = commit_do(trans, NULL, NULL, |
530 | BCH_TRANS_COMMIT_no_enospc, |
531 | bch2_unlink_trans(trans, |
532 | inode_inum(dir), &dir_u, |
533 | &inode_u, &dentry->d_name, |
534 | deleting_snapshot)); |
535 | if (unlikely(ret)) |
536 | goto err; |
537 | |
538 | bch2_inode_update_after_write(trans, inode: dir, bi: &dir_u, |
539 | ATTR_MTIME|ATTR_CTIME); |
540 | bch2_inode_update_after_write(trans, inode, bi: &inode_u, |
541 | ATTR_MTIME); |
542 | |
543 | if (inode_u.bi_subvol) { |
544 | /* |
545 | * Subvolume deletion is asynchronous, but we still want to tell |
546 | * the VFS that it's been deleted here: |
547 | */ |
548 | set_nlink(inode: &inode->v, nlink: 0); |
549 | } |
550 | err: |
551 | bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); |
552 | bch2_trans_put(trans); |
553 | |
554 | return ret; |
555 | } |
556 | |
557 | static int bch2_unlink(struct inode *vdir, struct dentry *dentry) |
558 | { |
559 | struct bch_inode_info *dir= to_bch_ei(vdir); |
560 | struct bch_fs *c = dir->v.i_sb->s_fs_info; |
561 | |
562 | int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?: |
563 | __bch2_unlink(vdir, dentry, deleting_snapshot: false); |
564 | return bch2_err_class(err: ret); |
565 | } |
566 | |
567 | static int bch2_symlink(struct mnt_idmap *idmap, |
568 | struct inode *vdir, struct dentry *dentry, |
569 | const char *symname) |
570 | { |
571 | struct bch_fs *c = vdir->i_sb->s_fs_info; |
572 | struct bch_inode_info *dir = to_bch_ei(vdir), *inode; |
573 | int ret; |
574 | |
575 | inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, rdev: 0, |
576 | snapshot_src: (subvol_inum) { 0 }, BCH_CREATE_TMPFILE); |
577 | if (IS_ERR(inode)) |
578 | return bch2_err_class(PTR_ERR(inode)); |
579 | |
580 | inode_lock(&inode->v); |
581 | ret = page_symlink(&inode->v, symname, strlen(symname) + 1); |
582 | inode_unlock(&inode->v); |
583 | |
584 | if (unlikely(ret)) |
585 | goto err; |
586 | |
587 | ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX); |
588 | if (unlikely(ret)) |
589 | goto err; |
590 | |
591 | ret = __bch2_link(c, inode, dir, dentry); |
592 | if (unlikely(ret)) |
593 | goto err; |
594 | |
595 | d_instantiate(dentry, &inode->v); |
596 | return 0; |
597 | err: |
598 | iput(&inode->v); |
599 | return bch2_err_class(ret); |
600 | } |
601 | |
602 | static int bch2_mkdir(struct mnt_idmap *idmap, |
603 | struct inode *vdir, struct dentry *dentry, umode_t mode) |
604 | { |
605 | return bch2_mknod(idmap, vdir, dentry, mode: mode|S_IFDIR, rdev: 0); |
606 | } |
607 | |
608 | static int bch2_rename2(struct mnt_idmap *idmap, |
609 | struct inode *src_vdir, struct dentry *src_dentry, |
610 | struct inode *dst_vdir, struct dentry *dst_dentry, |
611 | unsigned flags) |
612 | { |
613 | struct bch_fs *c = src_vdir->i_sb->s_fs_info; |
614 | struct bch_inode_info *src_dir = to_bch_ei(src_vdir); |
615 | struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir); |
616 | struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode); |
617 | struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode); |
618 | struct bch_inode_unpacked dst_dir_u, src_dir_u; |
619 | struct bch_inode_unpacked src_inode_u, dst_inode_u; |
620 | struct btree_trans *trans; |
621 | enum bch_rename_mode mode = flags & RENAME_EXCHANGE |
622 | ? BCH_RENAME_EXCHANGE |
623 | : dst_dentry->d_inode |
624 | ? BCH_RENAME_OVERWRITE : BCH_RENAME; |
625 | int ret; |
626 | |
627 | if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE)) |
628 | return -EINVAL; |
629 | |
630 | if (mode == BCH_RENAME_OVERWRITE) { |
631 | ret = filemap_write_and_wait_range(mapping: src_inode->v.i_mapping, |
632 | lstart: 0, LLONG_MAX); |
633 | if (ret) |
634 | return ret; |
635 | } |
636 | |
637 | trans = bch2_trans_get(c); |
638 | |
639 | bch2_lock_inodes(INODE_UPDATE_LOCK, |
640 | src_dir, |
641 | dst_dir, |
642 | src_inode, |
643 | dst_inode); |
644 | |
645 | ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?: |
646 | bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol); |
647 | if (ret) |
648 | goto err; |
649 | |
650 | if (inode_attr_changing(dir: dst_dir, inode: src_inode, id: Inode_opt_project)) { |
651 | ret = bch2_fs_quota_transfer(c, inode: src_inode, |
652 | new_qid: dst_dir->ei_qid, |
653 | qtypes: 1 << QTYP_PRJ, |
654 | mode: KEY_TYPE_QUOTA_PREALLOC); |
655 | if (ret) |
656 | goto err; |
657 | } |
658 | |
659 | if (mode == BCH_RENAME_EXCHANGE && |
660 | inode_attr_changing(dir: src_dir, inode: dst_inode, id: Inode_opt_project)) { |
661 | ret = bch2_fs_quota_transfer(c, inode: dst_inode, |
662 | new_qid: src_dir->ei_qid, |
663 | qtypes: 1 << QTYP_PRJ, |
664 | mode: KEY_TYPE_QUOTA_PREALLOC); |
665 | if (ret) |
666 | goto err; |
667 | } |
668 | |
669 | ret = commit_do(trans, NULL, NULL, 0, |
670 | bch2_rename_trans(trans, |
671 | inode_inum(src_dir), &src_dir_u, |
672 | inode_inum(dst_dir), &dst_dir_u, |
673 | &src_inode_u, |
674 | &dst_inode_u, |
675 | &src_dentry->d_name, |
676 | &dst_dentry->d_name, |
677 | mode)); |
678 | if (unlikely(ret)) |
679 | goto err; |
680 | |
681 | BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum); |
682 | BUG_ON(dst_inode && |
683 | dst_inode->v.i_ino != dst_inode_u.bi_inum); |
684 | |
685 | bch2_inode_update_after_write(trans, inode: src_dir, bi: &src_dir_u, |
686 | ATTR_MTIME|ATTR_CTIME); |
687 | |
688 | if (src_dir != dst_dir) |
689 | bch2_inode_update_after_write(trans, inode: dst_dir, bi: &dst_dir_u, |
690 | ATTR_MTIME|ATTR_CTIME); |
691 | |
692 | bch2_inode_update_after_write(trans, inode: src_inode, bi: &src_inode_u, |
693 | ATTR_CTIME); |
694 | |
695 | if (dst_inode) |
696 | bch2_inode_update_after_write(trans, inode: dst_inode, bi: &dst_inode_u, |
697 | ATTR_CTIME); |
698 | err: |
699 | bch2_trans_put(trans); |
700 | |
701 | bch2_fs_quota_transfer(c, inode: src_inode, |
702 | new_qid: bch_qid(u: &src_inode->ei_inode), |
703 | qtypes: 1 << QTYP_PRJ, |
704 | mode: KEY_TYPE_QUOTA_NOCHECK); |
705 | if (dst_inode) |
706 | bch2_fs_quota_transfer(c, inode: dst_inode, |
707 | new_qid: bch_qid(u: &dst_inode->ei_inode), |
708 | qtypes: 1 << QTYP_PRJ, |
709 | mode: KEY_TYPE_QUOTA_NOCHECK); |
710 | |
711 | bch2_unlock_inodes(INODE_UPDATE_LOCK, |
712 | src_dir, |
713 | dst_dir, |
714 | src_inode, |
715 | dst_inode); |
716 | |
717 | return bch2_err_class(err: ret); |
718 | } |
719 | |
720 | static void bch2_setattr_copy(struct mnt_idmap *idmap, |
721 | struct bch_inode_info *inode, |
722 | struct bch_inode_unpacked *bi, |
723 | struct iattr *attr) |
724 | { |
725 | struct bch_fs *c = inode->v.i_sb->s_fs_info; |
726 | unsigned int ia_valid = attr->ia_valid; |
727 | |
728 | if (ia_valid & ATTR_UID) |
729 | bi->bi_uid = from_kuid(to: i_user_ns(inode: &inode->v), uid: attr->ia_uid); |
730 | if (ia_valid & ATTR_GID) |
731 | bi->bi_gid = from_kgid(to: i_user_ns(inode: &inode->v), gid: attr->ia_gid); |
732 | |
733 | if (ia_valid & ATTR_SIZE) |
734 | bi->bi_size = attr->ia_size; |
735 | |
736 | if (ia_valid & ATTR_ATIME) |
737 | bi->bi_atime = timespec_to_bch2_time(c, ts: attr->ia_atime); |
738 | if (ia_valid & ATTR_MTIME) |
739 | bi->bi_mtime = timespec_to_bch2_time(c, ts: attr->ia_mtime); |
740 | if (ia_valid & ATTR_CTIME) |
741 | bi->bi_ctime = timespec_to_bch2_time(c, ts: attr->ia_ctime); |
742 | |
743 | if (ia_valid & ATTR_MODE) { |
744 | umode_t mode = attr->ia_mode; |
745 | kgid_t gid = ia_valid & ATTR_GID |
746 | ? attr->ia_gid |
747 | : inode->v.i_gid; |
748 | |
749 | if (!in_group_p(gid) && |
750 | !capable_wrt_inode_uidgid(idmap, inode: &inode->v, CAP_FSETID)) |
751 | mode &= ~S_ISGID; |
752 | bi->bi_mode = mode; |
753 | } |
754 | } |
755 | |
756 | int bch2_setattr_nonsize(struct mnt_idmap *idmap, |
757 | struct bch_inode_info *inode, |
758 | struct iattr *attr) |
759 | { |
760 | struct bch_fs *c = inode->v.i_sb->s_fs_info; |
761 | struct bch_qid qid; |
762 | struct btree_trans *trans; |
763 | struct btree_iter inode_iter = { NULL }; |
764 | struct bch_inode_unpacked inode_u; |
765 | struct posix_acl *acl = NULL; |
766 | int ret; |
767 | |
768 | mutex_lock(&inode->ei_update_lock); |
769 | |
770 | qid = inode->ei_qid; |
771 | |
772 | if (attr->ia_valid & ATTR_UID) |
773 | qid.q[QTYP_USR] = from_kuid(to: i_user_ns(inode: &inode->v), uid: attr->ia_uid); |
774 | |
775 | if (attr->ia_valid & ATTR_GID) |
776 | qid.q[QTYP_GRP] = from_kgid(to: i_user_ns(inode: &inode->v), gid: attr->ia_gid); |
777 | |
778 | ret = bch2_fs_quota_transfer(c, inode, new_qid: qid, qtypes: ~0, |
779 | mode: KEY_TYPE_QUOTA_PREALLOC); |
780 | if (ret) |
781 | goto err; |
782 | |
783 | trans = bch2_trans_get(c); |
784 | retry: |
785 | bch2_trans_begin(trans); |
786 | kfree(objp: acl); |
787 | acl = NULL; |
788 | |
789 | ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode), |
790 | BTREE_ITER_INTENT); |
791 | if (ret) |
792 | goto btree_err; |
793 | |
794 | bch2_setattr_copy(idmap, inode, bi: &inode_u, attr); |
795 | |
796 | if (attr->ia_valid & ATTR_MODE) { |
797 | ret = bch2_acl_chmod(trans, inode_inum(inode), &inode_u, |
798 | inode_u.bi_mode, &acl); |
799 | if (ret) |
800 | goto btree_err; |
801 | } |
802 | |
803 | ret = bch2_inode_write(trans, iter: &inode_iter, inode: &inode_u) ?: |
804 | bch2_trans_commit(trans, NULL, NULL, |
805 | flags: BCH_TRANS_COMMIT_no_enospc); |
806 | btree_err: |
807 | bch2_trans_iter_exit(trans, &inode_iter); |
808 | |
809 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
810 | goto retry; |
811 | if (unlikely(ret)) |
812 | goto err_trans; |
813 | |
814 | bch2_inode_update_after_write(trans, inode, bi: &inode_u, fields: attr->ia_valid); |
815 | |
816 | if (acl) |
817 | set_cached_acl(inode: &inode->v, ACL_TYPE_ACCESS, acl); |
818 | err_trans: |
819 | bch2_trans_put(trans); |
820 | err: |
821 | mutex_unlock(lock: &inode->ei_update_lock); |
822 | |
823 | return bch2_err_class(err: ret); |
824 | } |
825 | |
826 | static int bch2_getattr(struct mnt_idmap *idmap, |
827 | const struct path *path, struct kstat *stat, |
828 | u32 request_mask, unsigned query_flags) |
829 | { |
830 | struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry)); |
831 | struct bch_fs *c = inode->v.i_sb->s_fs_info; |
832 | |
833 | stat->dev = inode->v.i_sb->s_dev; |
834 | stat->ino = inode->v.i_ino; |
835 | stat->mode = inode->v.i_mode; |
836 | stat->nlink = inode->v.i_nlink; |
837 | stat->uid = inode->v.i_uid; |
838 | stat->gid = inode->v.i_gid; |
839 | stat->rdev = inode->v.i_rdev; |
840 | stat->size = i_size_read(inode: &inode->v); |
841 | stat->atime = inode_get_atime(inode: &inode->v); |
842 | stat->mtime = inode_get_mtime(inode: &inode->v); |
843 | stat->ctime = inode_get_ctime(inode: &inode->v); |
844 | stat->blksize = block_bytes(c); |
845 | stat->blocks = inode->v.i_blocks; |
846 | |
847 | if (request_mask & STATX_BTIME) { |
848 | stat->result_mask |= STATX_BTIME; |
849 | stat->btime = bch2_time_to_timespec(c, time: inode->ei_inode.bi_otime); |
850 | } |
851 | |
852 | if (inode->ei_inode.bi_flags & BCH_INODE_immutable) |
853 | stat->attributes |= STATX_ATTR_IMMUTABLE; |
854 | stat->attributes_mask |= STATX_ATTR_IMMUTABLE; |
855 | |
856 | if (inode->ei_inode.bi_flags & BCH_INODE_append) |
857 | stat->attributes |= STATX_ATTR_APPEND; |
858 | stat->attributes_mask |= STATX_ATTR_APPEND; |
859 | |
860 | if (inode->ei_inode.bi_flags & BCH_INODE_nodump) |
861 | stat->attributes |= STATX_ATTR_NODUMP; |
862 | stat->attributes_mask |= STATX_ATTR_NODUMP; |
863 | |
864 | return 0; |
865 | } |
866 | |
867 | static int bch2_setattr(struct mnt_idmap *idmap, |
868 | struct dentry *dentry, struct iattr *iattr) |
869 | { |
870 | struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); |
871 | struct bch_fs *c = inode->v.i_sb->s_fs_info; |
872 | int ret; |
873 | |
874 | lockdep_assert_held(&inode->v.i_rwsem); |
875 | |
876 | ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?: |
877 | setattr_prepare(idmap, dentry, iattr); |
878 | if (ret) |
879 | return ret; |
880 | |
881 | return iattr->ia_valid & ATTR_SIZE |
882 | ? bchfs_truncate(idmap, inode, iattr) |
883 | : bch2_setattr_nonsize(idmap, inode, attr: iattr); |
884 | } |
885 | |
886 | static int bch2_tmpfile(struct mnt_idmap *idmap, |
887 | struct inode *vdir, struct file *file, umode_t mode) |
888 | { |
889 | struct bch_inode_info *inode = |
890 | __bch2_create(idmap, to_bch_ei(vdir), |
891 | dentry: file->f_path.dentry, mode, rdev: 0, |
892 | snapshot_src: (subvol_inum) { 0 }, BCH_CREATE_TMPFILE); |
893 | |
894 | if (IS_ERR(ptr: inode)) |
895 | return bch2_err_class(err: PTR_ERR(ptr: inode)); |
896 | |
897 | d_mark_tmpfile(file, &inode->v); |
898 | d_instantiate(file->f_path.dentry, &inode->v); |
899 | return finish_open_simple(file, error: 0); |
900 | } |
901 | |
902 | static int bch2_fill_extent(struct bch_fs *c, |
903 | struct fiemap_extent_info *info, |
904 | struct bkey_s_c k, unsigned flags) |
905 | { |
906 | if (bkey_extent_is_direct_data(k: k.k)) { |
907 | struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); |
908 | const union bch_extent_entry *entry; |
909 | struct extent_ptr_decoded p; |
910 | int ret; |
911 | |
912 | if (k.k->type == KEY_TYPE_reflink_v) |
913 | flags |= FIEMAP_EXTENT_SHARED; |
914 | |
915 | bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { |
916 | int flags2 = 0; |
917 | u64 offset = p.ptr.offset; |
918 | |
919 | if (p.ptr.unwritten) |
920 | flags2 |= FIEMAP_EXTENT_UNWRITTEN; |
921 | |
922 | if (p.crc.compression_type) |
923 | flags2 |= FIEMAP_EXTENT_ENCODED; |
924 | else |
925 | offset += p.crc.offset; |
926 | |
927 | if ((offset & (block_sectors(c) - 1)) || |
928 | (k.k->size & (block_sectors(c) - 1))) |
929 | flags2 |= FIEMAP_EXTENT_NOT_ALIGNED; |
930 | |
931 | ret = fiemap_fill_next_extent(info, |
932 | logical: bkey_start_offset(k: k.k) << 9, |
933 | phys: offset << 9, |
934 | len: k.k->size << 9, flags: flags|flags2); |
935 | if (ret) |
936 | return ret; |
937 | } |
938 | |
939 | return 0; |
940 | } else if (bkey_extent_is_inline_data(k: k.k)) { |
941 | return fiemap_fill_next_extent(info, |
942 | logical: bkey_start_offset(k: k.k) << 9, |
943 | phys: 0, len: k.k->size << 9, |
944 | flags: flags| |
945 | FIEMAP_EXTENT_DATA_INLINE); |
946 | } else if (k.k->type == KEY_TYPE_reservation) { |
947 | return fiemap_fill_next_extent(info, |
948 | logical: bkey_start_offset(k: k.k) << 9, |
949 | phys: 0, len: k.k->size << 9, |
950 | flags: flags| |
951 | FIEMAP_EXTENT_DELALLOC| |
952 | FIEMAP_EXTENT_UNWRITTEN); |
953 | } else { |
954 | BUG(); |
955 | } |
956 | } |
957 | |
958 | static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, |
959 | u64 start, u64 len) |
960 | { |
961 | struct bch_fs *c = vinode->i_sb->s_fs_info; |
962 | struct bch_inode_info *ei = to_bch_ei(vinode); |
963 | struct btree_trans *trans; |
964 | struct btree_iter iter; |
965 | struct bkey_s_c k; |
966 | struct bkey_buf cur, prev; |
967 | struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); |
968 | unsigned offset_into_extent, sectors; |
969 | bool have_extent = false; |
970 | u32 snapshot; |
971 | int ret = 0; |
972 | |
973 | ret = fiemap_prep(inode: &ei->v, fieinfo: info, start, len: &len, FIEMAP_FLAG_SYNC); |
974 | if (ret) |
975 | return ret; |
976 | |
977 | if (start + len < start) |
978 | return -EINVAL; |
979 | |
980 | start >>= 9; |
981 | |
982 | bch2_bkey_buf_init(s: &cur); |
983 | bch2_bkey_buf_init(s: &prev); |
984 | trans = bch2_trans_get(c); |
985 | retry: |
986 | bch2_trans_begin(trans); |
987 | |
988 | ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot); |
989 | if (ret) |
990 | goto err; |
991 | |
992 | bch2_trans_iter_init(trans, iter: &iter, btree_id: BTREE_ID_extents, |
993 | pos: SPOS(inode: ei->v.i_ino, offset: start, snapshot), flags: 0); |
994 | |
995 | while (!(ret = btree_trans_too_many_iters(trans)) && |
996 | (k = bch2_btree_iter_peek_upto(&iter, end)).k && |
997 | !(ret = bkey_err(k))) { |
998 | enum btree_id data_btree = BTREE_ID_extents; |
999 | |
1000 | if (!bkey_extent_is_data(k: k.k) && |
1001 | k.k->type != KEY_TYPE_reservation) { |
1002 | bch2_btree_iter_advance(&iter); |
1003 | continue; |
1004 | } |
1005 | |
1006 | offset_into_extent = iter.pos.offset - |
1007 | bkey_start_offset(k: k.k); |
1008 | sectors = k.k->size - offset_into_extent; |
1009 | |
1010 | bch2_bkey_buf_reassemble(s: &cur, c, k); |
1011 | |
1012 | ret = bch2_read_indirect_extent(trans, data_btree: &data_btree, |
1013 | offset_into_extent: &offset_into_extent, k: &cur); |
1014 | if (ret) |
1015 | break; |
1016 | |
1017 | k = bkey_i_to_s_c(k: cur.k); |
1018 | bch2_bkey_buf_realloc(s: &prev, c, u64s: k.k->u64s); |
1019 | |
1020 | sectors = min(sectors, k.k->size - offset_into_extent); |
1021 | |
1022 | bch2_cut_front(POS(k.k->p.inode, |
1023 | bkey_start_offset(k.k) + |
1024 | offset_into_extent), |
1025 | k: cur.k); |
1026 | bch2_key_resize(k: &cur.k->k, new_size: sectors); |
1027 | cur.k->k.p = iter.pos; |
1028 | cur.k->k.p.offset += cur.k->k.size; |
1029 | |
1030 | if (have_extent) { |
1031 | bch2_trans_unlock(trans); |
1032 | ret = bch2_fill_extent(c, info, |
1033 | k: bkey_i_to_s_c(k: prev.k), flags: 0); |
1034 | if (ret) |
1035 | break; |
1036 | } |
1037 | |
1038 | bkey_copy(dst: prev.k, src: cur.k); |
1039 | have_extent = true; |
1040 | |
1041 | bch2_btree_iter_set_pos(iter: &iter, |
1042 | POS(iter.pos.inode, iter.pos.offset + sectors)); |
1043 | } |
1044 | start = iter.pos.offset; |
1045 | bch2_trans_iter_exit(trans, &iter); |
1046 | err: |
1047 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
1048 | goto retry; |
1049 | |
1050 | if (!ret && have_extent) { |
1051 | bch2_trans_unlock(trans); |
1052 | ret = bch2_fill_extent(c, info, k: bkey_i_to_s_c(k: prev.k), |
1053 | FIEMAP_EXTENT_LAST); |
1054 | } |
1055 | |
1056 | bch2_trans_put(trans); |
1057 | bch2_bkey_buf_exit(s: &cur, c); |
1058 | bch2_bkey_buf_exit(s: &prev, c); |
1059 | return ret < 0 ? ret : 0; |
1060 | } |
1061 | |
1062 | static const struct vm_operations_struct bch_vm_ops = { |
1063 | .fault = bch2_page_fault, |
1064 | .map_pages = filemap_map_pages, |
1065 | .page_mkwrite = bch2_page_mkwrite, |
1066 | }; |
1067 | |
1068 | static int bch2_mmap(struct file *file, struct vm_area_struct *vma) |
1069 | { |
1070 | file_accessed(file); |
1071 | |
1072 | vma->vm_ops = &bch_vm_ops; |
1073 | return 0; |
1074 | } |
1075 | |
1076 | /* Directories: */ |
1077 | |
1078 | static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence) |
1079 | { |
1080 | return generic_file_llseek_size(file, offset, whence, |
1081 | S64_MAX, S64_MAX); |
1082 | } |
1083 | |
1084 | static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) |
1085 | { |
1086 | struct bch_inode_info *inode = file_bch_inode(file); |
1087 | struct bch_fs *c = inode->v.i_sb->s_fs_info; |
1088 | |
1089 | if (!dir_emit_dots(file, ctx)) |
1090 | return 0; |
1091 | |
1092 | int ret = bch2_readdir(c, inode_inum(inode), ctx); |
1093 | |
1094 | bch_err_fn(c, ret); |
1095 | return bch2_err_class(err: ret); |
1096 | } |
1097 | |
1098 | static int bch2_open(struct inode *vinode, struct file *file) |
1099 | { |
1100 | if (file->f_flags & (O_WRONLY|O_RDWR)) { |
1101 | struct bch_inode_info *inode = to_bch_ei(vinode); |
1102 | struct bch_fs *c = inode->v.i_sb->s_fs_info; |
1103 | |
1104 | int ret = bch2_subvol_is_ro(c, inode->ei_subvol); |
1105 | if (ret) |
1106 | return ret; |
1107 | } |
1108 | |
1109 | return generic_file_open(inode: vinode, filp: file); |
1110 | } |
1111 | |
1112 | static const struct file_operations bch_file_operations = { |
1113 | .open = bch2_open, |
1114 | .llseek = bch2_llseek, |
1115 | .read_iter = bch2_read_iter, |
1116 | .write_iter = bch2_write_iter, |
1117 | .mmap = bch2_mmap, |
1118 | .fsync = bch2_fsync, |
1119 | .splice_read = filemap_splice_read, |
1120 | .splice_write = iter_file_splice_write, |
1121 | .fallocate = bch2_fallocate_dispatch, |
1122 | .unlocked_ioctl = bch2_fs_file_ioctl, |
1123 | #ifdef CONFIG_COMPAT |
1124 | .compat_ioctl = bch2_compat_fs_ioctl, |
1125 | #endif |
1126 | .remap_file_range = bch2_remap_file_range, |
1127 | }; |
1128 | |
1129 | static const struct inode_operations bch_file_inode_operations = { |
1130 | .getattr = bch2_getattr, |
1131 | .setattr = bch2_setattr, |
1132 | .fiemap = bch2_fiemap, |
1133 | .listxattr = bch2_xattr_list, |
1134 | #ifdef CONFIG_BCACHEFS_POSIX_ACL |
1135 | .get_acl = bch2_get_acl, |
1136 | .set_acl = bch2_set_acl, |
1137 | #endif |
1138 | }; |
1139 | |
1140 | static const struct inode_operations bch_dir_inode_operations = { |
1141 | .lookup = bch2_lookup, |
1142 | .create = bch2_create, |
1143 | .link = bch2_link, |
1144 | .unlink = bch2_unlink, |
1145 | .symlink = bch2_symlink, |
1146 | .mkdir = bch2_mkdir, |
1147 | .rmdir = bch2_unlink, |
1148 | .mknod = bch2_mknod, |
1149 | .rename = bch2_rename2, |
1150 | .getattr = bch2_getattr, |
1151 | .setattr = bch2_setattr, |
1152 | .tmpfile = bch2_tmpfile, |
1153 | .listxattr = bch2_xattr_list, |
1154 | #ifdef CONFIG_BCACHEFS_POSIX_ACL |
1155 | .get_acl = bch2_get_acl, |
1156 | .set_acl = bch2_set_acl, |
1157 | #endif |
1158 | }; |
1159 | |
1160 | static const struct file_operations bch_dir_file_operations = { |
1161 | .llseek = bch2_dir_llseek, |
1162 | .read = generic_read_dir, |
1163 | .iterate_shared = bch2_vfs_readdir, |
1164 | .fsync = bch2_fsync, |
1165 | .unlocked_ioctl = bch2_fs_file_ioctl, |
1166 | #ifdef CONFIG_COMPAT |
1167 | .compat_ioctl = bch2_compat_fs_ioctl, |
1168 | #endif |
1169 | }; |
1170 | |
1171 | static const struct inode_operations bch_symlink_inode_operations = { |
1172 | .get_link = page_get_link, |
1173 | .getattr = bch2_getattr, |
1174 | .setattr = bch2_setattr, |
1175 | .listxattr = bch2_xattr_list, |
1176 | #ifdef CONFIG_BCACHEFS_POSIX_ACL |
1177 | .get_acl = bch2_get_acl, |
1178 | .set_acl = bch2_set_acl, |
1179 | #endif |
1180 | }; |
1181 | |
1182 | static const struct inode_operations bch_special_inode_operations = { |
1183 | .getattr = bch2_getattr, |
1184 | .setattr = bch2_setattr, |
1185 | .listxattr = bch2_xattr_list, |
1186 | #ifdef CONFIG_BCACHEFS_POSIX_ACL |
1187 | .get_acl = bch2_get_acl, |
1188 | .set_acl = bch2_set_acl, |
1189 | #endif |
1190 | }; |
1191 | |
1192 | static const struct address_space_operations bch_address_space_operations = { |
1193 | .read_folio = bch2_read_folio, |
1194 | .writepages = bch2_writepages, |
1195 | .readahead = bch2_readahead, |
1196 | .dirty_folio = filemap_dirty_folio, |
1197 | .write_begin = bch2_write_begin, |
1198 | .write_end = bch2_write_end, |
1199 | .invalidate_folio = bch2_invalidate_folio, |
1200 | .release_folio = bch2_release_folio, |
1201 | .direct_IO = noop_direct_IO, |
1202 | #ifdef CONFIG_MIGRATION |
1203 | .migrate_folio = filemap_migrate_folio, |
1204 | #endif |
1205 | .error_remove_folio = generic_error_remove_folio, |
1206 | }; |
1207 | |
/*
 * File handle payload identifying one inode. bch2_encode_fh() copies this
 * structure straight into the caller's handle buffer, so the layout (packed,
 * field order) must not change.
 */
struct bcachefs_fid {
	u64		inum;	/* inode number (bi_inum) */
	u32		subvol;	/* subvolume the inode was opened through */
	u32		gen;	/* inode generation, checked on decode */
} __packed;
1213 | |
/*
 * File handle payload for FILEID_BCACHEFS_WITH_PARENT: the inode itself
 * followed by the directory it was encoded relative to.
 */
struct bcachefs_fid_with_parent {
	struct bcachefs_fid	fid;	/* the inode */
	struct bcachefs_fid	dir;	/* its parent directory */
} __packed;
1218 | |
1219 | static int bcachefs_fid_valid(int fh_len, int fh_type) |
1220 | { |
1221 | switch (fh_type) { |
1222 | case FILEID_BCACHEFS_WITHOUT_PARENT: |
1223 | return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32); |
1224 | case FILEID_BCACHEFS_WITH_PARENT: |
1225 | return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32); |
1226 | default: |
1227 | return false; |
1228 | } |
1229 | } |
1230 | |
1231 | static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode) |
1232 | { |
1233 | return (struct bcachefs_fid) { |
1234 | .inum = inode->ei_inode.bi_inum, |
1235 | .subvol = inode->ei_subvol, |
1236 | .gen = inode->ei_inode.bi_generation, |
1237 | }; |
1238 | } |
1239 | |
1240 | static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len, |
1241 | struct inode *vdir) |
1242 | { |
1243 | struct bch_inode_info *inode = to_bch_ei(vinode); |
1244 | struct bch_inode_info *dir = to_bch_ei(vdir); |
1245 | int min_len; |
1246 | |
1247 | if (!S_ISDIR(inode->v.i_mode) && dir) { |
1248 | struct bcachefs_fid_with_parent *fid = (void *) fh; |
1249 | |
1250 | min_len = sizeof(*fid) / sizeof(u32); |
1251 | if (*len < min_len) { |
1252 | *len = min_len; |
1253 | return FILEID_INVALID; |
1254 | } |
1255 | |
1256 | fid->fid = bch2_inode_to_fid(inode); |
1257 | fid->dir = bch2_inode_to_fid(inode: dir); |
1258 | |
1259 | *len = min_len; |
1260 | return FILEID_BCACHEFS_WITH_PARENT; |
1261 | } else { |
1262 | struct bcachefs_fid *fid = (void *) fh; |
1263 | |
1264 | min_len = sizeof(*fid) / sizeof(u32); |
1265 | if (*len < min_len) { |
1266 | *len = min_len; |
1267 | return FILEID_INVALID; |
1268 | } |
1269 | *fid = bch2_inode_to_fid(inode); |
1270 | |
1271 | *len = min_len; |
1272 | return FILEID_BCACHEFS_WITHOUT_PARENT; |
1273 | } |
1274 | } |
1275 | |
1276 | static struct inode *bch2_nfs_get_inode(struct super_block *sb, |
1277 | struct bcachefs_fid fid) |
1278 | { |
1279 | struct bch_fs *c = sb->s_fs_info; |
1280 | struct inode *vinode = bch2_vfs_inode_get(c, inum: (subvol_inum) { |
1281 | .subvol = fid.subvol, |
1282 | .inum = fid.inum, |
1283 | }); |
1284 | if (!IS_ERR(ptr: vinode) && vinode->i_generation != fid.gen) { |
1285 | iput(vinode); |
1286 | vinode = ERR_PTR(error: -ESTALE); |
1287 | } |
1288 | return vinode; |
1289 | } |
1290 | |
1291 | static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid, |
1292 | int fh_len, int fh_type) |
1293 | { |
1294 | struct bcachefs_fid *fid = (void *) _fid; |
1295 | |
1296 | if (!bcachefs_fid_valid(fh_len, fh_type)) |
1297 | return NULL; |
1298 | |
1299 | return d_obtain_alias(bch2_nfs_get_inode(sb, fid: *fid)); |
1300 | } |
1301 | |
1302 | static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid, |
1303 | int fh_len, int fh_type) |
1304 | { |
1305 | struct bcachefs_fid_with_parent *fid = (void *) _fid; |
1306 | |
1307 | if (!bcachefs_fid_valid(fh_len, fh_type) || |
1308 | fh_type != FILEID_BCACHEFS_WITH_PARENT) |
1309 | return NULL; |
1310 | |
1311 | return d_obtain_alias(bch2_nfs_get_inode(sb, fid: fid->dir)); |
1312 | } |
1313 | |
1314 | static struct dentry *bch2_get_parent(struct dentry *child) |
1315 | { |
1316 | struct bch_inode_info *inode = to_bch_ei(child->d_inode); |
1317 | struct bch_fs *c = inode->v.i_sb->s_fs_info; |
1318 | subvol_inum parent_inum = { |
1319 | .subvol = inode->ei_inode.bi_parent_subvol ?: |
1320 | inode->ei_subvol, |
1321 | .inum = inode->ei_inode.bi_dir, |
1322 | }; |
1323 | |
1324 | return d_obtain_alias(bch2_vfs_inode_get(c, inum: parent_inum)); |
1325 | } |
1326 | |
1327 | static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child) |
1328 | { |
1329 | struct bch_inode_info *inode = to_bch_ei(child->d_inode); |
1330 | struct bch_inode_info *dir = to_bch_ei(parent->d_inode); |
1331 | struct bch_fs *c = inode->v.i_sb->s_fs_info; |
1332 | struct btree_trans *trans; |
1333 | struct btree_iter iter1; |
1334 | struct btree_iter iter2; |
1335 | struct bkey_s_c k; |
1336 | struct bkey_s_c_dirent d; |
1337 | struct bch_inode_unpacked inode_u; |
1338 | subvol_inum target; |
1339 | u32 snapshot; |
1340 | struct qstr dirent_name; |
1341 | unsigned name_len = 0; |
1342 | int ret; |
1343 | |
1344 | if (!S_ISDIR(dir->v.i_mode)) |
1345 | return -EINVAL; |
1346 | |
1347 | trans = bch2_trans_get(c); |
1348 | |
1349 | bch2_trans_iter_init(trans, iter: &iter1, btree_id: BTREE_ID_dirents, |
1350 | POS(dir->ei_inode.bi_inum, 0), flags: 0); |
1351 | bch2_trans_iter_init(trans, iter: &iter2, btree_id: BTREE_ID_dirents, |
1352 | POS(dir->ei_inode.bi_inum, 0), flags: 0); |
1353 | retry: |
1354 | bch2_trans_begin(trans); |
1355 | |
1356 | ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot); |
1357 | if (ret) |
1358 | goto err; |
1359 | |
1360 | bch2_btree_iter_set_snapshot(iter: &iter1, snapshot); |
1361 | bch2_btree_iter_set_snapshot(iter: &iter2, snapshot); |
1362 | |
1363 | ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u); |
1364 | if (ret) |
1365 | goto err; |
1366 | |
1367 | if (inode_u.bi_dir == dir->ei_inode.bi_inum) { |
1368 | bch2_btree_iter_set_pos(iter: &iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset)); |
1369 | |
1370 | k = bch2_btree_iter_peek_slot(&iter1); |
1371 | ret = bkey_err(k); |
1372 | if (ret) |
1373 | goto err; |
1374 | |
1375 | if (k.k->type != KEY_TYPE_dirent) { |
1376 | ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; |
1377 | goto err; |
1378 | } |
1379 | |
1380 | d = bkey_s_c_to_dirent(k); |
1381 | ret = bch2_dirent_read_target(trans, inode_inum(inode: dir), d, &target); |
1382 | if (ret > 0) |
1383 | ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; |
1384 | if (ret) |
1385 | goto err; |
1386 | |
1387 | if (target.subvol == inode->ei_subvol && |
1388 | target.inum == inode->ei_inode.bi_inum) |
1389 | goto found; |
1390 | } else { |
1391 | /* |
1392 | * File with multiple hardlinks and our backref is to the wrong |
1393 | * directory - linear search: |
1394 | */ |
1395 | for_each_btree_key_continue_norestart(iter2, 0, k, ret) { |
1396 | if (k.k->p.inode > dir->ei_inode.bi_inum) |
1397 | break; |
1398 | |
1399 | if (k.k->type != KEY_TYPE_dirent) |
1400 | continue; |
1401 | |
1402 | d = bkey_s_c_to_dirent(k); |
1403 | ret = bch2_dirent_read_target(trans, inode_inum(inode: dir), d, &target); |
1404 | if (ret < 0) |
1405 | break; |
1406 | if (ret) |
1407 | continue; |
1408 | |
1409 | if (target.subvol == inode->ei_subvol && |
1410 | target.inum == inode->ei_inode.bi_inum) |
1411 | goto found; |
1412 | } |
1413 | } |
1414 | |
1415 | ret = -ENOENT; |
1416 | goto err; |
1417 | found: |
1418 | dirent_name = bch2_dirent_get_name(d); |
1419 | |
1420 | name_len = min_t(unsigned, dirent_name.len, NAME_MAX); |
1421 | memcpy(name, dirent_name.name, name_len); |
1422 | name[name_len] = '\0'; |
1423 | err: |
1424 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
1425 | goto retry; |
1426 | |
1427 | bch2_trans_iter_exit(trans, &iter1); |
1428 | bch2_trans_iter_exit(trans, &iter2); |
1429 | bch2_trans_put(trans); |
1430 | |
1431 | return ret; |
1432 | } |
1433 | |
1434 | static const struct export_operations bch_export_ops = { |
1435 | .encode_fh = bch2_encode_fh, |
1436 | .fh_to_dentry = bch2_fh_to_dentry, |
1437 | .fh_to_parent = bch2_fh_to_parent, |
1438 | .get_parent = bch2_get_parent, |
1439 | .get_name = bch2_get_name, |
1440 | }; |
1441 | |
1442 | static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, |
1443 | struct bch_inode_info *inode, |
1444 | struct bch_inode_unpacked *bi, |
1445 | struct bch_subvolume *subvol) |
1446 | { |
1447 | bch2_iget5_set(vinode: &inode->v, p: &inum); |
1448 | bch2_inode_update_after_write(trans, inode, bi, fields: ~0); |
1449 | |
1450 | if (BCH_SUBVOLUME_SNAP(k: subvol)) |
1451 | set_bit(EI_INODE_SNAPSHOT, addr: &inode->ei_flags); |
1452 | else |
1453 | clear_bit(EI_INODE_SNAPSHOT, addr: &inode->ei_flags); |
1454 | |
1455 | inode->v.i_blocks = bi->bi_sectors; |
1456 | inode->v.i_ino = bi->bi_inum; |
1457 | inode->v.i_rdev = bi->bi_dev; |
1458 | inode->v.i_generation = bi->bi_generation; |
1459 | inode->v.i_size = bi->bi_size; |
1460 | |
1461 | inode->ei_flags = 0; |
1462 | inode->ei_quota_reserved = 0; |
1463 | inode->ei_qid = bch_qid(u: bi); |
1464 | inode->ei_subvol = inum.subvol; |
1465 | |
1466 | inode->v.i_mapping->a_ops = &bch_address_space_operations; |
1467 | |
1468 | switch (inode->v.i_mode & S_IFMT) { |
1469 | case S_IFREG: |
1470 | inode->v.i_op = &bch_file_inode_operations; |
1471 | inode->v.i_fop = &bch_file_operations; |
1472 | break; |
1473 | case S_IFDIR: |
1474 | inode->v.i_op = &bch_dir_inode_operations; |
1475 | inode->v.i_fop = &bch_dir_file_operations; |
1476 | break; |
1477 | case S_IFLNK: |
1478 | inode_nohighmem(inode: &inode->v); |
1479 | inode->v.i_op = &bch_symlink_inode_operations; |
1480 | break; |
1481 | default: |
1482 | init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev); |
1483 | inode->v.i_op = &bch_special_inode_operations; |
1484 | break; |
1485 | } |
1486 | |
1487 | mapping_set_large_folios(mapping: inode->v.i_mapping); |
1488 | } |
1489 | |
1490 | static struct inode *bch2_alloc_inode(struct super_block *sb) |
1491 | { |
1492 | struct bch_inode_info *inode; |
1493 | |
1494 | inode = kmem_cache_alloc(cachep: bch2_inode_cache, GFP_NOFS); |
1495 | if (!inode) |
1496 | return NULL; |
1497 | |
1498 | inode_init_once(&inode->v); |
1499 | mutex_init(&inode->ei_update_lock); |
1500 | two_state_lock_init(lock: &inode->ei_pagecache_lock); |
1501 | INIT_LIST_HEAD(list: &inode->ei_vfs_inode_list); |
1502 | mutex_init(&inode->ei_quota_lock); |
1503 | |
1504 | return &inode->v; |
1505 | } |
1506 | |
1507 | static void bch2_i_callback(struct rcu_head *head) |
1508 | { |
1509 | struct inode *vinode = container_of(head, struct inode, i_rcu); |
1510 | struct bch_inode_info *inode = to_bch_ei(vinode); |
1511 | |
1512 | kmem_cache_free(s: bch2_inode_cache, objp: inode); |
1513 | } |
1514 | |
/*
 * ->destroy_inode: the inode is not freed directly; the free is queued with
 * call_rcu() and performed by bch2_i_callback().
 */
static void bch2_destroy_inode(struct inode *vinode)
{
	call_rcu(&vinode->i_rcu, bch2_i_callback);
}
1519 | |
1520 | static int inode_update_times_fn(struct btree_trans *trans, |
1521 | struct bch_inode_info *inode, |
1522 | struct bch_inode_unpacked *bi, |
1523 | void *p) |
1524 | { |
1525 | struct bch_fs *c = inode->v.i_sb->s_fs_info; |
1526 | |
1527 | bi->bi_atime = timespec_to_bch2_time(c, ts: inode_get_atime(inode: &inode->v)); |
1528 | bi->bi_mtime = timespec_to_bch2_time(c, ts: inode_get_mtime(inode: &inode->v)); |
1529 | bi->bi_ctime = timespec_to_bch2_time(c, ts: inode_get_ctime(inode: &inode->v)); |
1530 | |
1531 | return 0; |
1532 | } |
1533 | |
1534 | static int bch2_vfs_write_inode(struct inode *vinode, |
1535 | struct writeback_control *wbc) |
1536 | { |
1537 | struct bch_fs *c = vinode->i_sb->s_fs_info; |
1538 | struct bch_inode_info *inode = to_bch_ei(vinode); |
1539 | int ret; |
1540 | |
1541 | mutex_lock(&inode->ei_update_lock); |
1542 | ret = bch2_write_inode(c, inode, set: inode_update_times_fn, NULL, |
1543 | ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); |
1544 | mutex_unlock(lock: &inode->ei_update_lock); |
1545 | |
1546 | return bch2_err_class(err: ret); |
1547 | } |
1548 | |
1549 | static void bch2_evict_inode(struct inode *vinode) |
1550 | { |
1551 | struct bch_fs *c = vinode->i_sb->s_fs_info; |
1552 | struct bch_inode_info *inode = to_bch_ei(vinode); |
1553 | |
1554 | truncate_inode_pages_final(&inode->v.i_data); |
1555 | |
1556 | clear_inode(&inode->v); |
1557 | |
1558 | BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved); |
1559 | |
1560 | if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { |
1561 | bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks), |
1562 | KEY_TYPE_QUOTA_WARN); |
1563 | bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, |
1564 | KEY_TYPE_QUOTA_WARN); |
1565 | bch2_inode_rm(c, inode_inum(inode)); |
1566 | } |
1567 | |
1568 | mutex_lock(&c->vfs_inodes_lock); |
1569 | list_del_init(entry: &inode->ei_vfs_inode_list); |
1570 | mutex_unlock(lock: &c->vfs_inodes_lock); |
1571 | } |
1572 | |
1573 | void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) |
1574 | { |
1575 | struct bch_inode_info *inode; |
1576 | DARRAY(struct bch_inode_info *) grabbed; |
1577 | bool clean_pass = false, this_pass_clean; |
1578 | |
1579 | /* |
1580 | * Initially, we scan for inodes without I_DONTCACHE, then mark them to |
1581 | * be pruned with d_mark_dontcache(). |
1582 | * |
1583 | * Once we've had a clean pass where we didn't find any inodes without |
1584 | * I_DONTCACHE, we wait for them to be freed: |
1585 | */ |
1586 | |
1587 | darray_init(&grabbed); |
1588 | darray_make_room(&grabbed, 1024); |
1589 | again: |
1590 | cond_resched(); |
1591 | this_pass_clean = true; |
1592 | |
1593 | mutex_lock(&c->vfs_inodes_lock); |
1594 | list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) { |
1595 | if (!snapshot_list_has_id(s, id: inode->ei_subvol)) |
1596 | continue; |
1597 | |
1598 | if (!(inode->v.i_state & I_DONTCACHE) && |
1599 | !(inode->v.i_state & I_FREEING) && |
1600 | igrab(&inode->v)) { |
1601 | this_pass_clean = false; |
1602 | |
1603 | if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) { |
1604 | iput(&inode->v); |
1605 | break; |
1606 | } |
1607 | } else if (clean_pass && this_pass_clean) { |
1608 | wait_queue_head_t *wq = bit_waitqueue(word: &inode->v.i_state, __I_NEW); |
1609 | DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW); |
1610 | |
1611 | prepare_to_wait(wq_head: wq, wq_entry: &wait.wq_entry, TASK_UNINTERRUPTIBLE); |
1612 | mutex_unlock(lock: &c->vfs_inodes_lock); |
1613 | |
1614 | schedule(); |
1615 | finish_wait(wq_head: wq, wq_entry: &wait.wq_entry); |
1616 | goto again; |
1617 | } |
1618 | } |
1619 | mutex_unlock(lock: &c->vfs_inodes_lock); |
1620 | |
1621 | darray_for_each(grabbed, i) { |
1622 | inode = *i; |
1623 | d_mark_dontcache(inode: &inode->v); |
1624 | d_prune_aliases(&inode->v); |
1625 | iput(&inode->v); |
1626 | } |
1627 | grabbed.nr = 0; |
1628 | |
1629 | if (!clean_pass || !this_pass_clean) { |
1630 | clean_pass = this_pass_clean; |
1631 | goto again; |
1632 | } |
1633 | |
1634 | darray_exit(&grabbed); |
1635 | } |
1636 | |
1637 | static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) |
1638 | { |
1639 | struct super_block *sb = dentry->d_sb; |
1640 | struct bch_fs *c = sb->s_fs_info; |
1641 | struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); |
1642 | unsigned shift = sb->s_blocksize_bits - 9; |
1643 | /* |
1644 | * this assumes inodes take up 64 bytes, which is a decent average |
1645 | * number: |
1646 | */ |
1647 | u64 avail_inodes = ((usage.capacity - usage.used) << 3); |
1648 | |
1649 | buf->f_type = BCACHEFS_STATFS_MAGIC; |
1650 | buf->f_bsize = sb->s_blocksize; |
1651 | buf->f_blocks = usage.capacity >> shift; |
1652 | buf->f_bfree = usage.free >> shift; |
1653 | buf->f_bavail = avail_factor(r: usage.free) >> shift; |
1654 | |
1655 | buf->f_files = usage.nr_inodes + avail_inodes; |
1656 | buf->f_ffree = avail_inodes; |
1657 | |
1658 | buf->f_fsid = uuid_to_fsid(uuid: c->sb.user_uuid.b); |
1659 | buf->f_namelen = BCH_NAME_MAX; |
1660 | |
1661 | return 0; |
1662 | } |
1663 | |
1664 | static int bch2_sync_fs(struct super_block *sb, int wait) |
1665 | { |
1666 | struct bch_fs *c = sb->s_fs_info; |
1667 | int ret; |
1668 | |
1669 | if (c->opts.journal_flush_disabled) |
1670 | return 0; |
1671 | |
1672 | if (!wait) { |
1673 | bch2_journal_flush_async(&c->journal, NULL); |
1674 | return 0; |
1675 | } |
1676 | |
1677 | ret = bch2_journal_flush(&c->journal); |
1678 | return bch2_err_class(err: ret); |
1679 | } |
1680 | |
1681 | static struct bch_fs *bch2_path_to_fs(const char *path) |
1682 | { |
1683 | struct bch_fs *c; |
1684 | dev_t dev; |
1685 | int ret; |
1686 | |
1687 | ret = lookup_bdev(pathname: path, dev: &dev); |
1688 | if (ret) |
1689 | return ERR_PTR(error: ret); |
1690 | |
1691 | c = bch2_dev_to_fs(dev); |
1692 | if (c) |
1693 | closure_put(cl: &c->cl); |
1694 | return c ?: ERR_PTR(error: -ENOENT); |
1695 | } |
1696 | |
1697 | static int bch2_remount(struct super_block *sb, int *flags, char *data) |
1698 | { |
1699 | struct bch_fs *c = sb->s_fs_info; |
1700 | struct bch_opts opts = bch2_opts_empty(); |
1701 | int ret; |
1702 | |
1703 | ret = bch2_parse_mount_opts(c, &opts, data); |
1704 | if (ret) |
1705 | goto err; |
1706 | |
1707 | opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); |
1708 | |
1709 | if (opts.read_only != c->opts.read_only) { |
1710 | down_write(sem: &c->state_lock); |
1711 | |
1712 | if (opts.read_only) { |
1713 | bch2_fs_read_only(c); |
1714 | |
1715 | sb->s_flags |= SB_RDONLY; |
1716 | } else { |
1717 | ret = bch2_fs_read_write(c); |
1718 | if (ret) { |
1719 | bch_err(c, "error going rw: %i" , ret); |
1720 | up_write(sem: &c->state_lock); |
1721 | ret = -EINVAL; |
1722 | goto err; |
1723 | } |
1724 | |
1725 | sb->s_flags &= ~SB_RDONLY; |
1726 | } |
1727 | |
1728 | c->opts.read_only = opts.read_only; |
1729 | |
1730 | up_write(sem: &c->state_lock); |
1731 | } |
1732 | |
1733 | if (opt_defined(opts, errors)) |
1734 | c->opts.errors = opts.errors; |
1735 | err: |
1736 | return bch2_err_class(err: ret); |
1737 | } |
1738 | |
1739 | static int bch2_show_devname(struct seq_file *seq, struct dentry *root) |
1740 | { |
1741 | struct bch_fs *c = root->d_sb->s_fs_info; |
1742 | bool first = true; |
1743 | |
1744 | for_each_online_member(c, ca) { |
1745 | if (!first) |
1746 | seq_putc(m: seq, c: ':'); |
1747 | first = false; |
1748 | seq_puts(m: seq, s: ca->disk_sb.sb_name); |
1749 | } |
1750 | |
1751 | return 0; |
1752 | } |
1753 | |
1754 | static int bch2_show_options(struct seq_file *seq, struct dentry *root) |
1755 | { |
1756 | struct bch_fs *c = root->d_sb->s_fs_info; |
1757 | enum bch_opt_id i; |
1758 | struct printbuf buf = PRINTBUF; |
1759 | int ret = 0; |
1760 | |
1761 | for (i = 0; i < bch2_opts_nr; i++) { |
1762 | const struct bch_option *opt = &bch2_opt_table[i]; |
1763 | u64 v = bch2_opt_get_by_id(&c->opts, i); |
1764 | |
1765 | if (!(opt->flags & OPT_MOUNT)) |
1766 | continue; |
1767 | |
1768 | if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) |
1769 | continue; |
1770 | |
1771 | printbuf_reset(buf: &buf); |
1772 | bch2_opt_to_text(&buf, c, c->disk_sb.sb, opt, v, |
1773 | OPT_SHOW_MOUNT_STYLE); |
1774 | seq_putc(m: seq, c: ','); |
1775 | seq_puts(m: seq, s: buf.buf); |
1776 | } |
1777 | |
1778 | if (buf.allocation_failure) |
1779 | ret = -ENOMEM; |
1780 | printbuf_exit(&buf); |
1781 | return ret; |
1782 | } |
1783 | |
1784 | static void bch2_put_super(struct super_block *sb) |
1785 | { |
1786 | struct bch_fs *c = sb->s_fs_info; |
1787 | |
1788 | __bch2_fs_stop(c); |
1789 | } |
1790 | |
1791 | /* |
1792 | * bcachefs doesn't currently integrate intwrite freeze protection but the |
1793 | * internal write references serve the same purpose. Therefore reuse the |
1794 | * read-only transition code to perform the quiesce. The caveat is that we don't |
1795 | * currently have the ability to block tasks that want a write reference while |
1796 | * the superblock is frozen. This is fine for now, but we should either add |
1797 | * blocking support or find a way to integrate sb_start_intwrite() and friends. |
1798 | */ |
1799 | static int bch2_freeze(struct super_block *sb) |
1800 | { |
1801 | struct bch_fs *c = sb->s_fs_info; |
1802 | |
1803 | down_write(sem: &c->state_lock); |
1804 | bch2_fs_read_only(c); |
1805 | up_write(sem: &c->state_lock); |
1806 | return 0; |
1807 | } |
1808 | |
1809 | static int bch2_unfreeze(struct super_block *sb) |
1810 | { |
1811 | struct bch_fs *c = sb->s_fs_info; |
1812 | int ret; |
1813 | |
1814 | if (test_bit(BCH_FS_emergency_ro, &c->flags)) |
1815 | return 0; |
1816 | |
1817 | down_write(sem: &c->state_lock); |
1818 | ret = bch2_fs_read_write(c); |
1819 | up_write(sem: &c->state_lock); |
1820 | return ret; |
1821 | } |
1822 | |
1823 | static const struct super_operations bch_super_operations = { |
1824 | .alloc_inode = bch2_alloc_inode, |
1825 | .destroy_inode = bch2_destroy_inode, |
1826 | .write_inode = bch2_vfs_write_inode, |
1827 | .evict_inode = bch2_evict_inode, |
1828 | .sync_fs = bch2_sync_fs, |
1829 | .statfs = bch2_statfs, |
1830 | .show_devname = bch2_show_devname, |
1831 | .show_options = bch2_show_options, |
1832 | .remount_fs = bch2_remount, |
1833 | .put_super = bch2_put_super, |
1834 | .freeze_fs = bch2_freeze, |
1835 | .unfreeze_fs = bch2_unfreeze, |
1836 | }; |
1837 | |
/*
 * sget() "set" callback: attach @data as the new superblock's private
 * filesystem pointer. Always returns 0.
 */
static int bch2_set_super(struct super_block *s, void *data)
{
	s->s_fs_info = data;
	return 0;
}
1843 | |
/*
 * sget() "set" callback that always fails with -EBUSY. It is passed to
 * sget() together with bch2_test_super().
 */
static int bch2_noset_super(struct super_block *s, void *data)
{
	return -EBUSY;
}
1848 | |
/* Dynamic array of filesystem pointers, the @data type of bch2_test_super() */
typedef DARRAY(struct bch_fs *) darray_fs;
1850 | |
1851 | static int bch2_test_super(struct super_block *s, void *data) |
1852 | { |
1853 | struct bch_fs *c = s->s_fs_info; |
1854 | darray_fs *d = data; |
1855 | |
1856 | if (!c) |
1857 | return false; |
1858 | |
1859 | darray_for_each(*d, i) |
1860 | if (c != *i) |
1861 | return false; |
1862 | return true; |
1863 | } |
1864 | |
1865 | static struct dentry *bch2_mount(struct file_system_type *fs_type, |
1866 | int flags, const char *dev_name, void *data) |
1867 | { |
1868 | struct bch_fs *c; |
1869 | struct super_block *sb; |
1870 | struct inode *vinode; |
1871 | struct bch_opts opts = bch2_opts_empty(); |
1872 | int ret; |
1873 | |
1874 | opt_set(opts, read_only, (flags & SB_RDONLY) != 0); |
1875 | |
1876 | ret = bch2_parse_mount_opts(NULL, &opts, data); |
1877 | if (ret) { |
1878 | ret = bch2_err_class(err: ret); |
1879 | return ERR_PTR(error: ret); |
1880 | } |
1881 | |
1882 | if (!dev_name || strlen(dev_name) == 0) |
1883 | return ERR_PTR(error: -EINVAL); |
1884 | |
1885 | darray_str devs; |
1886 | ret = bch2_split_devs(dev_name, &devs); |
1887 | if (ret) |
1888 | return ERR_PTR(error: ret); |
1889 | |
1890 | darray_fs devs_to_fs = {}; |
1891 | darray_for_each(devs, i) { |
1892 | ret = darray_push(&devs_to_fs, bch2_path_to_fs(*i)); |
1893 | if (ret) { |
1894 | sb = ERR_PTR(error: ret); |
1895 | goto got_sb; |
1896 | } |
1897 | } |
1898 | |
1899 | sb = sget(type: fs_type, test: bch2_test_super, set: bch2_noset_super, flags: flags|SB_NOSEC, data: &devs_to_fs); |
1900 | if (!IS_ERR(ptr: sb)) |
1901 | goto got_sb; |
1902 | |
1903 | c = bch2_fs_open(devs.data, devs.nr, opts); |
1904 | if (IS_ERR(ptr: c)) { |
1905 | sb = ERR_CAST(ptr: c); |
1906 | goto got_sb; |
1907 | } |
1908 | |
1909 | /* Some options can't be parsed until after the fs is started: */ |
1910 | ret = bch2_parse_mount_opts(c, &opts, data); |
1911 | if (ret) { |
1912 | bch2_fs_stop(c); |
1913 | sb = ERR_PTR(error: ret); |
1914 | goto got_sb; |
1915 | } |
1916 | |
1917 | bch2_opts_apply(&c->opts, opts); |
1918 | |
1919 | sb = sget(type: fs_type, NULL, set: bch2_set_super, flags: flags|SB_NOSEC, data: c); |
1920 | if (IS_ERR(ptr: sb)) |
1921 | bch2_fs_stop(c); |
1922 | got_sb: |
1923 | darray_exit(&devs_to_fs); |
1924 | bch2_darray_str_exit(&devs); |
1925 | |
1926 | if (IS_ERR(ptr: sb)) { |
1927 | ret = PTR_ERR(ptr: sb); |
1928 | ret = bch2_err_class(err: ret); |
1929 | return ERR_PTR(error: ret); |
1930 | } |
1931 | |
1932 | c = sb->s_fs_info; |
1933 | |
1934 | if (sb->s_root) { |
1935 | if ((flags ^ sb->s_flags) & SB_RDONLY) { |
1936 | ret = -EBUSY; |
1937 | goto err_put_super; |
1938 | } |
1939 | goto out; |
1940 | } |
1941 | |
1942 | sb->s_blocksize = block_bytes(c); |
1943 | sb->s_blocksize_bits = ilog2(block_bytes(c)); |
1944 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
1945 | sb->s_op = &bch_super_operations; |
1946 | sb->s_export_op = &bch_export_ops; |
1947 | #ifdef CONFIG_BCACHEFS_QUOTA |
1948 | sb->s_qcop = &bch2_quotactl_operations; |
1949 | sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ; |
1950 | #endif |
1951 | sb->s_xattr = bch2_xattr_handlers; |
1952 | sb->s_magic = BCACHEFS_STATFS_MAGIC; |
1953 | sb->s_time_gran = c->sb.nsec_per_time_unit; |
1954 | sb->s_time_min = div_s64(S64_MIN, divisor: c->sb.time_units_per_sec) + 1; |
1955 | sb->s_time_max = div_s64(S64_MAX, divisor: c->sb.time_units_per_sec); |
1956 | sb->s_uuid = c->sb.user_uuid; |
1957 | c->vfs_sb = sb; |
1958 | strscpy(sb->s_id, c->name, sizeof(sb->s_id)); |
1959 | |
1960 | ret = super_setup_bdi(sb); |
1961 | if (ret) |
1962 | goto err_put_super; |
1963 | |
1964 | sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; |
1965 | |
1966 | for_each_online_member(c, ca) { |
1967 | struct block_device *bdev = ca->disk_sb.bdev; |
1968 | |
1969 | /* XXX: create an anonymous device for multi device filesystems */ |
1970 | sb->s_bdev = bdev; |
1971 | sb->s_dev = bdev->bd_dev; |
1972 | percpu_ref_put(ref: &ca->io_ref); |
1973 | break; |
1974 | } |
1975 | |
1976 | c->dev = sb->s_dev; |
1977 | |
1978 | #ifdef CONFIG_BCACHEFS_POSIX_ACL |
1979 | if (c->opts.acl) |
1980 | sb->s_flags |= SB_POSIXACL; |
1981 | #endif |
1982 | |
1983 | sb->s_shrink->seeks = 0; |
1984 | |
1985 | vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); |
1986 | ret = PTR_ERR_OR_ZERO(ptr: vinode); |
1987 | bch_err_msg(c, ret, "mounting: error getting root inode" ); |
1988 | if (ret) |
1989 | goto err_put_super; |
1990 | |
1991 | sb->s_root = d_make_root(vinode); |
1992 | if (!sb->s_root) { |
1993 | bch_err(c, "error mounting: error allocating root dentry" ); |
1994 | ret = -ENOMEM; |
1995 | goto err_put_super; |
1996 | } |
1997 | |
1998 | sb->s_flags |= SB_ACTIVE; |
1999 | out: |
2000 | return dget(dentry: sb->s_root); |
2001 | |
2002 | err_put_super: |
2003 | __bch2_fs_stop(c); |
2004 | deactivate_locked_super(sb); |
2005 | return ERR_PTR(error: bch2_err_class(err: ret)); |
2006 | } |
2007 | |
2008 | static void bch2_kill_sb(struct super_block *sb) |
2009 | { |
2010 | struct bch_fs *c = sb->s_fs_info; |
2011 | |
2012 | generic_shutdown_super(sb); |
2013 | bch2_fs_free(c); |
2014 | } |
2015 | |
2016 | static struct file_system_type bcache_fs_type = { |
2017 | .owner = THIS_MODULE, |
2018 | .name = "bcachefs" , |
2019 | .mount = bch2_mount, |
2020 | .kill_sb = bch2_kill_sb, |
2021 | .fs_flags = FS_REQUIRES_DEV, |
2022 | }; |
2023 | |
2024 | MODULE_ALIAS_FS("bcachefs" ); |
2025 | |
2026 | void bch2_vfs_exit(void) |
2027 | { |
2028 | unregister_filesystem(&bcache_fs_type); |
2029 | kmem_cache_destroy(s: bch2_inode_cache); |
2030 | } |
2031 | |
2032 | int __init bch2_vfs_init(void) |
2033 | { |
2034 | int ret = -ENOMEM; |
2035 | |
2036 | bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT); |
2037 | if (!bch2_inode_cache) |
2038 | goto err; |
2039 | |
2040 | ret = register_filesystem(&bcache_fs_type); |
2041 | if (ret) |
2042 | goto err; |
2043 | |
2044 | return 0; |
2045 | err: |
2046 | bch2_vfs_exit(); |
2047 | return ret; |
2048 | } |
2049 | |
2050 | #endif /* NO_BCACHEFS_FS */ |
2051 | |