1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | #include "bcachefs.h" |
4 | #include "btree_key_cache.h" |
5 | #include "btree_write_buffer.h" |
6 | #include "bkey_methods.h" |
7 | #include "btree_update.h" |
8 | #include "buckets.h" |
9 | #include "compress.h" |
10 | #include "dirent.h" |
11 | #include "error.h" |
12 | #include "extents.h" |
13 | #include "extent_update.h" |
14 | #include "inode.h" |
15 | #include "str_hash.h" |
16 | #include "snapshot.h" |
17 | #include "subvolume.h" |
18 | #include "varint.h" |
19 | |
20 | #include <linux/random.h> |
21 | |
22 | #include <asm/unaligned.h> |
23 | |
24 | #define x(name, ...) #name, |
25 | const char * const bch2_inode_opts[] = { |
26 | BCH_INODE_OPTS() |
27 | NULL, |
28 | }; |
29 | |
30 | static const char * const bch2_inode_flag_strs[] = { |
31 | BCH_INODE_FLAGS() |
32 | NULL |
33 | }; |
34 | #undef x |
35 | |
36 | static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; |
37 | |
38 | static int inode_decode_field(const u8 *in, const u8 *end, |
39 | u64 out[2], unsigned *out_bits) |
40 | { |
41 | __be64 be[2] = { 0, 0 }; |
42 | unsigned bytes, shift; |
43 | u8 *p; |
44 | |
45 | if (in >= end) |
46 | return -1; |
47 | |
48 | if (!*in) |
49 | return -1; |
50 | |
51 | /* |
52 | * position of highest set bit indicates number of bytes: |
53 | * shift = number of bits to remove in high byte: |
54 | */ |
55 | shift = 8 - __fls(word: *in); /* 1 <= shift <= 8 */ |
56 | bytes = byte_table[shift - 1]; |
57 | |
58 | if (in + bytes > end) |
59 | return -1; |
60 | |
61 | p = (u8 *) be + 16 - bytes; |
62 | memcpy(p, in, bytes); |
63 | *p ^= (1 << 8) >> shift; |
64 | |
65 | out[0] = be64_to_cpu(be[0]); |
66 | out[1] = be64_to_cpu(be[1]); |
67 | *out_bits = out[0] ? 64 + fls64(x: out[0]) : fls64(x: out[1]); |
68 | |
69 | return bytes; |
70 | } |
71 | |
72 | static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed, |
73 | const struct bch_inode_unpacked *inode) |
74 | { |
75 | struct bkey_i_inode_v3 *k = &packed->inode; |
76 | u8 *out = k->v.fields; |
77 | u8 *end = (void *) &packed[1]; |
78 | u8 *last_nonzero_field = out; |
79 | unsigned nr_fields = 0, last_nonzero_fieldnr = 0; |
80 | unsigned bytes; |
81 | int ret; |
82 | |
83 | bkey_inode_v3_init(k: &packed->inode.k_i); |
84 | packed->inode.k.p.offset = inode->bi_inum; |
85 | packed->inode.v.bi_journal_seq = cpu_to_le64(inode->bi_journal_seq); |
86 | packed->inode.v.bi_hash_seed = inode->bi_hash_seed; |
87 | packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags); |
88 | packed->inode.v.bi_sectors = cpu_to_le64(inode->bi_sectors); |
89 | packed->inode.v.bi_size = cpu_to_le64(inode->bi_size); |
90 | packed->inode.v.bi_version = cpu_to_le64(inode->bi_version); |
91 | SET_INODEv3_MODE(k: &packed->inode.v, v: inode->bi_mode); |
92 | SET_INODEv3_FIELDS_START(k: &packed->inode.v, INODEv3_FIELDS_START_CUR); |
93 | |
94 | |
95 | #define x(_name, _bits) \ |
96 | nr_fields++; \ |
97 | \ |
98 | if (inode->_name) { \ |
99 | ret = bch2_varint_encode_fast(out, inode->_name); \ |
100 | out += ret; \ |
101 | \ |
102 | if (_bits > 64) \ |
103 | *out++ = 0; \ |
104 | \ |
105 | last_nonzero_field = out; \ |
106 | last_nonzero_fieldnr = nr_fields; \ |
107 | } else { \ |
108 | *out++ = 0; \ |
109 | \ |
110 | if (_bits > 64) \ |
111 | *out++ = 0; \ |
112 | } |
113 | |
114 | BCH_INODE_FIELDS_v3() |
115 | #undef x |
116 | BUG_ON(out > end); |
117 | |
118 | out = last_nonzero_field; |
119 | nr_fields = last_nonzero_fieldnr; |
120 | |
121 | bytes = out - (u8 *) &packed->inode.v; |
122 | set_bkey_val_bytes(k: &packed->inode.k, bytes); |
123 | memset_u64s_tail(s: &packed->inode.v, c: 0, bytes); |
124 | |
125 | SET_INODEv3_NR_FIELDS(k: &k->v, v: nr_fields); |
126 | |
127 | if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { |
128 | struct bch_inode_unpacked unpacked; |
129 | |
130 | ret = bch2_inode_unpack(bkey_i_to_s_c(k: &packed->inode.k_i), &unpacked); |
131 | BUG_ON(ret); |
132 | BUG_ON(unpacked.bi_inum != inode->bi_inum); |
133 | BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed); |
134 | BUG_ON(unpacked.bi_sectors != inode->bi_sectors); |
135 | BUG_ON(unpacked.bi_size != inode->bi_size); |
136 | BUG_ON(unpacked.bi_version != inode->bi_version); |
137 | BUG_ON(unpacked.bi_mode != inode->bi_mode); |
138 | |
139 | #define x(_name, _bits) if (unpacked._name != inode->_name) \ |
140 | panic("unpacked %llu should be %llu", \ |
141 | (u64) unpacked._name, (u64) inode->_name); |
142 | BCH_INODE_FIELDS_v3() |
143 | #undef x |
144 | } |
145 | } |
146 | |
/* Out of line entry point for bch2_inode_pack_inlined() */
void bch2_inode_pack(struct bkey_inode_buf *packed,
		     const struct bch_inode_unpacked *inode)
{
	bch2_inode_pack_inlined(packed, inode);
}
152 | |
153 | static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode, |
154 | struct bch_inode_unpacked *unpacked) |
155 | { |
156 | const u8 *in = inode.v->fields; |
157 | const u8 *end = bkey_val_end(inode); |
158 | u64 field[2]; |
159 | unsigned fieldnr = 0, field_bits; |
160 | int ret; |
161 | |
162 | #define x(_name, _bits) \ |
163 | if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \ |
164 | unsigned offset = offsetof(struct bch_inode_unpacked, _name);\ |
165 | memset((void *) unpacked + offset, 0, \ |
166 | sizeof(*unpacked) - offset); \ |
167 | return 0; \ |
168 | } \ |
169 | \ |
170 | ret = inode_decode_field(in, end, field, &field_bits); \ |
171 | if (ret < 0) \ |
172 | return ret; \ |
173 | \ |
174 | if (field_bits > sizeof(unpacked->_name) * 8) \ |
175 | return -1; \ |
176 | \ |
177 | unpacked->_name = field[1]; \ |
178 | in += ret; |
179 | |
180 | BCH_INODE_FIELDS_v2() |
181 | #undef x |
182 | |
183 | /* XXX: signal if there were more fields than expected? */ |
184 | return 0; |
185 | } |
186 | |
187 | static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked, |
188 | const u8 *in, const u8 *end, |
189 | unsigned nr_fields) |
190 | { |
191 | unsigned fieldnr = 0; |
192 | int ret; |
193 | u64 v[2]; |
194 | |
195 | #define x(_name, _bits) \ |
196 | if (fieldnr < nr_fields) { \ |
197 | ret = bch2_varint_decode_fast(in, end, &v[0]); \ |
198 | if (ret < 0) \ |
199 | return ret; \ |
200 | in += ret; \ |
201 | \ |
202 | if (_bits > 64) { \ |
203 | ret = bch2_varint_decode_fast(in, end, &v[1]); \ |
204 | if (ret < 0) \ |
205 | return ret; \ |
206 | in += ret; \ |
207 | } else { \ |
208 | v[1] = 0; \ |
209 | } \ |
210 | } else { \ |
211 | v[0] = v[1] = 0; \ |
212 | } \ |
213 | \ |
214 | unpacked->_name = v[0]; \ |
215 | if (v[1] || v[0] != unpacked->_name) \ |
216 | return -1; \ |
217 | fieldnr++; |
218 | |
219 | BCH_INODE_FIELDS_v2() |
220 | #undef x |
221 | |
222 | /* XXX: signal if there were more fields than expected? */ |
223 | return 0; |
224 | } |
225 | |
226 | static int bch2_inode_unpack_v3(struct bkey_s_c k, |
227 | struct bch_inode_unpacked *unpacked) |
228 | { |
229 | struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); |
230 | const u8 *in = inode.v->fields; |
231 | const u8 *end = bkey_val_end(inode); |
232 | unsigned nr_fields = INODEv3_NR_FIELDS(k: inode.v); |
233 | unsigned fieldnr = 0; |
234 | int ret; |
235 | u64 v[2]; |
236 | |
237 | unpacked->bi_inum = inode.k->p.offset; |
238 | unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq); |
239 | unpacked->bi_hash_seed = inode.v->bi_hash_seed; |
240 | unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags); |
241 | unpacked->bi_sectors = le64_to_cpu(inode.v->bi_sectors); |
242 | unpacked->bi_size = le64_to_cpu(inode.v->bi_size); |
243 | unpacked->bi_version = le64_to_cpu(inode.v->bi_version); |
244 | unpacked->bi_mode = INODEv3_MODE(k: inode.v); |
245 | |
246 | #define x(_name, _bits) \ |
247 | if (fieldnr < nr_fields) { \ |
248 | ret = bch2_varint_decode_fast(in, end, &v[0]); \ |
249 | if (ret < 0) \ |
250 | return ret; \ |
251 | in += ret; \ |
252 | \ |
253 | if (_bits > 64) { \ |
254 | ret = bch2_varint_decode_fast(in, end, &v[1]); \ |
255 | if (ret < 0) \ |
256 | return ret; \ |
257 | in += ret; \ |
258 | } else { \ |
259 | v[1] = 0; \ |
260 | } \ |
261 | } else { \ |
262 | v[0] = v[1] = 0; \ |
263 | } \ |
264 | \ |
265 | unpacked->_name = v[0]; \ |
266 | if (v[1] || v[0] != unpacked->_name) \ |
267 | return -1; \ |
268 | fieldnr++; |
269 | |
270 | BCH_INODE_FIELDS_v3() |
271 | #undef x |
272 | |
273 | /* XXX: signal if there were more fields than expected? */ |
274 | return 0; |
275 | } |
276 | |
277 | static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k, |
278 | struct bch_inode_unpacked *unpacked) |
279 | { |
280 | memset(unpacked, 0, sizeof(*unpacked)); |
281 | |
282 | switch (k.k->type) { |
283 | case KEY_TYPE_inode: { |
284 | struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); |
285 | |
286 | unpacked->bi_inum = inode.k->p.offset; |
287 | unpacked->bi_journal_seq= 0; |
288 | unpacked->bi_hash_seed = inode.v->bi_hash_seed; |
289 | unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags); |
290 | unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode); |
291 | |
292 | if (INODE_NEW_VARINT(k: inode.v)) { |
293 | return bch2_inode_unpack_v2(unpacked, in: inode.v->fields, |
294 | bkey_val_end(inode), |
295 | nr_fields: INODE_NR_FIELDS(k: inode.v)); |
296 | } else { |
297 | return bch2_inode_unpack_v1(inode, unpacked); |
298 | } |
299 | break; |
300 | } |
301 | case KEY_TYPE_inode_v2: { |
302 | struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); |
303 | |
304 | unpacked->bi_inum = inode.k->p.offset; |
305 | unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq); |
306 | unpacked->bi_hash_seed = inode.v->bi_hash_seed; |
307 | unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags); |
308 | unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode); |
309 | |
310 | return bch2_inode_unpack_v2(unpacked, in: inode.v->fields, |
311 | bkey_val_end(inode), |
312 | nr_fields: INODEv2_NR_FIELDS(k: inode.v)); |
313 | } |
314 | default: |
315 | BUG(); |
316 | } |
317 | } |
318 | |
319 | int bch2_inode_unpack(struct bkey_s_c k, |
320 | struct bch_inode_unpacked *unpacked) |
321 | { |
322 | if (likely(k.k->type == KEY_TYPE_inode_v3)) |
323 | return bch2_inode_unpack_v3(k, unpacked); |
324 | return bch2_inode_unpack_slowpath(k, unpacked); |
325 | } |
326 | |
327 | int bch2_inode_peek_nowarn(struct btree_trans *trans, |
328 | struct btree_iter *iter, |
329 | struct bch_inode_unpacked *inode, |
330 | subvol_inum inum, unsigned flags) |
331 | { |
332 | struct bkey_s_c k; |
333 | u32 snapshot; |
334 | int ret; |
335 | |
336 | ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); |
337 | if (ret) |
338 | return ret; |
339 | |
340 | k = bch2_bkey_get_iter(trans, iter, btree_id: BTREE_ID_inodes, |
341 | pos: SPOS(inode: 0, offset: inum.inum, snapshot), |
342 | flags: flags|BTREE_ITER_CACHED); |
343 | ret = bkey_err(k); |
344 | if (ret) |
345 | return ret; |
346 | |
347 | ret = bkey_is_inode(k: k.k) ? 0 : -BCH_ERR_ENOENT_inode; |
348 | if (ret) |
349 | goto err; |
350 | |
351 | ret = bch2_inode_unpack(k, unpacked: inode); |
352 | if (ret) |
353 | goto err; |
354 | |
355 | return 0; |
356 | err: |
357 | bch2_trans_iter_exit(trans, iter); |
358 | return ret; |
359 | } |
360 | |
361 | int bch2_inode_peek(struct btree_trans *trans, |
362 | struct btree_iter *iter, |
363 | struct bch_inode_unpacked *inode, |
364 | subvol_inum inum, unsigned flags) |
365 | { |
366 | int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags); |
367 | bch_err_msg(trans->c, ret, "looking up inum %u:%llu:" , inum.subvol, inum.inum); |
368 | return ret; |
369 | } |
370 | |
371 | int bch2_inode_write_flags(struct btree_trans *trans, |
372 | struct btree_iter *iter, |
373 | struct bch_inode_unpacked *inode, |
374 | enum btree_update_flags flags) |
375 | { |
376 | struct bkey_inode_buf *inode_p; |
377 | |
378 | inode_p = bch2_trans_kmalloc(trans, size: sizeof(*inode_p)); |
379 | if (IS_ERR(ptr: inode_p)) |
380 | return PTR_ERR(ptr: inode_p); |
381 | |
382 | bch2_inode_pack_inlined(packed: inode_p, inode); |
383 | inode_p->inode.k.p.snapshot = iter->snapshot; |
384 | return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags); |
385 | } |
386 | |
387 | int __bch2_fsck_write_inode(struct btree_trans *trans, |
388 | struct bch_inode_unpacked *inode, |
389 | u32 snapshot) |
390 | { |
391 | struct bkey_inode_buf *inode_p = |
392 | bch2_trans_kmalloc(trans, size: sizeof(*inode_p)); |
393 | |
394 | if (IS_ERR(ptr: inode_p)) |
395 | return PTR_ERR(ptr: inode_p); |
396 | |
397 | bch2_inode_pack(packed: inode_p, inode); |
398 | inode_p->inode.k.p.snapshot = snapshot; |
399 | |
400 | return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes, |
401 | &inode_p->inode.k_i, |
402 | BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); |
403 | } |
404 | |
405 | int bch2_fsck_write_inode(struct btree_trans *trans, |
406 | struct bch_inode_unpacked *inode, |
407 | u32 snapshot) |
408 | { |
409 | int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, |
410 | __bch2_fsck_write_inode(trans, inode, snapshot)); |
411 | bch_err_fn(trans->c, ret); |
412 | return ret; |
413 | } |
414 | |
415 | struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) |
416 | { |
417 | struct bch_inode_unpacked u; |
418 | struct bkey_inode_buf *inode_p; |
419 | int ret; |
420 | |
421 | if (!bkey_is_inode(k: &k->k)) |
422 | return ERR_PTR(error: -ENOENT); |
423 | |
424 | inode_p = bch2_trans_kmalloc(trans, size: sizeof(*inode_p)); |
425 | if (IS_ERR(ptr: inode_p)) |
426 | return ERR_CAST(ptr: inode_p); |
427 | |
428 | ret = bch2_inode_unpack(k: bkey_i_to_s_c(k), unpacked: &u); |
429 | if (ret) |
430 | return ERR_PTR(error: ret); |
431 | |
432 | bch2_inode_pack(packed: inode_p, inode: &u); |
433 | return &inode_p->inode.k_i; |
434 | } |
435 | |
436 | static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) |
437 | { |
438 | struct bch_inode_unpacked unpacked; |
439 | int ret = 0; |
440 | |
441 | bkey_fsck_err_on(k.k->p.inode, c, err, |
442 | inode_pos_inode_nonzero, |
443 | "nonzero k.p.inode" ); |
444 | |
445 | bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err, |
446 | inode_pos_blockdev_range, |
447 | "fs inode in blockdev range" ); |
448 | |
449 | bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err, |
450 | inode_unpack_error, |
451 | "invalid variable length fields" ); |
452 | |
453 | bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err, |
454 | inode_checksum_type_invalid, |
455 | "invalid data checksum type (%u >= %u" , |
456 | unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); |
457 | |
458 | bkey_fsck_err_on(unpacked.bi_compression && |
459 | !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err, |
460 | inode_compression_type_invalid, |
461 | "invalid compression opt %u" , unpacked.bi_compression - 1); |
462 | |
463 | bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) && |
464 | unpacked.bi_nlink != 0, c, err, |
465 | inode_unlinked_but_nlink_nonzero, |
466 | "flagged as unlinked but bi_nlink != 0" ); |
467 | |
468 | bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err, |
469 | inode_subvol_root_but_not_dir, |
470 | "subvolume root but not a directory" ); |
471 | fsck_err: |
472 | return ret; |
473 | } |
474 | |
475 | int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, |
476 | enum bkey_invalid_flags flags, |
477 | struct printbuf *err) |
478 | { |
479 | struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); |
480 | int ret = 0; |
481 | |
482 | bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, |
483 | inode_str_hash_invalid, |
484 | "invalid str hash type (%llu >= %u)" , |
485 | INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); |
486 | |
487 | ret = __bch2_inode_invalid(c, k, err); |
488 | fsck_err: |
489 | return ret; |
490 | } |
491 | |
492 | int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k, |
493 | enum bkey_invalid_flags flags, |
494 | struct printbuf *err) |
495 | { |
496 | struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); |
497 | int ret = 0; |
498 | |
499 | bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, |
500 | inode_str_hash_invalid, |
501 | "invalid str hash type (%llu >= %u)" , |
502 | INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); |
503 | |
504 | ret = __bch2_inode_invalid(c, k, err); |
505 | fsck_err: |
506 | return ret; |
507 | } |
508 | |
509 | int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k, |
510 | enum bkey_invalid_flags flags, |
511 | struct printbuf *err) |
512 | { |
513 | struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); |
514 | int ret = 0; |
515 | |
516 | bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || |
517 | INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err, |
518 | inode_v3_fields_start_bad, |
519 | "invalid fields_start (got %llu, min %u max %zu)" , |
520 | INODEv3_FIELDS_START(inode.v), |
521 | INODEv3_FIELDS_START_INITIAL, |
522 | bkey_val_u64s(inode.k)); |
523 | |
524 | bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, |
525 | inode_str_hash_invalid, |
526 | "invalid str hash type (%llu >= %u)" , |
527 | INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); |
528 | |
529 | ret = __bch2_inode_invalid(c, k, err); |
530 | fsck_err: |
531 | return ret; |
532 | } |
533 | |
534 | static void __bch2_inode_unpacked_to_text(struct printbuf *out, |
535 | struct bch_inode_unpacked *inode) |
536 | { |
537 | printbuf_indent_add(out, 2); |
538 | prt_printf(out, "mode=%o" , inode->bi_mode); |
539 | prt_newline(out); |
540 | |
541 | prt_str(out, str: "flags=" ); |
542 | prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1)); |
543 | prt_printf(out, " (%x)" , inode->bi_flags); |
544 | prt_newline(out); |
545 | |
546 | prt_printf(out, "journal_seq=%llu" , inode->bi_journal_seq); |
547 | prt_newline(out); |
548 | |
549 | prt_printf(out, "bi_size=%llu" , inode->bi_size); |
550 | prt_newline(out); |
551 | |
552 | prt_printf(out, "bi_sectors=%llu" , inode->bi_sectors); |
553 | prt_newline(out); |
554 | |
555 | prt_printf(out, "bi_version=%llu" , inode->bi_version); |
556 | prt_newline(out); |
557 | |
558 | #define x(_name, _bits) \ |
559 | prt_printf(out, #_name "=%llu", (u64) inode->_name); \ |
560 | prt_newline(out); |
561 | BCH_INODE_FIELDS_v3() |
562 | #undef x |
563 | printbuf_indent_sub(out, 2); |
564 | } |
565 | |
566 | void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) |
567 | { |
568 | prt_printf(out, "inum: %llu " , inode->bi_inum); |
569 | __bch2_inode_unpacked_to_text(out, inode); |
570 | } |
571 | |
572 | void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) |
573 | { |
574 | struct bch_inode_unpacked inode; |
575 | |
576 | if (bch2_inode_unpack(k, unpacked: &inode)) { |
577 | prt_printf(out, "(unpack error)" ); |
578 | return; |
579 | } |
580 | |
581 | __bch2_inode_unpacked_to_text(out, inode: &inode); |
582 | } |
583 | |
584 | static inline u64 bkey_inode_flags(struct bkey_s_c k) |
585 | { |
586 | switch (k.k->type) { |
587 | case KEY_TYPE_inode: |
588 | return le32_to_cpu(bkey_s_c_to_inode(k).v->bi_flags); |
589 | case KEY_TYPE_inode_v2: |
590 | return le64_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_flags); |
591 | case KEY_TYPE_inode_v3: |
592 | return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_flags); |
593 | default: |
594 | return 0; |
595 | } |
596 | } |
597 | |
598 | static inline bool bkey_is_deleted_inode(struct bkey_s_c k) |
599 | { |
600 | return bkey_inode_flags(k) & BCH_INODE_unlinked; |
601 | } |
602 | |
603 | int bch2_trigger_inode(struct btree_trans *trans, |
604 | enum btree_id btree_id, unsigned level, |
605 | struct bkey_s_c old, |
606 | struct bkey_s new, |
607 | unsigned flags) |
608 | { |
609 | s64 nr = bkey_is_inode(k: new.k) - bkey_is_inode(k: old.k); |
610 | |
611 | if (flags & BTREE_TRIGGER_TRANSACTIONAL) { |
612 | if (nr) { |
613 | int ret = bch2_replicas_deltas_realloc(trans, 0); |
614 | if (ret) |
615 | return ret; |
616 | |
617 | trans->fs_usage_deltas->nr_inodes += nr; |
618 | } |
619 | |
620 | bool old_deleted = bkey_is_deleted_inode(k: old); |
621 | bool new_deleted = bkey_is_deleted_inode(k: new.s_c); |
622 | if (old_deleted != new_deleted) { |
623 | int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, |
624 | new.k->p, new_deleted); |
625 | if (ret) |
626 | return ret; |
627 | } |
628 | } |
629 | |
630 | if ((flags & BTREE_TRIGGER_ATOMIC) && (flags & BTREE_TRIGGER_INSERT)) { |
631 | BUG_ON(!trans->journal_res.seq); |
632 | |
633 | bkey_s_to_inode_v3(k: new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq); |
634 | } |
635 | |
636 | if (flags & BTREE_TRIGGER_GC) { |
637 | struct bch_fs *c = trans->c; |
638 | |
639 | percpu_down_read(sem: &c->mark_lock); |
640 | this_cpu_add(c->usage_gc->b.nr_inodes, nr); |
641 | percpu_up_read(sem: &c->mark_lock); |
642 | } |
643 | |
644 | return 0; |
645 | } |
646 | |
647 | int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k, |
648 | enum bkey_invalid_flags flags, |
649 | struct printbuf *err) |
650 | { |
651 | int ret = 0; |
652 | |
653 | bkey_fsck_err_on(k.k->p.inode, c, err, |
654 | inode_pos_inode_nonzero, |
655 | "nonzero k.p.inode" ); |
656 | fsck_err: |
657 | return ret; |
658 | } |
659 | |
660 | void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, |
661 | struct bkey_s_c k) |
662 | { |
663 | struct bkey_s_c_inode_generation gen = bkey_s_c_to_inode_generation(k); |
664 | |
665 | prt_printf(out, "generation: %u" , le32_to_cpu(gen.v->bi_generation)); |
666 | } |
667 | |
668 | void bch2_inode_init_early(struct bch_fs *c, |
669 | struct bch_inode_unpacked *inode_u) |
670 | { |
671 | enum bch_str_hash_type str_hash = |
672 | bch2_str_hash_opt_to_type(c, opt: c->opts.str_hash); |
673 | |
674 | memset(inode_u, 0, sizeof(*inode_u)); |
675 | |
676 | /* ick */ |
677 | inode_u->bi_flags |= str_hash << INODE_STR_HASH_OFFSET; |
678 | get_random_bytes(buf: &inode_u->bi_hash_seed, |
679 | len: sizeof(inode_u->bi_hash_seed)); |
680 | } |
681 | |
682 | void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now, |
683 | uid_t uid, gid_t gid, umode_t mode, dev_t rdev, |
684 | struct bch_inode_unpacked *parent) |
685 | { |
686 | inode_u->bi_mode = mode; |
687 | inode_u->bi_uid = uid; |
688 | inode_u->bi_gid = gid; |
689 | inode_u->bi_dev = rdev; |
690 | inode_u->bi_atime = now; |
691 | inode_u->bi_mtime = now; |
692 | inode_u->bi_ctime = now; |
693 | inode_u->bi_otime = now; |
694 | |
695 | if (parent && parent->bi_mode & S_ISGID) { |
696 | inode_u->bi_gid = parent->bi_gid; |
697 | if (S_ISDIR(mode)) |
698 | inode_u->bi_mode |= S_ISGID; |
699 | } |
700 | |
701 | if (parent) { |
702 | #define x(_name, ...) inode_u->bi_##_name = parent->bi_##_name; |
703 | BCH_INODE_OPTS() |
704 | #undef x |
705 | } |
706 | } |
707 | |
708 | void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, |
709 | uid_t uid, gid_t gid, umode_t mode, dev_t rdev, |
710 | struct bch_inode_unpacked *parent) |
711 | { |
712 | bch2_inode_init_early(c, inode_u); |
713 | bch2_inode_init_late(inode_u, now: bch2_current_time(c), |
714 | uid, gid, mode, rdev, parent); |
715 | } |
716 | |
717 | static inline u32 bkey_generation(struct bkey_s_c k) |
718 | { |
719 | switch (k.k->type) { |
720 | case KEY_TYPE_inode: |
721 | case KEY_TYPE_inode_v2: |
722 | BUG(); |
723 | case KEY_TYPE_inode_generation: |
724 | return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation); |
725 | default: |
726 | return 0; |
727 | } |
728 | } |
729 | |
730 | /* |
731 | * This just finds an empty slot: |
732 | */ |
733 | int bch2_inode_create(struct btree_trans *trans, |
734 | struct btree_iter *iter, |
735 | struct bch_inode_unpacked *inode_u, |
736 | u32 snapshot, u64 cpu) |
737 | { |
738 | struct bch_fs *c = trans->c; |
739 | struct bkey_s_c k; |
740 | u64 min, max, start, pos, *hint; |
741 | int ret = 0; |
742 | unsigned bits = (c->opts.inodes_32bit ? 31 : 63); |
743 | |
744 | if (c->opts.shard_inode_numbers) { |
745 | bits -= c->inode_shard_bits; |
746 | |
747 | min = (cpu << bits); |
748 | max = (cpu << bits) | ~(ULLONG_MAX << bits); |
749 | |
750 | min = max_t(u64, min, BLOCKDEV_INODE_MAX); |
751 | hint = c->unused_inode_hints + cpu; |
752 | } else { |
753 | min = BLOCKDEV_INODE_MAX; |
754 | max = ~(ULLONG_MAX << bits); |
755 | hint = c->unused_inode_hints; |
756 | } |
757 | |
758 | start = READ_ONCE(*hint); |
759 | |
760 | if (start >= max || start < min) |
761 | start = min; |
762 | |
763 | pos = start; |
764 | bch2_trans_iter_init(trans, iter, btree_id: BTREE_ID_inodes, POS(0, pos), |
765 | flags: BTREE_ITER_ALL_SNAPSHOTS| |
766 | BTREE_ITER_INTENT); |
767 | again: |
768 | while ((k = bch2_btree_iter_peek(iter)).k && |
769 | !(ret = bkey_err(k)) && |
770 | bkey_lt(l: k.k->p, POS(0, max))) { |
771 | if (pos < iter->pos.offset) |
772 | goto found_slot; |
773 | |
774 | /* |
775 | * We don't need to iterate over keys in every snapshot once |
776 | * we've found just one: |
777 | */ |
778 | pos = iter->pos.offset + 1; |
779 | bch2_btree_iter_set_pos(iter, POS(0, pos)); |
780 | } |
781 | |
782 | if (!ret && pos < max) |
783 | goto found_slot; |
784 | |
785 | if (!ret && start == min) |
786 | ret = -BCH_ERR_ENOSPC_inode_create; |
787 | |
788 | if (ret) { |
789 | bch2_trans_iter_exit(trans, iter); |
790 | return ret; |
791 | } |
792 | |
793 | /* Retry from start */ |
794 | pos = start = min; |
795 | bch2_btree_iter_set_pos(iter, POS(0, pos)); |
796 | goto again; |
797 | found_slot: |
798 | bch2_btree_iter_set_pos(iter, new_pos: SPOS(inode: 0, offset: pos, snapshot)); |
799 | k = bch2_btree_iter_peek_slot(iter); |
800 | ret = bkey_err(k); |
801 | if (ret) { |
802 | bch2_trans_iter_exit(trans, iter); |
803 | return ret; |
804 | } |
805 | |
806 | *hint = k.k->p.offset; |
807 | inode_u->bi_inum = k.k->p.offset; |
808 | inode_u->bi_generation = bkey_generation(k); |
809 | return 0; |
810 | } |
811 | |
812 | static int bch2_inode_delete_keys(struct btree_trans *trans, |
813 | subvol_inum inum, enum btree_id id) |
814 | { |
815 | struct btree_iter iter; |
816 | struct bkey_s_c k; |
817 | struct bkey_i delete; |
818 | struct bpos end = POS(inum.inum, U64_MAX); |
819 | u32 snapshot; |
820 | int ret = 0; |
821 | |
822 | /* |
823 | * We're never going to be deleting partial extents, no need to use an |
824 | * extent iterator: |
825 | */ |
826 | bch2_trans_iter_init(trans, iter: &iter, btree_id: id, POS(inum.inum, 0), |
827 | flags: BTREE_ITER_INTENT); |
828 | |
829 | while (1) { |
830 | bch2_trans_begin(trans); |
831 | |
832 | ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); |
833 | if (ret) |
834 | goto err; |
835 | |
836 | bch2_btree_iter_set_snapshot(iter: &iter, snapshot); |
837 | |
838 | k = bch2_btree_iter_peek_upto(&iter, end); |
839 | ret = bkey_err(k); |
840 | if (ret) |
841 | goto err; |
842 | |
843 | if (!k.k) |
844 | break; |
845 | |
846 | bkey_init(k: &delete.k); |
847 | delete.k.p = iter.pos; |
848 | |
849 | if (iter.flags & BTREE_ITER_IS_EXTENTS) |
850 | bch2_key_resize(k: &delete.k, |
851 | new_size: bpos_min(l: end, r: k.k->p).offset - |
852 | iter.pos.offset); |
853 | |
854 | ret = bch2_trans_update(trans, &iter, &delete, 0) ?: |
855 | bch2_trans_commit(trans, NULL, NULL, |
856 | flags: BCH_TRANS_COMMIT_no_enospc); |
857 | err: |
858 | if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
859 | break; |
860 | } |
861 | |
862 | bch2_trans_iter_exit(trans, &iter); |
863 | return ret; |
864 | } |
865 | |
866 | int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) |
867 | { |
868 | struct btree_trans *trans = bch2_trans_get(c); |
869 | struct btree_iter iter = { NULL }; |
870 | struct bkey_i_inode_generation delete; |
871 | struct bch_inode_unpacked inode_u; |
872 | struct bkey_s_c k; |
873 | u32 snapshot; |
874 | int ret; |
875 | |
876 | /* |
877 | * If this was a directory, there shouldn't be any real dirents left - |
878 | * but there could be whiteouts (from hash collisions) that we should |
879 | * delete: |
880 | * |
881 | * XXX: the dirent could ideally would delete whiteouts when they're no |
882 | * longer needed |
883 | */ |
884 | ret = bch2_inode_delete_keys(trans, inum, id: BTREE_ID_extents) ?: |
885 | bch2_inode_delete_keys(trans, inum, id: BTREE_ID_xattrs) ?: |
886 | bch2_inode_delete_keys(trans, inum, id: BTREE_ID_dirents); |
887 | if (ret) |
888 | goto err; |
889 | retry: |
890 | bch2_trans_begin(trans); |
891 | |
892 | ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); |
893 | if (ret) |
894 | goto err; |
895 | |
896 | k = bch2_bkey_get_iter(trans, iter: &iter, btree_id: BTREE_ID_inodes, |
897 | pos: SPOS(inode: 0, offset: inum.inum, snapshot), |
898 | flags: BTREE_ITER_INTENT|BTREE_ITER_CACHED); |
899 | ret = bkey_err(k); |
900 | if (ret) |
901 | goto err; |
902 | |
903 | if (!bkey_is_inode(k: k.k)) { |
904 | bch2_fs_inconsistent(c, |
905 | "inode %llu:%u not found when deleting" , |
906 | inum.inum, snapshot); |
907 | ret = -EIO; |
908 | goto err; |
909 | } |
910 | |
911 | bch2_inode_unpack(k, unpacked: &inode_u); |
912 | |
913 | bkey_inode_generation_init(k: &delete.k_i); |
914 | delete.k.p = iter.pos; |
915 | delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); |
916 | |
917 | ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?: |
918 | bch2_trans_commit(trans, NULL, NULL, |
919 | flags: BCH_TRANS_COMMIT_no_enospc); |
920 | err: |
921 | bch2_trans_iter_exit(trans, &iter); |
922 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
923 | goto retry; |
924 | |
925 | bch2_trans_put(trans); |
926 | return ret; |
927 | } |
928 | |
929 | int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *trans, |
930 | subvol_inum inum, |
931 | struct bch_inode_unpacked *inode) |
932 | { |
933 | struct btree_iter iter; |
934 | int ret; |
935 | |
936 | ret = bch2_inode_peek_nowarn(trans, iter: &iter, inode, inum, flags: 0); |
937 | if (!ret) |
938 | bch2_trans_iter_exit(trans, &iter); |
939 | return ret; |
940 | } |
941 | |
942 | int bch2_inode_find_by_inum_trans(struct btree_trans *trans, |
943 | subvol_inum inum, |
944 | struct bch_inode_unpacked *inode) |
945 | { |
946 | struct btree_iter iter; |
947 | int ret; |
948 | |
949 | ret = bch2_inode_peek(trans, iter: &iter, inode, inum, flags: 0); |
950 | if (!ret) |
951 | bch2_trans_iter_exit(trans, &iter); |
952 | return ret; |
953 | } |
954 | |
955 | int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum, |
956 | struct bch_inode_unpacked *inode) |
957 | { |
958 | return bch2_trans_do(c, NULL, NULL, 0, |
959 | bch2_inode_find_by_inum_trans(trans, inum, inode)); |
960 | } |
961 | |
962 | int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) |
963 | { |
964 | if (bi->bi_flags & BCH_INODE_unlinked) |
965 | bi->bi_flags &= ~BCH_INODE_unlinked; |
966 | else { |
967 | if (bi->bi_nlink == U32_MAX) |
968 | return -EINVAL; |
969 | |
970 | bi->bi_nlink++; |
971 | } |
972 | |
973 | return 0; |
974 | } |
975 | |
976 | void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *bi) |
977 | { |
978 | if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_unlinked)) { |
979 | bch2_trans_inconsistent(trans, "inode %llu unlinked but link count nonzero" , |
980 | bi->bi_inum); |
981 | return; |
982 | } |
983 | |
984 | if (bi->bi_flags & BCH_INODE_unlinked) { |
985 | bch2_trans_inconsistent(trans, "inode %llu link count underflow" , bi->bi_inum); |
986 | return; |
987 | } |
988 | |
989 | if (bi->bi_nlink) |
990 | bi->bi_nlink--; |
991 | else |
992 | bi->bi_flags |= BCH_INODE_unlinked; |
993 | } |
994 | |
995 | struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode) |
996 | { |
997 | struct bch_opts ret = { 0 }; |
998 | #define x(_name, _bits) \ |
999 | if (inode->bi_##_name) \ |
1000 | opt_set(ret, _name, inode->bi_##_name - 1); |
1001 | BCH_INODE_OPTS() |
1002 | #undef x |
1003 | return ret; |
1004 | } |
1005 | |
1006 | void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, |
1007 | struct bch_inode_unpacked *inode) |
1008 | { |
1009 | #define x(_name, _bits) opts->_name = inode_opt_get(c, inode, _name); |
1010 | BCH_INODE_OPTS() |
1011 | #undef x |
1012 | |
1013 | if (opts->nocow) |
1014 | opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0; |
1015 | } |
1016 | |
1017 | int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts) |
1018 | { |
1019 | struct bch_inode_unpacked inode; |
1020 | int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode)); |
1021 | |
1022 | if (ret) |
1023 | return ret; |
1024 | |
1025 | bch2_inode_opts_get(opts, c: trans->c, inode: &inode); |
1026 | return 0; |
1027 | } |
1028 | |
1029 | int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) |
1030 | { |
1031 | struct bch_fs *c = trans->c; |
1032 | struct btree_iter iter = { NULL }; |
1033 | struct bkey_i_inode_generation delete; |
1034 | struct bch_inode_unpacked inode_u; |
1035 | struct bkey_s_c k; |
1036 | int ret; |
1037 | |
1038 | do { |
1039 | ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, |
1040 | SPOS(inode: inum, offset: 0, snapshot), |
1041 | SPOS(inode: inum, U64_MAX, snapshot), |
1042 | 0, NULL) ?: |
1043 | bch2_btree_delete_range_trans(trans, BTREE_ID_dirents, |
1044 | SPOS(inode: inum, offset: 0, snapshot), |
1045 | SPOS(inode: inum, U64_MAX, snapshot), |
1046 | 0, NULL) ?: |
1047 | bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs, |
1048 | SPOS(inode: inum, offset: 0, snapshot), |
1049 | SPOS(inode: inum, U64_MAX, snapshot), |
1050 | 0, NULL); |
1051 | } while (ret == -BCH_ERR_transaction_restart_nested); |
1052 | if (ret) |
1053 | goto err; |
1054 | retry: |
1055 | bch2_trans_begin(trans); |
1056 | |
1057 | k = bch2_bkey_get_iter(trans, iter: &iter, btree_id: BTREE_ID_inodes, |
1058 | pos: SPOS(inode: 0, offset: inum, snapshot), flags: BTREE_ITER_INTENT); |
1059 | ret = bkey_err(k); |
1060 | if (ret) |
1061 | goto err; |
1062 | |
1063 | if (!bkey_is_inode(k: k.k)) { |
1064 | bch2_fs_inconsistent(c, |
1065 | "inode %llu:%u not found when deleting" , |
1066 | inum, snapshot); |
1067 | ret = -EIO; |
1068 | goto err; |
1069 | } |
1070 | |
1071 | bch2_inode_unpack(k, unpacked: &inode_u); |
1072 | |
1073 | /* Subvolume root? */ |
1074 | if (inode_u.bi_subvol) |
1075 | bch_warn(c, "deleting inode %llu marked as unlinked, but also a subvolume root!?" , inode_u.bi_inum); |
1076 | |
1077 | bkey_inode_generation_init(k: &delete.k_i); |
1078 | delete.k.p = iter.pos; |
1079 | delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); |
1080 | |
1081 | ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?: |
1082 | bch2_trans_commit(trans, NULL, NULL, |
1083 | flags: BCH_TRANS_COMMIT_no_enospc); |
1084 | err: |
1085 | bch2_trans_iter_exit(trans, &iter); |
1086 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
1087 | goto retry; |
1088 | |
1089 | return ret ?: -BCH_ERR_transaction_restart_nested; |
1090 | } |
1091 | |
1092 | static int may_delete_deleted_inode(struct btree_trans *trans, |
1093 | struct btree_iter *iter, |
1094 | struct bpos pos, |
1095 | bool *need_another_pass) |
1096 | { |
1097 | struct bch_fs *c = trans->c; |
1098 | struct btree_iter inode_iter; |
1099 | struct bkey_s_c k; |
1100 | struct bch_inode_unpacked inode; |
1101 | int ret; |
1102 | |
1103 | k = bch2_bkey_get_iter(trans, iter: &inode_iter, btree_id: BTREE_ID_inodes, pos, flags: BTREE_ITER_CACHED); |
1104 | ret = bkey_err(k); |
1105 | if (ret) |
1106 | return ret; |
1107 | |
1108 | ret = bkey_is_inode(k: k.k) ? 0 : -BCH_ERR_ENOENT_inode; |
1109 | if (fsck_err_on(!bkey_is_inode(k.k), c, |
1110 | deleted_inode_missing, |
1111 | "nonexistent inode %llu:%u in deleted_inodes btree" , |
1112 | pos.offset, pos.snapshot)) |
1113 | goto delete; |
1114 | |
1115 | ret = bch2_inode_unpack(k, unpacked: &inode); |
1116 | if (ret) |
1117 | goto out; |
1118 | |
1119 | if (S_ISDIR(inode.bi_mode)) { |
1120 | ret = bch2_empty_dir_snapshot(trans, pos.offset, 0, pos.snapshot); |
1121 | if (fsck_err_on(bch2_err_matches(ret, ENOTEMPTY), |
1122 | c, deleted_inode_is_dir, |
1123 | "non empty directory %llu:%u in deleted_inodes btree" , |
1124 | pos.offset, pos.snapshot)) |
1125 | goto delete; |
1126 | if (ret) |
1127 | goto out; |
1128 | } |
1129 | |
1130 | if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c, |
1131 | deleted_inode_not_unlinked, |
1132 | "non-deleted inode %llu:%u in deleted_inodes btree" , |
1133 | pos.offset, pos.snapshot)) |
1134 | goto delete; |
1135 | |
1136 | if (c->sb.clean && |
1137 | !fsck_err(c, |
1138 | deleted_inode_but_clean, |
1139 | "filesystem marked as clean but have deleted inode %llu:%u" , |
1140 | pos.offset, pos.snapshot)) { |
1141 | ret = 0; |
1142 | goto out; |
1143 | } |
1144 | |
1145 | if (bch2_snapshot_is_internal_node(c, id: pos.snapshot)) { |
1146 | struct bpos new_min_pos; |
1147 | |
1148 | ret = bch2_propagate_key_to_snapshot_leaves(trans, inode_iter.btree_id, k, &new_min_pos); |
1149 | if (ret) |
1150 | goto out; |
1151 | |
1152 | inode.bi_flags &= ~BCH_INODE_unlinked; |
1153 | |
1154 | ret = bch2_inode_write_flags(trans, iter: &inode_iter, inode: &inode, |
1155 | BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); |
1156 | bch_err_msg(c, ret, "clearing inode unlinked flag" ); |
1157 | if (ret) |
1158 | goto out; |
1159 | |
1160 | /* |
1161 | * We'll need another write buffer flush to pick up the new |
1162 | * unlinked inodes in the snapshot leaves: |
1163 | */ |
1164 | *need_another_pass = true; |
1165 | goto out; |
1166 | } |
1167 | |
1168 | ret = 1; |
1169 | out: |
1170 | fsck_err: |
1171 | bch2_trans_iter_exit(trans, &inode_iter); |
1172 | return ret; |
1173 | delete: |
1174 | ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false); |
1175 | goto out; |
1176 | } |
1177 | |
/*
 * Walk the deleted_inodes btree and delete every inode that
 * may_delete_deleted_inode() approves.
 *
 * Each pass starts with a synchronous btree write buffer flush, then visits
 * every key in BTREE_ID_deleted_inodes across all snapshots.  Keys for which
 * may_delete_deleted_inode() returns > 0 are deleted with
 * bch2_inode_rm_snapshot().  If any key set need_another_pass, the whole pass
 * (including the flush) is repeated.
 *
 * Returns 0 on success or a negative error code.
 */
int bch2_delete_dead_inodes(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	bool need_another_pass;
	int ret;
again:
	/*
	 * if we ran check_inodes() unlinked inodes will have already been
	 * cleaned up but the write buffer will be out of sync; therefore we
	 * always need a write buffer flush
	 */
	ret = bch2_btree_write_buffer_flush_sync(trans);
	if (ret)
		goto err;

	need_another_pass = false;

	/*
	 * Weird transaction restart handling here because on successful delete,
	 * bch2_inode_rm_snapshot() will return a nested transaction restart,
	 * but we can't retry because the btree write buffer won't have been
	 * flushed and we'd spin:
	 *
	 * NOTE(review): iter and k are not declared in this function;
	 * presumably for_each_btree_key_commit() declares them - confirm.
	 */
	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
					BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
					NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
		ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass);
		/* > 0 means "go ahead and delete"; 0 or an error falls through unchanged */
		if (ret > 0) {
			bch_verbose(c, "deleting unlinked inode %llu:%u" , k.k->p.offset, k.k->p.snapshot);

			ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot);
			/*
			 * We don't want to loop here: a transaction restart
			 * error here means we handled a transaction restart and
			 * we're actually done, but if we loop we'll retry the
			 * same key because the write buffer hasn't been flushed
			 * yet
			 */
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
				ret = 0;
				continue;
			}
		}

		ret;
	}));

	/* Only rescan after a clean pass; an error ends the loop */
	if (!ret && need_another_pass)
		goto again;
err:
	bch2_trans_put(trans);
	return ret;
}
1231 | |