1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (C) 2007 Oracle. All rights reserved. |
4 | */ |
5 | |
6 | #include <linux/err.h> |
7 | #include <linux/uuid.h> |
8 | #include "ctree.h" |
9 | #include "fs.h" |
10 | #include "messages.h" |
11 | #include "transaction.h" |
12 | #include "disk-io.h" |
13 | #include "print-tree.h" |
14 | #include "qgroup.h" |
15 | #include "space-info.h" |
16 | #include "accessors.h" |
17 | #include "root-tree.h" |
18 | #include "orphan.h" |
19 | |
20 | /* |
21 | * Read a root item from the tree. In case we detect a root item smaller then |
22 | * sizeof(root_item), we know it's an old version of the root structure and |
23 | * initialize all new fields to zero. The same happens if we detect mismatching |
24 | * generation numbers as then we know the root was once mounted with an older |
25 | * kernel that was not aware of the root item structure change. |
26 | */ |
27 | static void btrfs_read_root_item(struct extent_buffer *eb, int slot, |
28 | struct btrfs_root_item *item) |
29 | { |
30 | u32 len; |
31 | int need_reset = 0; |
32 | |
33 | len = btrfs_item_size(eb, slot); |
34 | read_extent_buffer(eb, dst: item, btrfs_item_ptr_offset(eb, slot), |
35 | min_t(u32, len, sizeof(*item))); |
36 | if (len < sizeof(*item)) |
37 | need_reset = 1; |
38 | if (!need_reset && btrfs_root_generation(s: item) |
39 | != btrfs_root_generation_v2(s: item)) { |
40 | if (btrfs_root_generation_v2(s: item) != 0) { |
41 | btrfs_warn(eb->fs_info, |
42 | "mismatching generation and generation_v2 found in root item. This root was probably mounted with an older kernel. Resetting all new fields." ); |
43 | } |
44 | need_reset = 1; |
45 | } |
46 | if (need_reset) { |
47 | /* Clear all members from generation_v2 onwards. */ |
48 | memset_startat(item, 0, generation_v2); |
49 | generate_random_guid(guid: item->uuid); |
50 | } |
51 | } |
52 | |
53 | /* |
54 | * Lookup the root by the key. |
55 | * |
56 | * root: the root of the root tree |
57 | * search_key: the key to search |
58 | * path: the path we search |
59 | * root_item: the root item of the tree we look for |
60 | * root_key: the root key of the tree we look for |
61 | * |
62 | * If ->offset of 'search_key' is -1ULL, it means we are not sure the offset |
63 | * of the search key, just lookup the root with the highest offset for a |
64 | * given objectid. |
65 | * |
66 | * If we find something return 0, otherwise > 0, < 0 on error. |
67 | */ |
68 | int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key, |
69 | struct btrfs_path *path, struct btrfs_root_item *root_item, |
70 | struct btrfs_key *root_key) |
71 | { |
72 | struct btrfs_key found_key; |
73 | struct extent_buffer *l; |
74 | int ret; |
75 | int slot; |
76 | |
77 | ret = btrfs_search_slot(NULL, root, key: search_key, p: path, ins_len: 0, cow: 0); |
78 | if (ret < 0) |
79 | return ret; |
80 | |
81 | if (search_key->offset != -1ULL) { /* the search key is exact */ |
82 | if (ret > 0) |
83 | goto out; |
84 | } else { |
85 | BUG_ON(ret == 0); /* Logical error */ |
86 | if (path->slots[0] == 0) |
87 | goto out; |
88 | path->slots[0]--; |
89 | ret = 0; |
90 | } |
91 | |
92 | l = path->nodes[0]; |
93 | slot = path->slots[0]; |
94 | |
95 | btrfs_item_key_to_cpu(eb: l, cpu_key: &found_key, nr: slot); |
96 | if (found_key.objectid != search_key->objectid || |
97 | found_key.type != BTRFS_ROOT_ITEM_KEY) { |
98 | ret = 1; |
99 | goto out; |
100 | } |
101 | |
102 | if (root_item) |
103 | btrfs_read_root_item(eb: l, slot, item: root_item); |
104 | if (root_key) |
105 | memcpy(root_key, &found_key, sizeof(found_key)); |
106 | out: |
107 | btrfs_release_path(p: path); |
108 | return ret; |
109 | } |
110 | |
111 | void btrfs_set_root_node(struct btrfs_root_item *item, |
112 | struct extent_buffer *node) |
113 | { |
114 | btrfs_set_root_bytenr(s: item, val: node->start); |
115 | btrfs_set_root_level(s: item, val: btrfs_header_level(eb: node)); |
116 | btrfs_set_root_generation(s: item, val: btrfs_header_generation(eb: node)); |
117 | } |
118 | |
119 | /* |
120 | * copy the data in 'item' into the btree |
121 | */ |
122 | int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root |
123 | *root, struct btrfs_key *key, struct btrfs_root_item |
124 | *item) |
125 | { |
126 | struct btrfs_fs_info *fs_info = root->fs_info; |
127 | struct btrfs_path *path; |
128 | struct extent_buffer *l; |
129 | int ret; |
130 | int slot; |
131 | unsigned long ptr; |
132 | u32 old_len; |
133 | |
134 | path = btrfs_alloc_path(); |
135 | if (!path) |
136 | return -ENOMEM; |
137 | |
138 | ret = btrfs_search_slot(trans, root, key, p: path, ins_len: 0, cow: 1); |
139 | if (ret < 0) |
140 | goto out; |
141 | |
142 | if (ret > 0) { |
143 | btrfs_crit(fs_info, |
144 | "unable to find root key (%llu %u %llu) in tree %llu" , |
145 | key->objectid, key->type, key->offset, |
146 | root->root_key.objectid); |
147 | ret = -EUCLEAN; |
148 | btrfs_abort_transaction(trans, ret); |
149 | goto out; |
150 | } |
151 | |
152 | l = path->nodes[0]; |
153 | slot = path->slots[0]; |
154 | ptr = btrfs_item_ptr_offset(l, slot); |
155 | old_len = btrfs_item_size(eb: l, slot); |
156 | |
157 | /* |
158 | * If this is the first time we update the root item which originated |
159 | * from an older kernel, we need to enlarge the item size to make room |
160 | * for the added fields. |
161 | */ |
162 | if (old_len < sizeof(*item)) { |
163 | btrfs_release_path(p: path); |
164 | ret = btrfs_search_slot(trans, root, key, p: path, |
165 | ins_len: -1, cow: 1); |
166 | if (ret < 0) { |
167 | btrfs_abort_transaction(trans, ret); |
168 | goto out; |
169 | } |
170 | |
171 | ret = btrfs_del_item(trans, root, path); |
172 | if (ret < 0) { |
173 | btrfs_abort_transaction(trans, ret); |
174 | goto out; |
175 | } |
176 | btrfs_release_path(p: path); |
177 | ret = btrfs_insert_empty_item(trans, root, path, |
178 | key, data_size: sizeof(*item)); |
179 | if (ret < 0) { |
180 | btrfs_abort_transaction(trans, ret); |
181 | goto out; |
182 | } |
183 | l = path->nodes[0]; |
184 | slot = path->slots[0]; |
185 | ptr = btrfs_item_ptr_offset(l, slot); |
186 | } |
187 | |
188 | /* |
189 | * Update generation_v2 so at the next mount we know the new root |
190 | * fields are valid. |
191 | */ |
192 | btrfs_set_root_generation_v2(s: item, val: btrfs_root_generation(s: item)); |
193 | |
194 | write_extent_buffer(eb: l, src: item, start: ptr, len: sizeof(*item)); |
195 | btrfs_mark_buffer_dirty(trans, buf: path->nodes[0]); |
196 | out: |
197 | btrfs_free_path(p: path); |
198 | return ret; |
199 | } |
200 | |
201 | int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
202 | const struct btrfs_key *key, struct btrfs_root_item *item) |
203 | { |
204 | /* |
205 | * Make sure generation v1 and v2 match. See update_root for details. |
206 | */ |
207 | btrfs_set_root_generation_v2(s: item, val: btrfs_root_generation(s: item)); |
208 | return btrfs_insert_item(trans, root, key, data: item, data_size: sizeof(*item)); |
209 | } |
210 | |
211 | int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info) |
212 | { |
213 | struct btrfs_root *tree_root = fs_info->tree_root; |
214 | struct extent_buffer *leaf; |
215 | struct btrfs_path *path; |
216 | struct btrfs_key key; |
217 | struct btrfs_root *root; |
218 | int err = 0; |
219 | int ret; |
220 | |
221 | path = btrfs_alloc_path(); |
222 | if (!path) |
223 | return -ENOMEM; |
224 | |
225 | key.objectid = BTRFS_ORPHAN_OBJECTID; |
226 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
227 | key.offset = 0; |
228 | |
229 | while (1) { |
230 | u64 root_objectid; |
231 | |
232 | ret = btrfs_search_slot(NULL, root: tree_root, key: &key, p: path, ins_len: 0, cow: 0); |
233 | if (ret < 0) { |
234 | err = ret; |
235 | break; |
236 | } |
237 | |
238 | leaf = path->nodes[0]; |
239 | if (path->slots[0] >= btrfs_header_nritems(eb: leaf)) { |
240 | ret = btrfs_next_leaf(root: tree_root, path); |
241 | if (ret < 0) |
242 | err = ret; |
243 | if (ret != 0) |
244 | break; |
245 | leaf = path->nodes[0]; |
246 | } |
247 | |
248 | btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: path->slots[0]); |
249 | btrfs_release_path(p: path); |
250 | |
251 | if (key.objectid != BTRFS_ORPHAN_OBJECTID || |
252 | key.type != BTRFS_ORPHAN_ITEM_KEY) |
253 | break; |
254 | |
255 | root_objectid = key.offset; |
256 | key.offset++; |
257 | |
258 | root = btrfs_get_fs_root(fs_info, objectid: root_objectid, check_ref: false); |
259 | err = PTR_ERR_OR_ZERO(ptr: root); |
260 | if (err && err != -ENOENT) { |
261 | break; |
262 | } else if (err == -ENOENT) { |
263 | struct btrfs_trans_handle *trans; |
264 | |
265 | btrfs_release_path(p: path); |
266 | |
267 | trans = btrfs_join_transaction(root: tree_root); |
268 | if (IS_ERR(ptr: trans)) { |
269 | err = PTR_ERR(ptr: trans); |
270 | btrfs_handle_fs_error(fs_info, err, |
271 | "Failed to start trans to delete orphan item" ); |
272 | break; |
273 | } |
274 | err = btrfs_del_orphan_item(trans, root: tree_root, |
275 | offset: root_objectid); |
276 | btrfs_end_transaction(trans); |
277 | if (err) { |
278 | btrfs_handle_fs_error(fs_info, err, |
279 | "Failed to delete root orphan item" ); |
280 | break; |
281 | } |
282 | continue; |
283 | } |
284 | |
285 | WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)); |
286 | if (btrfs_root_refs(s: &root->root_item) == 0) { |
287 | struct btrfs_key drop_key; |
288 | |
289 | btrfs_disk_key_to_cpu(cpu_key: &drop_key, disk_key: &root->root_item.drop_progress); |
290 | /* |
291 | * If we have a non-zero drop_progress then we know we |
292 | * made it partly through deleting this snapshot, and |
293 | * thus we need to make sure we block any balance from |
294 | * happening until this snapshot is completely dropped. |
295 | */ |
296 | if (drop_key.objectid != 0 || drop_key.type != 0 || |
297 | drop_key.offset != 0) { |
298 | set_bit(nr: BTRFS_FS_UNFINISHED_DROPS, addr: &fs_info->flags); |
299 | set_bit(nr: BTRFS_ROOT_UNFINISHED_DROP, addr: &root->state); |
300 | } |
301 | |
302 | set_bit(nr: BTRFS_ROOT_DEAD_TREE, addr: &root->state); |
303 | btrfs_add_dead_root(root); |
304 | } |
305 | btrfs_put_root(root); |
306 | } |
307 | |
308 | btrfs_free_path(p: path); |
309 | return err; |
310 | } |
311 | |
312 | /* drop the root item for 'key' from the tree root */ |
313 | int btrfs_del_root(struct btrfs_trans_handle *trans, |
314 | const struct btrfs_key *key) |
315 | { |
316 | struct btrfs_root *root = trans->fs_info->tree_root; |
317 | struct btrfs_path *path; |
318 | int ret; |
319 | |
320 | path = btrfs_alloc_path(); |
321 | if (!path) |
322 | return -ENOMEM; |
323 | ret = btrfs_search_slot(trans, root, key, p: path, ins_len: -1, cow: 1); |
324 | if (ret < 0) |
325 | goto out; |
326 | |
327 | BUG_ON(ret != 0); |
328 | |
329 | ret = btrfs_del_item(trans, root, path); |
330 | out: |
331 | btrfs_free_path(p: path); |
332 | return ret; |
333 | } |
334 | |
335 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, |
336 | u64 ref_id, u64 dirid, u64 *sequence, |
337 | const struct fscrypt_str *name) |
338 | { |
339 | struct btrfs_root *tree_root = trans->fs_info->tree_root; |
340 | struct btrfs_path *path; |
341 | struct btrfs_root_ref *ref; |
342 | struct extent_buffer *leaf; |
343 | struct btrfs_key key; |
344 | unsigned long ptr; |
345 | int ret; |
346 | |
347 | path = btrfs_alloc_path(); |
348 | if (!path) |
349 | return -ENOMEM; |
350 | |
351 | key.objectid = root_id; |
352 | key.type = BTRFS_ROOT_BACKREF_KEY; |
353 | key.offset = ref_id; |
354 | again: |
355 | ret = btrfs_search_slot(trans, root: tree_root, key: &key, p: path, ins_len: -1, cow: 1); |
356 | if (ret < 0) { |
357 | goto out; |
358 | } else if (ret == 0) { |
359 | leaf = path->nodes[0]; |
360 | ref = btrfs_item_ptr(leaf, path->slots[0], |
361 | struct btrfs_root_ref); |
362 | ptr = (unsigned long)(ref + 1); |
363 | if ((btrfs_root_ref_dirid(eb: leaf, s: ref) != dirid) || |
364 | (btrfs_root_ref_name_len(eb: leaf, s: ref) != name->len) || |
365 | memcmp_extent_buffer(eb: leaf, ptrv: name->name, start: ptr, len: name->len)) { |
366 | ret = -ENOENT; |
367 | goto out; |
368 | } |
369 | *sequence = btrfs_root_ref_sequence(eb: leaf, s: ref); |
370 | |
371 | ret = btrfs_del_item(trans, root: tree_root, path); |
372 | if (ret) |
373 | goto out; |
374 | } else { |
375 | ret = -ENOENT; |
376 | goto out; |
377 | } |
378 | |
379 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { |
380 | btrfs_release_path(p: path); |
381 | key.objectid = ref_id; |
382 | key.type = BTRFS_ROOT_REF_KEY; |
383 | key.offset = root_id; |
384 | goto again; |
385 | } |
386 | |
387 | out: |
388 | btrfs_free_path(p: path); |
389 | return ret; |
390 | } |
391 | |
392 | /* |
393 | * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY |
394 | * or BTRFS_ROOT_BACKREF_KEY. |
395 | * |
396 | * The dirid, sequence, name and name_len refer to the directory entry |
397 | * that is referencing the root. |
398 | * |
399 | * For a forward ref, the root_id is the id of the tree referencing |
400 | * the root and ref_id is the id of the subvol or snapshot. |
401 | * |
402 | * For a back ref the root_id is the id of the subvol or snapshot and |
403 | * ref_id is the id of the tree referencing it. |
404 | * |
405 | * Will return 0, -ENOMEM, or anything from the CoW path |
406 | */ |
407 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, |
408 | u64 ref_id, u64 dirid, u64 sequence, |
409 | const struct fscrypt_str *name) |
410 | { |
411 | struct btrfs_root *tree_root = trans->fs_info->tree_root; |
412 | struct btrfs_key key; |
413 | int ret; |
414 | struct btrfs_path *path; |
415 | struct btrfs_root_ref *ref; |
416 | struct extent_buffer *leaf; |
417 | unsigned long ptr; |
418 | |
419 | path = btrfs_alloc_path(); |
420 | if (!path) |
421 | return -ENOMEM; |
422 | |
423 | key.objectid = root_id; |
424 | key.type = BTRFS_ROOT_BACKREF_KEY; |
425 | key.offset = ref_id; |
426 | again: |
427 | ret = btrfs_insert_empty_item(trans, root: tree_root, path, key: &key, |
428 | data_size: sizeof(*ref) + name->len); |
429 | if (ret) { |
430 | btrfs_abort_transaction(trans, ret); |
431 | btrfs_free_path(p: path); |
432 | return ret; |
433 | } |
434 | |
435 | leaf = path->nodes[0]; |
436 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); |
437 | btrfs_set_root_ref_dirid(eb: leaf, s: ref, val: dirid); |
438 | btrfs_set_root_ref_sequence(eb: leaf, s: ref, val: sequence); |
439 | btrfs_set_root_ref_name_len(eb: leaf, s: ref, val: name->len); |
440 | ptr = (unsigned long)(ref + 1); |
441 | write_extent_buffer(eb: leaf, src: name->name, start: ptr, len: name->len); |
442 | btrfs_mark_buffer_dirty(trans, buf: leaf); |
443 | |
444 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { |
445 | btrfs_release_path(p: path); |
446 | key.objectid = ref_id; |
447 | key.type = BTRFS_ROOT_REF_KEY; |
448 | key.offset = root_id; |
449 | goto again; |
450 | } |
451 | |
452 | btrfs_free_path(p: path); |
453 | return 0; |
454 | } |
455 | |
456 | /* |
457 | * Old btrfs forgets to init root_item->flags and root_item->byte_limit |
458 | * for subvolumes. To work around this problem, we steal a bit from |
459 | * root_item->inode_item->flags, and use it to indicate if those fields |
460 | * have been properly initialized. |
461 | */ |
462 | void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item) |
463 | { |
464 | u64 inode_flags = btrfs_stack_inode_flags(s: &root_item->inode); |
465 | |
466 | if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) { |
467 | inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT; |
468 | btrfs_set_stack_inode_flags(s: &root_item->inode, val: inode_flags); |
469 | btrfs_set_root_flags(s: root_item, val: 0); |
470 | btrfs_set_root_limit(s: root_item, val: 0); |
471 | } |
472 | } |
473 | |
474 | void btrfs_update_root_times(struct btrfs_trans_handle *trans, |
475 | struct btrfs_root *root) |
476 | { |
477 | struct btrfs_root_item *item = &root->root_item; |
478 | struct timespec64 ct; |
479 | |
480 | ktime_get_real_ts64(tv: &ct); |
481 | spin_lock(lock: &root->root_item_lock); |
482 | btrfs_set_root_ctransid(s: item, val: trans->transid); |
483 | btrfs_set_stack_timespec_sec(s: &item->ctime, val: ct.tv_sec); |
484 | btrfs_set_stack_timespec_nsec(s: &item->ctime, val: ct.tv_nsec); |
485 | spin_unlock(lock: &root->root_item_lock); |
486 | } |
487 | |
488 | /* |
489 | * Reserve space for subvolume operation. |
490 | * |
491 | * root: the root of the parent directory |
492 | * rsv: block reservation |
493 | * items: the number of items that we need do reservation |
494 | * use_global_rsv: allow fallback to the global block reservation |
495 | * |
496 | * This function is used to reserve the space for snapshot/subvolume |
497 | * creation and deletion. Those operations are different with the |
498 | * common file/directory operations, they change two fs/file trees |
499 | * and root tree, the number of items that the qgroup reserves is |
500 | * different with the free space reservation. So we can not use |
501 | * the space reservation mechanism in start_transaction(). |
502 | */ |
503 | int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, |
504 | struct btrfs_block_rsv *rsv, int items, |
505 | bool use_global_rsv) |
506 | { |
507 | u64 qgroup_num_bytes = 0; |
508 | u64 num_bytes; |
509 | int ret; |
510 | struct btrfs_fs_info *fs_info = root->fs_info; |
511 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
512 | |
513 | if (btrfs_qgroup_enabled(fs_info)) { |
514 | /* One for parent inode, two for dir entries */ |
515 | qgroup_num_bytes = 3 * fs_info->nodesize; |
516 | ret = btrfs_qgroup_reserve_meta_prealloc(root, |
517 | num_bytes: qgroup_num_bytes, enforce: true, |
518 | noflush: false); |
519 | if (ret) |
520 | return ret; |
521 | } |
522 | |
523 | num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items: items); |
524 | rsv->space_info = btrfs_find_space_info(info: fs_info, |
525 | BTRFS_BLOCK_GROUP_METADATA); |
526 | ret = btrfs_block_rsv_add(fs_info, block_rsv: rsv, num_bytes, |
527 | flush: BTRFS_RESERVE_FLUSH_ALL); |
528 | |
529 | if (ret == -ENOSPC && use_global_rsv) |
530 | ret = btrfs_block_rsv_migrate(src_rsv: global_rsv, dst_rsv: rsv, num_bytes, update_size: true); |
531 | |
532 | if (ret && qgroup_num_bytes) |
533 | btrfs_qgroup_free_meta_prealloc(root, num_bytes: qgroup_num_bytes); |
534 | |
535 | if (!ret) { |
536 | spin_lock(lock: &rsv->lock); |
537 | rsv->qgroup_rsv_reserved += qgroup_num_bytes; |
538 | spin_unlock(lock: &rsv->lock); |
539 | } |
540 | return ret; |
541 | } |
542 | |
543 | void btrfs_subvolume_release_metadata(struct btrfs_root *root, |
544 | struct btrfs_block_rsv *rsv) |
545 | { |
546 | struct btrfs_fs_info *fs_info = root->fs_info; |
547 | u64 qgroup_to_release; |
548 | |
549 | btrfs_block_rsv_release(fs_info, block_rsv: rsv, num_bytes: (u64)-1, qgroup_to_release: &qgroup_to_release); |
550 | btrfs_qgroup_convert_reserved_meta(root, num_bytes: qgroup_to_release); |
551 | } |
552 | |