1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * Copyright (C) 2007 Oracle. All rights reserved. |
4 | */ |
5 | |
6 | #ifndef BTRFS_CTREE_H |
7 | #define BTRFS_CTREE_H |
8 | |
9 | #include <linux/pagemap.h> |
10 | #include "locking.h" |
11 | #include "fs.h" |
12 | #include "accessors.h" |
13 | |
14 | struct btrfs_trans_handle; |
15 | struct btrfs_transaction; |
16 | struct btrfs_pending_snapshot; |
17 | struct btrfs_delayed_ref_root; |
18 | struct btrfs_space_info; |
19 | struct btrfs_block_group; |
20 | struct btrfs_ordered_sum; |
21 | struct btrfs_ref; |
22 | struct btrfs_bio; |
23 | struct btrfs_ioctl_encoded_io_args; |
24 | struct btrfs_device; |
25 | struct btrfs_fs_devices; |
26 | struct btrfs_balance_control; |
27 | struct btrfs_delayed_root; |
28 | struct reloc_control; |
29 | |
30 | /* Read ahead values for struct btrfs_path.reada */ |
31 | enum { |
32 | READA_NONE, |
33 | READA_BACK, |
34 | READA_FORWARD, |
35 | /* |
36 | * Similar to READA_FORWARD but unlike it: |
37 | * |
38 | * 1) It will trigger readahead even for leaves that are not close to |
39 | * each other on disk; |
40 | * 2) It also triggers readahead for nodes; |
41 | * 3) During a search, even when a node or leaf is already in memory, it |
42 | * will still trigger readahead for other nodes and leaves that follow |
43 | * it. |
44 | * |
45 | * This is meant to be used only when we know we are iterating over the |
46 | * entire tree or a very large part of it. |
47 | */ |
48 | READA_FORWARD_ALWAYS, |
49 | }; |
50 | |
51 | /* |
52 | * btrfs_paths remember the path taken from the root down to the leaf. |
53 | * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point |
54 | * to any other levels that are present. |
55 | * |
56 | * The slots array records the index of the item or block pointer |
57 | * used while walking the tree. |
58 | */ |
59 | struct btrfs_path { |
60 | struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; |
61 | int slots[BTRFS_MAX_LEVEL]; |
62 | /* if there is real range locking, this locks field will change */ |
63 | u8 locks[BTRFS_MAX_LEVEL]; |
64 | u8 reada; |
65 | /* keep some upper locks as we walk down */ |
66 | u8 lowest_level; |
67 | |
68 | /* |
69 | * set by btrfs_split_item, tells search_slot to keep all locks |
70 | * and to force calls to keep space in the nodes |
71 | */ |
72 | unsigned int search_for_split:1; |
73 | unsigned int keep_locks:1; |
74 | unsigned int skip_locking:1; |
75 | unsigned int search_commit_root:1; |
76 | unsigned int need_commit_sem:1; |
77 | unsigned int skip_release_on_error:1; |
78 | /* |
79 | * Indicate that new item (btrfs_search_slot) is extending already |
80 | * existing item and ins_len contains only the data size and not item |
81 | * header (ie. sizeof(struct btrfs_item) is not included). |
82 | */ |
83 | unsigned int search_for_extension:1; |
84 | /* Stop search if any locks need to be taken (for read) */ |
85 | unsigned int nowait:1; |
86 | }; |
87 | |
88 | /* |
89 | * The state of btrfs root |
90 | */ |
91 | enum { |
92 | /* |
93 | * btrfs_record_root_in_trans is a multi-step process, and it can race |
94 | * with the balancing code. But the race is very small, and only the |
95 | * first time the root is added to each transaction. So IN_TRANS_SETUP |
96 | * is used to tell us when more checks are required |
97 | */ |
98 | BTRFS_ROOT_IN_TRANS_SETUP, |
99 | |
100 | /* |
101 | * Set if tree blocks of this root can be shared by other roots. |
102 | * Only subvolume trees and their reloc trees have this bit set. |
103 | * Conflicts with TRACK_DIRTY bit. |
104 | * |
105 | * This affects two things: |
106 | * |
107 | * - How balance works |
108 | * For shareable roots, we need to use reloc tree and do path |
109 | * replacement for balance, and need various pre/post hooks for |
110 | * snapshot creation to handle them. |
111 | * |
112 | * While for non-shareable trees, we just simply do a tree search |
113 | * with COW. |
114 | * |
115 | * - How dirty roots are tracked |
116 | * For shareable roots, btrfs_record_root_in_trans() is needed to |
117 | * track them, while non-subvolume roots have TRACK_DIRTY bit, they |
118 | * don't need to set this manually. |
119 | */ |
120 | BTRFS_ROOT_SHAREABLE, |
121 | BTRFS_ROOT_TRACK_DIRTY, |
122 | BTRFS_ROOT_IN_RADIX, |
123 | BTRFS_ROOT_ORPHAN_ITEM_INSERTED, |
124 | BTRFS_ROOT_DEFRAG_RUNNING, |
125 | BTRFS_ROOT_FORCE_COW, |
126 | BTRFS_ROOT_MULTI_LOG_TASKS, |
127 | BTRFS_ROOT_DIRTY, |
128 | BTRFS_ROOT_DELETING, |
129 | |
130 | /* |
131 | * Reloc tree is orphan, only kept here for qgroup delayed subtree scan |
132 | * |
133 | * Set for the subvolume tree owning the reloc tree. |
134 | */ |
135 | BTRFS_ROOT_DEAD_RELOC_TREE, |
136 | /* Mark dead root stored on device whose cleanup needs to be resumed */ |
137 | BTRFS_ROOT_DEAD_TREE, |
138 | /* The root has a log tree. Used for subvolume roots and the tree root. */ |
139 | BTRFS_ROOT_HAS_LOG_TREE, |
140 | /* Qgroup flushing is in progress */ |
141 | BTRFS_ROOT_QGROUP_FLUSHING, |
142 | /* We started the orphan cleanup for this root. */ |
143 | BTRFS_ROOT_ORPHAN_CLEANUP, |
144 | /* This root has a drop operation that was started previously. */ |
145 | BTRFS_ROOT_UNFINISHED_DROP, |
146 | /* This reloc root needs to have its buffers lockdep class reset. */ |
147 | BTRFS_ROOT_RESET_LOCKDEP_CLASS, |
148 | }; |
149 | |
150 | /* |
151 | * Record swapped tree blocks of a subvolume tree for delayed subtree trace |
152 | * code. For detail check comment in fs/btrfs/qgroup.c. |
153 | */ |
154 | struct btrfs_qgroup_swapped_blocks { |
155 | spinlock_t lock; |
156 | /* RM_EMPTY_ROOT() of above blocks[] */ |
157 | bool swapped; |
158 | struct rb_root blocks[BTRFS_MAX_LEVEL]; |
159 | }; |
160 | |
161 | /* |
162 | * in ram representation of the tree. extent_root is used for all allocations |
163 | * and for the extent tree extent_root root. |
164 | */ |
165 | struct btrfs_root { |
166 | struct rb_node rb_node; |
167 | |
168 | struct extent_buffer *node; |
169 | |
170 | struct extent_buffer *commit_root; |
171 | struct btrfs_root *log_root; |
172 | struct btrfs_root *reloc_root; |
173 | |
174 | unsigned long state; |
175 | struct btrfs_root_item root_item; |
176 | struct btrfs_key root_key; |
177 | struct btrfs_fs_info *fs_info; |
178 | struct extent_io_tree dirty_log_pages; |
179 | |
180 | struct mutex objectid_mutex; |
181 | |
182 | spinlock_t accounting_lock; |
183 | struct btrfs_block_rsv *block_rsv; |
184 | |
185 | struct mutex log_mutex; |
186 | wait_queue_head_t log_writer_wait; |
187 | wait_queue_head_t log_commit_wait[2]; |
188 | struct list_head log_ctxs[2]; |
189 | /* Used only for log trees of subvolumes, not for the log root tree */ |
190 | atomic_t log_writers; |
191 | atomic_t log_commit[2]; |
192 | /* Used only for log trees of subvolumes, not for the log root tree */ |
193 | atomic_t log_batch; |
194 | /* |
195 | * Protected by the 'log_mutex' lock but can be read without holding |
196 | * that lock to avoid unnecessary lock contention, in which case it |
197 | * should be read using btrfs_get_root_log_transid() except if it's a |
198 | * log tree in which case it can be directly accessed. Updates to this |
199 | * field should always use btrfs_set_root_log_transid(), except for log |
200 | * trees where the field can be updated directly. |
201 | */ |
202 | int log_transid; |
203 | /* No matter the commit succeeds or not*/ |
204 | int log_transid_committed; |
205 | /* |
206 | * Just be updated when the commit succeeds. Use |
207 | * btrfs_get_root_last_log_commit() and btrfs_set_root_last_log_commit() |
208 | * to access this field. |
209 | */ |
210 | int last_log_commit; |
211 | pid_t log_start_pid; |
212 | |
213 | u64 last_trans; |
214 | |
215 | u32 type; |
216 | |
217 | u64 free_objectid; |
218 | |
219 | struct btrfs_key defrag_progress; |
220 | struct btrfs_key defrag_max; |
221 | |
222 | /* The dirty list is only used by non-shareable roots */ |
223 | struct list_head dirty_list; |
224 | |
225 | struct list_head root_list; |
226 | |
227 | spinlock_t log_extents_lock[2]; |
228 | struct list_head logged_list[2]; |
229 | |
230 | spinlock_t inode_lock; |
231 | /* red-black tree that keeps track of in-memory inodes */ |
232 | struct rb_root inode_tree; |
233 | |
234 | /* |
235 | * radix tree that keeps track of delayed nodes of every inode, |
236 | * protected by inode_lock |
237 | */ |
238 | struct radix_tree_root delayed_nodes_tree; |
239 | /* |
240 | * right now this just gets used so that a root has its own devid |
241 | * for stat. It may be used for more later |
242 | */ |
243 | dev_t anon_dev; |
244 | |
245 | spinlock_t root_item_lock; |
246 | refcount_t refs; |
247 | |
248 | struct mutex delalloc_mutex; |
249 | spinlock_t delalloc_lock; |
250 | /* |
251 | * all of the inodes that have delalloc bytes. It is possible for |
252 | * this list to be empty even when there is still dirty data=ordered |
253 | * extents waiting to finish IO. |
254 | */ |
255 | struct list_head delalloc_inodes; |
256 | struct list_head delalloc_root; |
257 | u64 nr_delalloc_inodes; |
258 | |
259 | struct mutex ordered_extent_mutex; |
260 | /* |
261 | * this is used by the balancing code to wait for all the pending |
262 | * ordered extents |
263 | */ |
264 | spinlock_t ordered_extent_lock; |
265 | |
266 | /* |
267 | * all of the data=ordered extents pending writeback |
268 | * these can span multiple transactions and basically include |
269 | * every dirty data page that isn't from nodatacow |
270 | */ |
271 | struct list_head ordered_extents; |
272 | struct list_head ordered_root; |
273 | u64 nr_ordered_extents; |
274 | |
275 | /* |
276 | * Not empty if this subvolume root has gone through tree block swap |
277 | * (relocation) |
278 | * |
279 | * Will be used by reloc_control::dirty_subvol_roots. |
280 | */ |
281 | struct list_head reloc_dirty_list; |
282 | |
283 | /* |
284 | * Number of currently running SEND ioctls to prevent |
285 | * manipulation with the read-only status via SUBVOL_SETFLAGS |
286 | */ |
287 | int send_in_progress; |
288 | /* |
289 | * Number of currently running deduplication operations that have a |
290 | * destination inode belonging to this root. Protected by the lock |
291 | * root_item_lock. |
292 | */ |
293 | int dedupe_in_progress; |
294 | /* For exclusion of snapshot creation and nocow writes */ |
295 | struct btrfs_drew_lock snapshot_lock; |
296 | |
297 | atomic_t snapshot_force_cow; |
298 | |
299 | /* For qgroup metadata reserved space */ |
300 | spinlock_t qgroup_meta_rsv_lock; |
301 | u64 qgroup_meta_rsv_pertrans; |
302 | u64 qgroup_meta_rsv_prealloc; |
303 | wait_queue_head_t qgroup_flush_wait; |
304 | |
305 | /* Number of active swapfiles */ |
306 | atomic_t nr_swapfiles; |
307 | |
308 | /* Record pairs of swapped blocks for qgroup */ |
309 | struct btrfs_qgroup_swapped_blocks swapped_blocks; |
310 | |
311 | /* Used only by log trees, when logging csum items */ |
312 | struct extent_io_tree log_csum_range; |
313 | |
314 | /* Used in simple quotas, track root during relocation. */ |
315 | u64 relocation_src_root; |
316 | |
317 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
318 | u64 alloc_bytenr; |
319 | #endif |
320 | |
321 | #ifdef CONFIG_BTRFS_DEBUG |
322 | struct list_head leak_list; |
323 | #endif |
324 | }; |
325 | |
326 | static inline bool btrfs_root_readonly(const struct btrfs_root *root) |
327 | { |
328 | /* Byte-swap the constant at compile time, root_item::flags is LE */ |
329 | return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0; |
330 | } |
331 | |
332 | static inline bool btrfs_root_dead(const struct btrfs_root *root) |
333 | { |
334 | /* Byte-swap the constant at compile time, root_item::flags is LE */ |
335 | return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0; |
336 | } |
337 | |
338 | static inline u64 btrfs_root_id(const struct btrfs_root *root) |
339 | { |
340 | return root->root_key.objectid; |
341 | } |
342 | |
343 | static inline int btrfs_get_root_log_transid(const struct btrfs_root *root) |
344 | { |
345 | return READ_ONCE(root->log_transid); |
346 | } |
347 | |
348 | static inline void btrfs_set_root_log_transid(struct btrfs_root *root, int log_transid) |
349 | { |
350 | WRITE_ONCE(root->log_transid, log_transid); |
351 | } |
352 | |
353 | static inline int btrfs_get_root_last_log_commit(const struct btrfs_root *root) |
354 | { |
355 | return READ_ONCE(root->last_log_commit); |
356 | } |
357 | |
358 | static inline void btrfs_set_root_last_log_commit(struct btrfs_root *root, int commit_id) |
359 | { |
360 | WRITE_ONCE(root->last_log_commit, commit_id); |
361 | } |
362 | |
363 | /* |
364 | * Structure that conveys information about an extent that is going to replace |
365 | * all the extents in a file range. |
366 | */ |
367 | struct btrfs_replace_extent_info { |
368 | u64 disk_offset; |
369 | u64 disk_len; |
370 | u64 data_offset; |
371 | u64 data_len; |
372 | u64 file_offset; |
373 | /* Pointer to a file extent item of type regular or prealloc. */ |
374 | char *extent_buf; |
375 | /* |
376 | * Set to true when attempting to replace a file range with a new extent |
377 | * described by this structure, set to false when attempting to clone an |
378 | * existing extent into a file range. |
379 | */ |
380 | bool is_new_extent; |
381 | /* Indicate if we should update the inode's mtime and ctime. */ |
382 | bool update_times; |
383 | /* Meaningful only if is_new_extent is true. */ |
384 | int qgroup_reserved; |
385 | /* |
386 | * Meaningful only if is_new_extent is true. |
387 | * Used to track how many extent items we have already inserted in a |
388 | * subvolume tree that refer to the extent described by this structure, |
389 | * so that we know when to create a new delayed ref or update an existing |
390 | * one. |
391 | */ |
392 | int insertions; |
393 | }; |
394 | |
395 | /* Arguments for btrfs_drop_extents() */ |
396 | struct btrfs_drop_extents_args { |
397 | /* Input parameters */ |
398 | |
399 | /* |
400 | * If NULL, btrfs_drop_extents() will allocate and free its own path. |
401 | * If 'replace_extent' is true, this must not be NULL. Also the path |
402 | * is always released except if 'replace_extent' is true and |
403 | * btrfs_drop_extents() sets 'extent_inserted' to true, in which case |
404 | * the path is kept locked. |
405 | */ |
406 | struct btrfs_path *path; |
407 | /* Start offset of the range to drop extents from */ |
408 | u64 start; |
409 | /* End (exclusive, last byte + 1) of the range to drop extents from */ |
410 | u64 end; |
411 | /* If true drop all the extent maps in the range */ |
412 | bool drop_cache; |
413 | /* |
414 | * If true it means we want to insert a new extent after dropping all |
415 | * the extents in the range. If this is true, the 'extent_item_size' |
416 | * parameter must be set as well and the 'extent_inserted' field will |
417 | * be set to true by btrfs_drop_extents() if it could insert the new |
418 | * extent. |
419 | * Note: when this is set to true the path must not be NULL. |
420 | */ |
421 | bool replace_extent; |
422 | /* |
423 | * Used if 'replace_extent' is true. Size of the file extent item to |
424 | * insert after dropping all existing extents in the range |
425 | */ |
426 | u32 extent_item_size; |
427 | |
428 | /* Output parameters */ |
429 | |
430 | /* |
431 | * Set to the minimum between the input parameter 'end' and the end |
432 | * (exclusive, last byte + 1) of the last dropped extent. This is always |
433 | * set even if btrfs_drop_extents() returns an error. |
434 | */ |
435 | u64 drop_end; |
436 | /* |
437 | * The number of allocated bytes found in the range. This can be smaller |
438 | * than the range's length when there are holes in the range. |
439 | */ |
440 | u64 bytes_found; |
441 | /* |
442 | * Only set if 'replace_extent' is true. Set to true if we were able |
443 | * to insert a replacement extent after dropping all extents in the |
444 | * range, otherwise set to false by btrfs_drop_extents(). |
445 | * Also, if btrfs_drop_extents() has set this to true it means it |
446 | * returned with the path locked, otherwise if it has set this to |
447 | * false it has returned with the path released. |
448 | */ |
449 | bool extent_inserted; |
450 | }; |
451 | |
452 | struct btrfs_file_private { |
453 | void *filldir_buf; |
454 | u64 last_index; |
455 | struct extent_state *llseek_cached_state; |
456 | }; |
457 | |
458 | static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) |
459 | { |
460 | return info->nodesize - sizeof(struct btrfs_header); |
461 | } |
462 | |
463 | static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_fs_info *info) |
464 | { |
465 | return BTRFS_LEAF_DATA_SIZE(info) - sizeof(struct btrfs_item); |
466 | } |
467 | |
468 | static inline u32 BTRFS_NODEPTRS_PER_BLOCK(const struct btrfs_fs_info *info) |
469 | { |
470 | return BTRFS_LEAF_DATA_SIZE(info) / sizeof(struct btrfs_key_ptr); |
471 | } |
472 | |
473 | static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) |
474 | { |
475 | return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item); |
476 | } |
477 | |
478 | #define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \ |
479 | ((bytes) >> (fs_info)->sectorsize_bits) |
480 | |
481 | static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) |
482 | { |
483 | return mapping_gfp_constraint(mapping, gfp_mask: ~__GFP_FS); |
484 | } |
485 | |
486 | int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, |
487 | u64 start, u64 end); |
488 | int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, |
489 | u64 num_bytes, u64 *actual_bytes); |
490 | int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range); |
491 | |
492 | /* ctree.c */ |
493 | int __init btrfs_ctree_init(void); |
494 | void __cold btrfs_ctree_exit(void); |
495 | |
496 | int btrfs_bin_search(struct extent_buffer *eb, int first_slot, |
497 | const struct btrfs_key *key, int *slot); |
498 | |
499 | int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2); |
500 | |
501 | #ifdef __LITTLE_ENDIAN |
502 | |
503 | /* |
504 | * Compare two keys, on little-endian the disk order is same as CPU order and |
505 | * we can avoid the conversion. |
506 | */ |
507 | static inline int btrfs_comp_keys(const struct btrfs_disk_key *disk_key, |
508 | const struct btrfs_key *k2) |
509 | { |
510 | const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key; |
511 | |
512 | return btrfs_comp_cpu_keys(k1, k2); |
513 | } |
514 | |
515 | #else |
516 | |
517 | /* Compare two keys in a memcmp fashion. */ |
518 | static inline int btrfs_comp_keys(const struct btrfs_disk_key *disk, |
519 | const struct btrfs_key *k2) |
520 | { |
521 | struct btrfs_key k1; |
522 | |
523 | btrfs_disk_key_to_cpu(&k1, disk); |
524 | |
525 | return btrfs_comp_cpu_keys(&k1, k2); |
526 | } |
527 | |
528 | #endif |
529 | |
530 | int btrfs_previous_item(struct btrfs_root *root, |
531 | struct btrfs_path *path, u64 min_objectid, |
532 | int type); |
533 | int btrfs_previous_extent_item(struct btrfs_root *root, |
534 | struct btrfs_path *path, u64 min_objectid); |
535 | void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, |
536 | struct btrfs_path *path, |
537 | const struct btrfs_key *new_key); |
538 | struct extent_buffer *btrfs_root_node(struct btrfs_root *root); |
539 | int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, |
540 | struct btrfs_key *key, int lowest_level, |
541 | u64 min_trans); |
542 | int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, |
543 | struct btrfs_path *path, |
544 | u64 min_trans); |
545 | struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent, |
546 | int slot); |
547 | |
548 | int btrfs_cow_block(struct btrfs_trans_handle *trans, |
549 | struct btrfs_root *root, struct extent_buffer *buf, |
550 | struct extent_buffer *parent, int parent_slot, |
551 | struct extent_buffer **cow_ret, |
552 | enum btrfs_lock_nesting nest); |
553 | int btrfs_force_cow_block(struct btrfs_trans_handle *trans, |
554 | struct btrfs_root *root, |
555 | struct extent_buffer *buf, |
556 | struct extent_buffer *parent, int parent_slot, |
557 | struct extent_buffer **cow_ret, |
558 | u64 search_start, u64 empty_size, |
559 | enum btrfs_lock_nesting nest); |
560 | int btrfs_copy_root(struct btrfs_trans_handle *trans, |
561 | struct btrfs_root *root, |
562 | struct extent_buffer *buf, |
563 | struct extent_buffer **cow_ret, u64 new_root_objectid); |
564 | int btrfs_block_can_be_shared(struct btrfs_trans_handle *trans, |
565 | struct btrfs_root *root, |
566 | struct extent_buffer *buf); |
567 | int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
568 | struct btrfs_path *path, int level, int slot); |
569 | void btrfs_extend_item(struct btrfs_trans_handle *trans, |
570 | struct btrfs_path *path, u32 data_size); |
571 | void btrfs_truncate_item(struct btrfs_trans_handle *trans, |
572 | struct btrfs_path *path, u32 new_size, int from_end); |
573 | int btrfs_split_item(struct btrfs_trans_handle *trans, |
574 | struct btrfs_root *root, |
575 | struct btrfs_path *path, |
576 | const struct btrfs_key *new_key, |
577 | unsigned long split_offset); |
578 | int btrfs_duplicate_item(struct btrfs_trans_handle *trans, |
579 | struct btrfs_root *root, |
580 | struct btrfs_path *path, |
581 | const struct btrfs_key *new_key); |
582 | int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path, |
583 | u64 inum, u64 ioff, u8 key_type, struct btrfs_key *found_key); |
584 | int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
585 | const struct btrfs_key *key, struct btrfs_path *p, |
586 | int ins_len, int cow); |
587 | int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, |
588 | struct btrfs_path *p, u64 time_seq); |
589 | int btrfs_search_slot_for_read(struct btrfs_root *root, |
590 | const struct btrfs_key *key, |
591 | struct btrfs_path *p, int find_higher, |
592 | int return_any); |
593 | void btrfs_release_path(struct btrfs_path *p); |
594 | struct btrfs_path *btrfs_alloc_path(void); |
595 | void btrfs_free_path(struct btrfs_path *p); |
596 | |
597 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
598 | struct btrfs_path *path, int slot, int nr); |
599 | static inline int btrfs_del_item(struct btrfs_trans_handle *trans, |
600 | struct btrfs_root *root, |
601 | struct btrfs_path *path) |
602 | { |
603 | return btrfs_del_items(trans, root, path, slot: path->slots[0], nr: 1); |
604 | } |
605 | |
606 | /* |
607 | * Describes a batch of items to insert in a btree. This is used by |
608 | * btrfs_insert_empty_items(). |
609 | */ |
610 | struct btrfs_item_batch { |
611 | /* |
612 | * Pointer to an array containing the keys of the items to insert (in |
613 | * sorted order). |
614 | */ |
615 | const struct btrfs_key *keys; |
616 | /* Pointer to an array containing the data size for each item to insert. */ |
617 | const u32 *data_sizes; |
618 | /* |
619 | * The sum of data sizes for all items. The caller can compute this while |
620 | * setting up the data_sizes array, so it ends up being more efficient |
621 | * than having btrfs_insert_empty_items() or setup_item_for_insert() |
622 | * doing it, as it would avoid an extra loop over a potentially large |
623 | * array, and in the case of setup_item_for_insert(), we would be doing |
624 | * it while holding a write lock on a leaf and often on upper level nodes |
625 | * too, unnecessarily increasing the size of a critical section. |
626 | */ |
627 | u32 total_data_size; |
628 | /* Size of the keys and data_sizes arrays (number of items in the batch). */ |
629 | int nr; |
630 | }; |
631 | |
632 | void btrfs_setup_item_for_insert(struct btrfs_trans_handle *trans, |
633 | struct btrfs_root *root, |
634 | struct btrfs_path *path, |
635 | const struct btrfs_key *key, |
636 | u32 data_size); |
637 | int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
638 | const struct btrfs_key *key, void *data, u32 data_size); |
639 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, |
640 | struct btrfs_root *root, |
641 | struct btrfs_path *path, |
642 | const struct btrfs_item_batch *batch); |
643 | |
644 | static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, |
645 | struct btrfs_root *root, |
646 | struct btrfs_path *path, |
647 | const struct btrfs_key *key, |
648 | u32 data_size) |
649 | { |
650 | struct btrfs_item_batch batch; |
651 | |
652 | batch.keys = key; |
653 | batch.data_sizes = &data_size; |
654 | batch.total_data_size = data_size; |
655 | batch.nr = 1; |
656 | |
657 | return btrfs_insert_empty_items(trans, root, path, batch: &batch); |
658 | } |
659 | |
660 | int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, |
661 | u64 time_seq); |
662 | |
663 | int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key, |
664 | struct btrfs_path *path); |
665 | |
666 | int btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key, |
667 | struct btrfs_path *path); |
668 | |
669 | /* |
670 | * Search in @root for a given @key, and store the slot found in @found_key. |
671 | * |
672 | * @root: The root node of the tree. |
673 | * @key: The key we are looking for. |
674 | * @found_key: Will hold the found item. |
675 | * @path: Holds the current slot/leaf. |
676 | * @iter_ret: Contains the value returned from btrfs_search_slot or |
677 | * btrfs_get_next_valid_item, whichever was executed last. |
678 | * |
679 | * The @iter_ret is an output variable that will contain the return value of |
680 | * btrfs_search_slot, if it encountered an error, or the value returned from |
681 | * btrfs_get_next_valid_item otherwise. That return value can be 0, if a valid |
682 | * slot was found, 1 if there were no more leaves, and <0 if there was an error. |
683 | * |
684 | * It's recommended to use a separate variable for iter_ret and then use it to |
685 | * set the function return value so there's no confusion of the 0/1/errno |
686 | * values stemming from btrfs_search_slot. |
687 | */ |
688 | #define btrfs_for_each_slot(root, key, found_key, path, iter_ret) \ |
689 | for (iter_ret = btrfs_search_slot(NULL, (root), (key), (path), 0, 0); \ |
690 | (iter_ret) >= 0 && \ |
691 | (iter_ret = btrfs_get_next_valid_item((root), (found_key), (path))) == 0; \ |
692 | (path)->slots[0]++ \ |
693 | ) |
694 | |
695 | int btrfs_next_old_item(struct btrfs_root *root, struct btrfs_path *path, u64 time_seq); |
696 | |
697 | /* |
698 | * Search the tree again to find a leaf with greater keys. |
699 | * |
700 | * Returns 0 if it found something or 1 if there are no greater leaves. |
701 | * Returns < 0 on error. |
702 | */ |
703 | static inline int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) |
704 | { |
705 | return btrfs_next_old_leaf(root, path, time_seq: 0); |
706 | } |
707 | |
708 | static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) |
709 | { |
710 | return btrfs_next_old_item(root, path: p, time_seq: 0); |
711 | } |
712 | int btrfs_leaf_free_space(const struct extent_buffer *leaf); |
713 | |
714 | static inline int is_fstree(u64 rootid) |
715 | { |
716 | if (rootid == BTRFS_FS_TREE_OBJECTID || |
717 | ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID && |
718 | !btrfs_qgroup_level(qgroupid: rootid))) |
719 | return 1; |
720 | return 0; |
721 | } |
722 | |
723 | static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root) |
724 | { |
725 | return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID; |
726 | } |
727 | |
728 | u16 btrfs_csum_type_size(u16 type); |
729 | int btrfs_super_csum_size(const struct btrfs_super_block *s); |
730 | const char *btrfs_super_csum_name(u16 csum_type); |
731 | const char *btrfs_super_csum_driver(u16 csum_type); |
732 | size_t __attribute_const__ btrfs_get_num_csums(void); |
733 | |
734 | /* |
735 | * We use page status Private2 to indicate there is an ordered extent with |
736 | * unfinished IO. |
737 | * |
738 | * Rename the Private2 accessors to Ordered, to improve readability. |
739 | */ |
740 | #define PageOrdered(page) PagePrivate2(page) |
741 | #define SetPageOrdered(page) SetPagePrivate2(page) |
742 | #define ClearPageOrdered(page) ClearPagePrivate2(page) |
743 | #define folio_test_ordered(folio) folio_test_private_2(folio) |
744 | #define folio_set_ordered(folio) folio_set_private_2(folio) |
745 | #define folio_clear_ordered(folio) folio_clear_private_2(folio) |
746 | |
747 | #endif |
748 | |