1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * Copyright (C) 2008 Oracle. All rights reserved. |
4 | */ |
5 | |
6 | #ifndef BTRFS_DELAYED_REF_H |
7 | #define BTRFS_DELAYED_REF_H |
8 | |
9 | #include <linux/refcount.h> |
10 | |
/*
 * These are the possible values of struct btrfs_delayed_ref_node->action.
 * The enum is __packed so it fits the 8-bit 'action' bitfield there.
 */
enum btrfs_delayed_ref_action {
	/* Add one backref to the tree */
	BTRFS_ADD_DELAYED_REF = 1,
	/* Delete one backref from the tree */
	BTRFS_DROP_DELAYED_REF,
	/* Record a full extent allocation */
	BTRFS_ADD_DELAYED_EXTENT,
	/* Not changing ref count on head ref */
	BTRFS_UPDATE_DELAYED_HEAD,
} __packed;
22 | |
/*
 * A single queued reference count modification, linked under a
 * btrfs_delayed_ref_head and applied when that head is run.
 */
struct btrfs_delayed_ref_node {
	/* Link into the owning head's ref_tree. */
	struct rb_node ref_node;
	/*
	 * If action is BTRFS_ADD_DELAYED_REF, also link this node to
	 * ref_head->ref_add_list, then we do not need to iterate the
	 * whole ref_head->ref_list to find BTRFS_ADD_DELAYED_REF nodes.
	 */
	struct list_head add_list;

	/* The starting bytenr of the extent */
	u64 bytenr;

	/* The size of the extent */
	u64 num_bytes;

	/* Seq number to keep track of insertion order */
	u64 seq;

	/* Ref count on this data structure, dropped via btrfs_put_delayed_ref() */
	refcount_t refs;

	/*
	 * How many refs is this entry adding or deleting.  For
	 * head refs, this may be a negative number because it is keeping
	 * track of the total mods done to the reference count.
	 * For individual refs, this will always be a positive number.
	 *
	 * It may be more than one, since it is possible for a single
	 * parent to have more than one ref on an extent.
	 */
	int ref_mod;

	/* One of enum btrfs_delayed_ref_action. */
	unsigned int action:8;
	/*
	 * Backref item key type, e.g. BTRFS_TREE_BLOCK_REF_KEY; see the
	 * switch in btrfs_put_delayed_ref().
	 */
	unsigned int type:8;
};
58 | |
/*
 * An optional extra operation carried by a delayed ref head (see
 * btrfs_delayed_ref_head::extent_op): update the extent item's key
 * and/or set flags on it when the head is processed.
 */
struct btrfs_delayed_extent_op {
	/* New key for the extent item; applied when update_key is true. */
	struct btrfs_disk_key key;
	/* Tree level of the block — presumably metadata only; confirm at callers. */
	u8 level;
	bool update_key;
	bool update_flags;
	/* Flags to OR into the extent item; applied when update_flags is true. */
	u64 flags_to_set;
};
66 | |
/*
 * The head refs are used to hold a lock on a given extent, which allows us
 * to make sure that only one process is running the delayed refs
 * at a time for a single extent.  They also store the sum of all the
 * reference count modifications we've queued up.
 */
struct btrfs_delayed_ref_head {
	/* Logical start address of the extent this head tracks. */
	u64 bytenr;
	/* Size of the extent, in bytes. */
	u64 num_bytes;
	/*
	 * For insertion into struct btrfs_delayed_ref_root::href_root.
	 * Keep it in the same cache line as 'bytenr' for more efficient
	 * searches in the rbtree.
	 */
	struct rb_node href_node;
	/*
	 * The mutex is held while running the refs, and it is also
	 * held when checking the sum of reference modifications.
	 */
	struct mutex mutex;

	/* Reference count, dropped via btrfs_put_delayed_ref_head(). */
	refcount_t refs;

	/* Protects 'ref_tree' and 'ref_add_list'. */
	spinlock_t lock;
	/* Queued individual refs (btrfs_delayed_ref_node::ref_node). */
	struct rb_root_cached ref_tree;
	/* Accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
	struct list_head ref_add_list;

	/* Optional key/flags update to apply when running this head. */
	struct btrfs_delayed_extent_op *extent_op;

	/*
	 * This is used to track the final ref_mod from all the refs associated
	 * with this head ref, this is not adjusted as delayed refs are run,
	 * this is meant to track if we need to do the csum accounting or not.
	 */
	int total_ref_mod;

	/*
	 * This is the current outstanding mod references for this bytenr. This
	 * is used with lookup_extent_info to get an accurate reference count
	 * for a bytenr, so it is adjusted as delayed refs are run so that any
	 * on disk reference count + ref_mod is accurate.
	 */
	int ref_mod;

	/*
	 * The root that triggered the allocation when must_insert_reserved is
	 * set to true.
	 */
	u64 owning_root;

	/*
	 * Track reserved bytes when setting must_insert_reserved.  On success
	 * or cleanup, we will need to free the reservation.
	 */
	u64 reserved_bytes;

	/*
	 * When a new extent is allocated, it is just reserved in memory.
	 * The actual extent isn't inserted into the extent allocation tree
	 * until the delayed ref is processed.  must_insert_reserved is
	 * used to flag a delayed ref so the accounting can be updated
	 * when a full insert is done.
	 *
	 * It is possible the extent will be freed before it is ever
	 * inserted into the extent allocation tree.  In this case
	 * we need to update the in ram accounting to properly reflect
	 * the free has happened.
	 */
	bool must_insert_reserved;

	/* Space type of the extent; see btrfs_ref_head_to_space_flags(). */
	bool is_data;
	bool is_system;
	/* Presumably set while this head is being run — confirm in delayed-ref.c. */
	bool processing;
};
143 | |
/* Delayed reference to a metadata (tree block) extent. */
struct btrfs_delayed_tree_ref {
	struct btrfs_delayed_ref_node node;
	/* Root which owns this tree block reference. */
	u64 root;
	/* Bytenr of the parent tree block (shared refs); 0 otherwise. */
	u64 parent;
	/* Tree level of the referenced block. */
	int level;
};
150 | |
/* Delayed reference to a data extent. */
struct btrfs_delayed_data_ref {
	struct btrfs_delayed_ref_node node;
	/* Root which owns this data reference. */
	u64 root;
	/* Bytenr of the parent tree block (shared refs); 0 otherwise. */
	u64 parent;
	/* Presumably the inode number referring to the extent — cf. btrfs_data_ref::ino. */
	u64 objectid;
	/* File extent offset — cf. btrfs_data_ref::offset. */
	u64 offset;
};
158 | |
/* Bit numbers for struct btrfs_delayed_ref_root::flags. */
enum btrfs_delayed_ref_flags {
	/* Indicate that we are flushing delayed refs for the commit */
	BTRFS_DELAYED_REFS_FLUSHING,
};
163 | |
/* Per-transaction collection of all pending delayed ref heads. */
struct btrfs_delayed_ref_root {
	/* Head ref rbtree */
	struct rb_root_cached href_root;

	/* Dirty extent records */
	struct rb_root dirty_extent_root;

	/* This spin lock protects the rbtree and the entries inside */
	spinlock_t lock;

	/*
	 * How many delayed ref updates we've queued, used by the
	 * throttling code.
	 */
	atomic_t num_entries;

	/* Total number of head nodes in tree */
	unsigned long num_heads;

	/* Total number of head nodes ready for processing */
	unsigned long num_heads_ready;

	/*
	 * Pending csum work tracked for reservation purposes — presumably
	 * consumed via btrfs_calc_delayed_ref_csum_bytes(); confirm units
	 * (items vs bytes) against extent-tree.c.
	 */
	u64 pending_csums;

	/* BTRFS_DELAYED_REFS_* bits, see enum btrfs_delayed_ref_flags. */
	unsigned long flags;

	/* Bytenr at which the next run of delayed refs resumes — TODO confirm. */
	u64 run_delayed_start;

	/*
	 * To make qgroup to skip given root.
	 * This is for snapshot, as btrfs_qgroup_inherit() will manually
	 * modify counters for snapshot and its source, so we should skip
	 * the snapshot in new_root/old_roots or it will get calculated twice
	 */
	u64 qgroup_to_skip;
};
199 | |
/* Discriminator for the union inside struct btrfs_ref. */
enum btrfs_ref_type {
	BTRFS_REF_NOT_SET,
	BTRFS_REF_DATA,
	BTRFS_REF_METADATA,
	BTRFS_REF_LAST,
} __packed;
206 | |
/* Data-extent specific part of struct btrfs_ref. */
struct btrfs_data_ref {
	/* For EXTENT_DATA_REF */

	/* Root which owns this data reference. */
	u64 ref_root;

	/* Inode which refers to this data extent */
	u64 ino;

	/*
	 * file_offset - extent_offset
	 *
	 * file_offset is the key.offset of the EXTENT_DATA key.
	 * extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data.
	 */
	u64 offset;
};
224 | |
/* Metadata (tree block) specific part of struct btrfs_ref. */
struct btrfs_tree_ref {
	/*
	 * Level of this tree block.
	 *
	 * Shared for skinny (TREE_BLOCK_REF) and normal tree ref.
	 */
	int level;

	/*
	 * Root which owns this tree block reference.
	 *
	 * For TREE_BLOCK_REF (skinny metadata, either inline or keyed)
	 */
	u64 ref_root;

	/* For non-skinny metadata, no special member needed */
};
242 | |
/*
 * Generic description of an extent reference modification, filled in by
 * btrfs_init_generic_ref() plus btrfs_init_tree_ref()/btrfs_init_data_ref()
 * before queueing a delayed ref.
 */
struct btrfs_ref {
	enum btrfs_ref_type type;
	enum btrfs_delayed_ref_action action;

	/*
	 * Whether this extent should go through qgroup record.
	 *
	 * Normally false, but for certain cases like delayed subtree scan,
	 * setting this flag can hugely reduce qgroup overhead.
	 */
	bool skip_qgroup;

#ifdef CONFIG_BTRFS_FS_REF_VERIFY
	/* Through which root is this modification. */
	u64 real_root;
#endif
	/* Logical start address of the extent. */
	u64 bytenr;
	/* Length of the extent, in bytes. */
	u64 len;
	/* Root that owns the extent — cf. btrfs_delayed_ref_head::owning_root. */
	u64 owning_root;

	/* Bytenr of the parent tree block */
	u64 parent;
	/* Type-specific details, selected by 'type'. */
	union {
		struct btrfs_data_ref data_ref;
		struct btrfs_tree_ref tree_ref;
	};
};
270 | |
271 | extern struct kmem_cache *btrfs_delayed_ref_head_cachep; |
272 | extern struct kmem_cache *btrfs_delayed_tree_ref_cachep; |
273 | extern struct kmem_cache *btrfs_delayed_data_ref_cachep; |
274 | extern struct kmem_cache *btrfs_delayed_extent_op_cachep; |
275 | |
276 | int __init btrfs_delayed_ref_init(void); |
277 | void __cold btrfs_delayed_ref_exit(void); |
278 | |
279 | static inline u64 btrfs_calc_delayed_ref_bytes(const struct btrfs_fs_info *fs_info, |
280 | int num_delayed_refs) |
281 | { |
282 | u64 num_bytes; |
283 | |
284 | num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items: num_delayed_refs); |
285 | |
286 | /* |
287 | * We have to check the mount option here because we could be enabling |
288 | * the free space tree for the first time and don't have the compat_ro |
289 | * option set yet. |
290 | * |
291 | * We need extra reservations if we have the free space tree because |
292 | * we'll have to modify that tree as well. |
293 | */ |
294 | if (btrfs_test_opt(fs_info, FREE_SPACE_TREE)) |
295 | num_bytes *= 2; |
296 | |
297 | return num_bytes; |
298 | } |
299 | |
300 | static inline u64 btrfs_calc_delayed_ref_csum_bytes(const struct btrfs_fs_info *fs_info, |
301 | int num_csum_items) |
302 | { |
303 | /* |
304 | * Deleting csum items does not result in new nodes/leaves and does not |
305 | * require changing the free space tree, only the csum tree, so this is |
306 | * all we need. |
307 | */ |
308 | return btrfs_calc_metadata_size(fs_info, num_items: num_csum_items); |
309 | } |
310 | |
311 | static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref, |
312 | int action, u64 bytenr, u64 len, |
313 | u64 parent, u64 owning_root) |
314 | { |
315 | generic_ref->action = action; |
316 | generic_ref->bytenr = bytenr; |
317 | generic_ref->len = len; |
318 | generic_ref->parent = parent; |
319 | generic_ref->owning_root = owning_root; |
320 | } |
321 | |
322 | static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, |
323 | u64 root, u64 mod_root, bool skip_qgroup) |
324 | { |
325 | #ifdef CONFIG_BTRFS_FS_REF_VERIFY |
326 | /* If @real_root not set, use @root as fallback */ |
327 | generic_ref->real_root = mod_root ?: root; |
328 | #endif |
329 | generic_ref->tree_ref.level = level; |
330 | generic_ref->tree_ref.ref_root = root; |
331 | generic_ref->type = BTRFS_REF_METADATA; |
332 | if (skip_qgroup || !(is_fstree(rootid: root) && |
333 | (!mod_root || is_fstree(rootid: mod_root)))) |
334 | generic_ref->skip_qgroup = true; |
335 | else |
336 | generic_ref->skip_qgroup = false; |
337 | |
338 | } |
339 | |
340 | static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref, |
341 | u64 ref_root, u64 ino, u64 offset, u64 mod_root, |
342 | bool skip_qgroup) |
343 | { |
344 | #ifdef CONFIG_BTRFS_FS_REF_VERIFY |
345 | /* If @real_root not set, use @root as fallback */ |
346 | generic_ref->real_root = mod_root ?: ref_root; |
347 | #endif |
348 | generic_ref->data_ref.ref_root = ref_root; |
349 | generic_ref->data_ref.ino = ino; |
350 | generic_ref->data_ref.offset = offset; |
351 | generic_ref->type = BTRFS_REF_DATA; |
352 | if (skip_qgroup || !(is_fstree(rootid: ref_root) && |
353 | (!mod_root || is_fstree(rootid: mod_root)))) |
354 | generic_ref->skip_qgroup = true; |
355 | else |
356 | generic_ref->skip_qgroup = false; |
357 | } |
358 | |
359 | static inline struct btrfs_delayed_extent_op * |
360 | btrfs_alloc_delayed_extent_op(void) |
361 | { |
362 | return kmem_cache_alloc(cachep: btrfs_delayed_extent_op_cachep, GFP_NOFS); |
363 | } |
364 | |
365 | static inline void |
366 | btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op) |
367 | { |
368 | if (op) |
369 | kmem_cache_free(s: btrfs_delayed_extent_op_cachep, objp: op); |
370 | } |
371 | |
372 | static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) |
373 | { |
374 | if (refcount_dec_and_test(r: &ref->refs)) { |
375 | WARN_ON(!RB_EMPTY_NODE(&ref->ref_node)); |
376 | switch (ref->type) { |
377 | case BTRFS_TREE_BLOCK_REF_KEY: |
378 | case BTRFS_SHARED_BLOCK_REF_KEY: |
379 | kmem_cache_free(s: btrfs_delayed_tree_ref_cachep, objp: ref); |
380 | break; |
381 | case BTRFS_EXTENT_DATA_REF_KEY: |
382 | case BTRFS_SHARED_DATA_REF_KEY: |
383 | kmem_cache_free(s: btrfs_delayed_data_ref_cachep, objp: ref); |
384 | break; |
385 | default: |
386 | BUG(); |
387 | } |
388 | } |
389 | } |
390 | |
391 | static inline u64 btrfs_ref_head_to_space_flags( |
392 | struct btrfs_delayed_ref_head *head_ref) |
393 | { |
394 | if (head_ref->is_data) |
395 | return BTRFS_BLOCK_GROUP_DATA; |
396 | else if (head_ref->is_system) |
397 | return BTRFS_BLOCK_GROUP_SYSTEM; |
398 | return BTRFS_BLOCK_GROUP_METADATA; |
399 | } |
400 | |
401 | static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head) |
402 | { |
403 | if (refcount_dec_and_test(r: &head->refs)) |
404 | kmem_cache_free(s: btrfs_delayed_ref_head_cachep, objp: head); |
405 | } |
406 | |
407 | int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, |
408 | struct btrfs_ref *generic_ref, |
409 | struct btrfs_delayed_extent_op *extent_op); |
410 | int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, |
411 | struct btrfs_ref *generic_ref, |
412 | u64 reserved); |
413 | int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, |
414 | u64 bytenr, u64 num_bytes, |
415 | struct btrfs_delayed_extent_op *extent_op); |
416 | void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info, |
417 | struct btrfs_delayed_ref_root *delayed_refs, |
418 | struct btrfs_delayed_ref_head *head); |
419 | |
420 | struct btrfs_delayed_ref_head * |
421 | btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, |
422 | u64 bytenr); |
423 | int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs, |
424 | struct btrfs_delayed_ref_head *head); |
425 | static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head) |
426 | { |
427 | mutex_unlock(lock: &head->mutex); |
428 | } |
429 | void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs, |
430 | struct btrfs_delayed_ref_head *head); |
431 | |
432 | struct btrfs_delayed_ref_head *btrfs_select_ref_head( |
433 | struct btrfs_delayed_ref_root *delayed_refs); |
434 | |
435 | int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq); |
436 | |
437 | void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums); |
438 | void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans); |
439 | void btrfs_inc_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info); |
440 | void btrfs_dec_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info); |
441 | void btrfs_inc_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); |
442 | void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); |
443 | int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, |
444 | enum btrfs_reserve_flush_enum flush); |
445 | void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, |
446 | u64 num_bytes); |
447 | bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); |
448 | |
449 | /* |
450 | * helper functions to cast a node into its container |
451 | */ |
452 | static inline struct btrfs_delayed_tree_ref * |
453 | btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node) |
454 | { |
455 | return container_of(node, struct btrfs_delayed_tree_ref, node); |
456 | } |
457 | |
458 | static inline struct btrfs_delayed_data_ref * |
459 | btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node) |
460 | { |
461 | return container_of(node, struct btrfs_delayed_data_ref, node); |
462 | } |
463 | |
464 | #endif |
465 | |