// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "messages.h"
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
#include "qgroup.h"
#include "space-info.h"
#include "tree-mod-log.h"
#include "fs.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;

/*
 * Delayed back reference update tracking. For subvolume trees we queue up
 * extent allocations and backref maintenance for delayed processing. This
 * avoids deep call chains where we add extents in the middle of
 * btrfs_search_slot, and it allows us to buffer up frequently modified
 * backrefs in an rb tree instead of hammering updates on the extent
 * allocation tree.
 */

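/*
 * Return true if the combined reserved space of the global and delayed refs
 * block reserves no longer covers the delayed refs rsv size, i.e. the
 * reserves are running behind and callers should stop queueing more delayed
 * refs (for example by ending the transaction) so they can be flushed.
 */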
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	bool ret = false;
	u64 reserved;

	spin_lock(&global_rsv->lock);
	reserved = global_rsv->reserved;
	spin_unlock(&global_rsv->lock);

	/*
	 * Since the global reserve is just kind of magic we don't really want
	 * to rely on it to save our bacon, so if our size is more than the
	 * delayed_refs_rsv and the global rsv then it's time to think about
	 * bailing.
	 */
	spin_lock(&delayed_refs_rsv->lock);
	reserved += delayed_refs_rsv->reserved;
	if (delayed_refs_rsv->size >= reserved)
		ret = true;
	spin_unlock(&delayed_refs_rsv->lock);
	return ret;
}

/*
 * Release a ref head's reservation.
 *
 * @fs_info:  the filesystem
 * @nr_refs:  number of delayed refs to drop
 * @nr_csums: number of csum items to drop
 *
 * Drops the delayed ref head's count from the delayed refs rsv and frees any
 * excess reservation we had.
 */
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
	u64 num_bytes;
	u64 released;

	num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, nr_refs);
	num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums);

	released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
	if (released)
		trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
					      0, released, 0);
}

/*
 * Adjust the size of the delayed refs rsv.
 *
 * This is to be called any time we may have adjusted trans->delayed_ref_updates
 * or trans->delayed_ref_csum_deletions. It calculates the additional size and
 * adds it to the delayed_refs_rsv.
 */
void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
	struct btrfs_block_rsv *local_rsv = &trans->delayed_rsv;
	u64 num_bytes;
	u64 reserved_bytes;

	num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, trans->delayed_ref_updates);
	num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info,
						       trans->delayed_ref_csum_deletions);

	if (num_bytes == 0)
		return;

	/*
	 * Try to take num_bytes from the transaction's local delayed reserve.
	 * If not possible, try to take as much as is available. If the local
	 * reserve doesn't have enough reserved space, the delayed refs reserve
	 * will be refilled next time btrfs_delayed_refs_rsv_refill() is called
	 * by someone or, if a transaction commit is triggered before that, the
	 * global block reserve will be used. We want to minimize using the
	 * global block reserve for cases we can account for in advance, to
	 * avoid exhausting it and reaching -ENOSPC during a transaction commit.
	 */
	spin_lock(&local_rsv->lock);
	reserved_bytes = min(num_bytes, local_rsv->reserved);
	local_rsv->reserved -= reserved_bytes;
	local_rsv->full = (local_rsv->reserved >= local_rsv->size);
	spin_unlock(&local_rsv->lock);

	spin_lock(&delayed_rsv->lock);
	delayed_rsv->size += num_bytes;
	delayed_rsv->reserved += reserved_bytes;
	delayed_rsv->full = (delayed_rsv->reserved >= delayed_rsv->size);
	spin_unlock(&delayed_rsv->lock);
	trans->delayed_ref_updates = 0;
	trans->delayed_ref_csum_deletions = 0;
}
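
/*
 * Example of the transfer done by btrfs_update_delayed_refs_rsv() above
 * (illustrative numbers, not from the source): if num_bytes works out to 3
 * units and the transaction's local reserve has only 2 units reserved, then
 * 2 units move into delayed_rsv->reserved while delayed_rsv->size grows by
 * the full 3, leaving the reserve not full until a later refill (or, at
 * transaction commit time, the global block reserve) provides the missing
 * unit.
 */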

/*
 * Adjust the size of the delayed refs block reserve for 1 block group item
 * insertion, used after allocating a block group.
 */
void btrfs_inc_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;

	spin_lock(&delayed_rsv->lock);
	/*
	 * Inserting a block group item does not require changing the free space
	 * tree, only the extent tree or the block group tree, so this is all we
	 * need.
	 */
	delayed_rsv->size += btrfs_calc_insert_metadata_size(fs_info, 1);
	delayed_rsv->full = false;
	spin_unlock(&delayed_rsv->lock);
}

/*
 * Adjust the size of the delayed refs block reserve to release space for 1
 * block group item insertion.
 */
void btrfs_dec_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
	const u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
	u64 released;

	released = btrfs_block_rsv_release(fs_info, delayed_rsv, num_bytes, NULL);
	if (released > 0)
		trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
					      0, released, 0);
}

/*
 * Adjust the size of the delayed refs block reserve for 1 block group item
 * update.
 */
void btrfs_inc_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;

	spin_lock(&delayed_rsv->lock);
	/*
	 * Updating a block group item does not result in new nodes/leaves and
	 * does not require changing the free space tree, only the extent tree
	 * or the block group tree, so this is all we need.
	 */
	delayed_rsv->size += btrfs_calc_metadata_size(fs_info, 1);
	delayed_rsv->full = false;
	spin_unlock(&delayed_rsv->lock);
}

/*
 * Adjust the size of the delayed refs block reserve to release space for 1
 * block group item update.
 */
void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
	const u64 num_bytes = btrfs_calc_metadata_size(fs_info, 1);
	u64 released;

	released = btrfs_block_rsv_release(fs_info, delayed_rsv, num_bytes, NULL);
	if (released > 0)
		trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
					      0, released, 0);
}
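
/*
 * The four bg helpers above come in inc/dec pairs: every pending block group
 * item insertion or update accounted for with an inc is expected to be
 * released with the matching dec once the item has been written out (or the
 * pending change is otherwise dropped), otherwise the reserve size would
 * grow without bound.
 */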

/*
 * Transfer bytes to our delayed refs rsv.
 *
 * @fs_info:   the filesystem
 * @num_bytes: number of bytes to transfer
 *
 * This transfers up to the num_bytes amount, previously reserved, to the
 * delayed_refs_rsv. Any extra bytes are returned to the space info.
 */
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
				       u64 num_bytes)
{
	struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
	u64 to_free = 0;

	spin_lock(&delayed_refs_rsv->lock);
	if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
		u64 delta = delayed_refs_rsv->size - delayed_refs_rsv->reserved;

		if (num_bytes > delta) {
			to_free = num_bytes - delta;
			num_bytes = delta;
		}
	} else {
		to_free = num_bytes;
		num_bytes = 0;
	}

	if (num_bytes)
		delayed_refs_rsv->reserved += num_bytes;
	if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
		delayed_refs_rsv->full = true;
	spin_unlock(&delayed_refs_rsv->lock);

	if (num_bytes)
		trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
					      0, num_bytes, 1);
	if (to_free)
		btrfs_space_info_free_bytes_may_use(fs_info,
				delayed_refs_rsv->space_info, to_free);
}
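
/*
 * Example for btrfs_migrate_to_delayed_refs_rsv() above (illustrative
 * numbers): with size == 10 and reserved == 7 the deficit is 3, so migrating
 * num_bytes == 5 moves 3 bytes into the reserve, marks it full, and returns
 * the remaining 2 bytes to the space info as unused.
 */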

/*
 * Refill based on our delayed refs usage.
 *
 * @fs_info: the filesystem
 * @flush:   control how we can flush for this reservation.
 *
 * This will refill the delayed block_rsv up to 1 item's size worth of space
 * and will return -ENOSPC if we can't make the reservation.
 */
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
				  enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
	struct btrfs_space_info *space_info = block_rsv->space_info;
	u64 limit = btrfs_calc_delayed_ref_bytes(fs_info, 1);
	u64 num_bytes = 0;
	u64 refilled_bytes;
	u64 to_free;
	int ret = -ENOSPC;

	spin_lock(&block_rsv->lock);
	if (block_rsv->reserved < block_rsv->size) {
		num_bytes = block_rsv->size - block_rsv->reserved;
		num_bytes = min(num_bytes, limit);
	}
	spin_unlock(&block_rsv->lock);

	if (!num_bytes)
		return 0;

	ret = btrfs_reserve_metadata_bytes(fs_info, space_info, num_bytes, flush);
	if (ret)
		return ret;

	/*
	 * We may have raced with someone else, so check again if the block
	 * reserve is still not full and release any excess space.
	 */
	spin_lock(&block_rsv->lock);
	if (block_rsv->reserved < block_rsv->size) {
		u64 needed = block_rsv->size - block_rsv->reserved;

		if (num_bytes >= needed) {
			block_rsv->reserved += needed;
			block_rsv->full = true;
			to_free = num_bytes - needed;
			refilled_bytes = needed;
		} else {
			block_rsv->reserved += num_bytes;
			to_free = 0;
			refilled_bytes = num_bytes;
		}
	} else {
		to_free = num_bytes;
		refilled_bytes = 0;
	}
	spin_unlock(&block_rsv->lock);

	if (to_free > 0)
		btrfs_space_info_free_bytes_may_use(fs_info, space_info, to_free);

	if (refilled_bytes > 0)
		trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", 0,
					      refilled_bytes, 1);
	return 0;
}
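
/*
 * Note that the refill above is capped at one delayed ref item's worth of
 * metadata (the "limit"), which keeps each individual refill small; callers
 * that need more are expected to call this again on a later pass.
 */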

/*
 * Compare two delayed tree backrefs with the same bytenr and type.
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
			  struct btrfs_delayed_tree_ref *ref2)
{
	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

/*
 * Compare two delayed data backrefs with the same bytenr and type.
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
			  struct btrfs_delayed_data_ref *ref2)
{
	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
		if (ref1->objectid < ref2->objectid)
			return -1;
		if (ref1->objectid > ref2->objectid)
			return 1;
		if (ref1->offset < ref2->offset)
			return -1;
		if (ref1->offset > ref2->offset)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

static int comp_refs(struct btrfs_delayed_ref_node *ref1,
		     struct btrfs_delayed_ref_node *ref2,
		     bool check_seq)
{
	int ret = 0;

	if (ref1->type < ref2->type)
		return -1;
	if (ref1->type > ref2->type)
		return 1;
	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
				     btrfs_delayed_node_to_tree_ref(ref2));
	else
		ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
				     btrfs_delayed_node_to_data_ref(ref2));
	if (ret)
		return ret;
	if (check_seq) {
		if (ref1->seq < ref2->seq)
			return -1;
		if (ref1->seq > ref2->seq)
			return 1;
	}
	return 0;
}
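
/*
 * comp_refs() defines the total order used by the per-head ref rbtree:
 * first by ref type, then by the type-specific fields compared above (root
 * for full backrefs, parent for shared ones, plus objectid/offset for data
 * refs), and finally, when check_seq is set, by sequence number so that
 * refs differing only in seq remain distinct while tree mod log users may
 * still need to see them individually.
 */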

/*
 * Insert a new ref head into the rbtree. Returns the existing entry if one
 * is already present for the same bytenr (so the caller can merge into it),
 * or NULL after linking the new node in.
 */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
						   struct rb_node *node)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_head *entry;
	struct btrfs_delayed_ref_head *ins;
	u64 bytenr;
	bool leftmost = true;

	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
	bytenr = ins->bytenr;
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
				 href_node);

		if (bytenr < entry->bytenr) {
			p = &(*p)->rb_left;
		} else if (bytenr > entry->bytenr) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}

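/*
 * Insert a ref into a head's ref tree, ordered by comp_refs() including the
 * seq number. Like htree_insert() above, this returns the already existing
 * entry instead of inserting a duplicate (so the caller can merge into it),
 * and maintains the cached leftmost node so rb_first_cached() stays O(1).
 */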
static struct btrfs_delayed_ref_node *tree_insert(struct rb_root_cached *root,
						  struct btrfs_delayed_ref_node *ins)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *node = &ins->ref_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_node *entry;
	bool leftmost = true;

	while (*p) {
		int comp;

		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
				 ref_node);
		comp = comp_refs(ins, entry, true);
		if (comp < 0) {
			p = &(*p)->rb_left;
		} else if (comp > 0) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}

static struct btrfs_delayed_ref_head *find_first_ref_head(
		struct btrfs_delayed_ref_root *dr)
{
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = rb_first_cached(&dr->href_root);
	if (!n)
		return NULL;

	entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

	return entry;
}

/*
 * Find a head entry based on bytenr. This returns the delayed ref head if it
 * was able to find one, or NULL if nothing was in that spot. If return_bigger
 * is given, the next bigger entry is returned if no exact match is found.
 */
static struct btrfs_delayed_ref_head *find_ref_head(
		struct btrfs_delayed_ref_root *dr, u64 bytenr,
		bool return_bigger)
{
	struct rb_root *root = &dr->href_root.rb_root;
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = root->rb_node;
	entry = NULL;
	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

		if (bytenr < entry->bytenr)
			n = n->rb_left;
		else if (bytenr > entry->bytenr)
			n = n->rb_right;
		else
			return entry;
	}
	if (entry && return_bigger) {
		if (bytenr > entry->bytenr) {
			n = rb_next(&entry->href_node);
			if (!n)
				return NULL;
			entry = rb_entry(n, struct btrfs_delayed_ref_head,
					 href_node);
		}
		return entry;
	}
	return NULL;
}

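/*
 * Lock a ref head's mutex while holding delayed_refs->lock. If the trylock
 * fails, take a reference on the head, drop delayed_refs->lock so we can
 * sleep on the mutex, then re-take the lock. Returns -EAGAIN if the head was
 * removed from the rbtree while we slept (the caller must look it up again)
 * and 0 once the mutex is held.
 */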
int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	lockdep_assert_held(&delayed_refs->lock);
	if (mutex_trylock(&head->mutex))
		return 0;

	refcount_inc(&head->refs);
	spin_unlock(&delayed_refs->lock);

	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
	if (RB_EMPTY_NODE(&head->href_node)) {
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref_head(head);
		return -EAGAIN;
	}
	btrfs_put_delayed_ref_head(head);
	return 0;
}

static inline void drop_delayed_ref(struct btrfs_fs_info *fs_info,
				    struct btrfs_delayed_ref_root *delayed_refs,
				    struct btrfs_delayed_ref_head *head,
				    struct btrfs_delayed_ref_node *ref)
{
	lockdep_assert_held(&head->lock);
	rb_erase_cached(&ref->ref_node, &head->ref_tree);
	RB_CLEAR_NODE(&ref->ref_node);
	if (!list_empty(&ref->add_list))
		list_del(&ref->add_list);
	btrfs_put_delayed_ref(ref);
	atomic_dec(&delayed_refs->num_entries);
	btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
}

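/*
 * Try to merge @ref with the refs that follow it in the head's ref tree.
 * Opposite actions cancel out: e.g. an ADD with ref_mod 2 followed by a DROP
 * with ref_mod 1 collapses into a single ADD with ref_mod 1, and a ref whose
 * ref_mod reaches zero is dropped entirely. Refs at or above @seq are left
 * untouched because a tree mod log user may still need to see them. Returns
 * true if @ref itself was consumed, which tells the caller to restart its
 * scan.
 */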
static bool merge_ref(struct btrfs_fs_info *fs_info,
		      struct btrfs_delayed_ref_root *delayed_refs,
		      struct btrfs_delayed_ref_head *head,
		      struct btrfs_delayed_ref_node *ref,
		      u64 seq)
{
	struct btrfs_delayed_ref_node *next;
	struct rb_node *node = rb_next(&ref->ref_node);
	bool done = false;

	while (!done && node) {
		int mod;

		next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		node = rb_next(node);
		if (seq && next->seq >= seq)
			break;
		if (comp_refs(ref, next, false))
			break;

		if (ref->action == next->action) {
			mod = next->ref_mod;
		} else {
			if (ref->ref_mod < next->ref_mod) {
				swap(ref, next);
				done = true;
			}
			mod = -next->ref_mod;
		}

		drop_delayed_ref(fs_info, delayed_refs, head, next);
		ref->ref_mod += mod;
		if (ref->ref_mod == 0) {
			drop_delayed_ref(fs_info, delayed_refs, head, ref);
			done = true;
		} else {
			/*
			 * Can't have multiples of the same ref on a tree block.
			 */
			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
		}
	}

	return done;
}

void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
			      struct btrfs_delayed_ref_root *delayed_refs,
			      struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_ref_node *ref;
	struct rb_node *node;
	u64 seq = 0;

	lockdep_assert_held(&head->lock);

	if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
		return;

	/* We don't have too many refs to merge for data. */
	if (head->is_data)
		return;

	seq = btrfs_tree_mod_log_lowest_seq(fs_info);
again:
	for (node = rb_first_cached(&head->ref_tree); node;
	     node = rb_next(node)) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		if (seq && ref->seq >= seq)
			continue;
		if (merge_ref(fs_info, delayed_refs, head, ref, seq))
			goto again;
	}
}

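/*
 * Return 1 if a delayed ref with sequence number @seq must be held back
 * because some tree mod log user still holds a sequence number at or below
 * it, and 0 otherwise.
 */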
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
{
	int ret = 0;
	u64 min_seq = btrfs_tree_mod_log_lowest_seq(fs_info);

	if (min_seq != 0 && seq >= min_seq) {
		btrfs_debug(fs_info,
			    "holding back delayed_ref %llu, lowest is %llu",
			    seq, min_seq);
		ret = 1;
	}

	return ret;
}

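/*
 * Pick the next ref head to run. Selection is round-robin over bytenr:
 * start at run_delayed_start, take the first head at or after it, wrap
 * around to the beginning at most once, and skip heads that another task is
 * already processing. The chosen head is marked as processing and
 * run_delayed_start is advanced past it.
 */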
struct btrfs_delayed_ref_head *btrfs_select_ref_head(
		struct btrfs_delayed_ref_root *delayed_refs)
{
	struct btrfs_delayed_ref_head *head;

	lockdep_assert_held(&delayed_refs->lock);
again:
	head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
			     true);
	if (!head && delayed_refs->run_delayed_start != 0) {
		delayed_refs->run_delayed_start = 0;
		head = find_first_ref_head(delayed_refs);
	}
	if (!head)
		return NULL;

	while (head->processing) {
		struct rb_node *node;

		node = rb_next(&head->href_node);
		if (!node) {
			if (delayed_refs->run_delayed_start == 0)
				return NULL;
			delayed_refs->run_delayed_start = 0;
			goto again;
		}
		head = rb_entry(node, struct btrfs_delayed_ref_head,
				href_node);
	}

	head->processing = true;
	WARN_ON(delayed_refs->num_heads_ready == 0);
	delayed_refs->num_heads_ready--;
	delayed_refs->run_delayed_start = head->bytenr + head->num_bytes;
	return head;
}

void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	lockdep_assert_held(&delayed_refs->lock);
	lockdep_assert_held(&head->lock);

	rb_erase_cached(&head->href_node, &delayed_refs->href_root);
	RB_CLEAR_NODE(&head->href_node);
	atomic_dec(&delayed_refs->num_entries);
	delayed_refs->num_heads--;
	if (!head->processing)
		delayed_refs->num_heads_ready--;
}

/*
 * Helper to insert the ref_node to the tail or merge with tail.
 *
 * Return false if the ref was inserted.
 * Return true if the ref was merged into an existing one (and therefore can be
 * freed by the caller).
 */
static bool insert_delayed_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_delayed_ref_head *href,
			       struct btrfs_delayed_ref_node *ref)
{
	struct btrfs_delayed_ref_root *root = &trans->transaction->delayed_refs;
	struct btrfs_delayed_ref_node *exist;
	int mod;

	spin_lock(&href->lock);
	exist = tree_insert(&href->ref_tree, ref);
	if (!exist) {
		if (ref->action == BTRFS_ADD_DELAYED_REF)
			list_add_tail(&ref->add_list, &href->ref_add_list);
		atomic_inc(&root->num_entries);
		spin_unlock(&href->lock);
		trans->delayed_ref_updates++;
		return false;
	}

	/* Now we are sure we can merge. */
	if (exist->action == ref->action) {
		mod = ref->ref_mod;
	} else {
		/* Need to change the action. */
		if (exist->ref_mod < ref->ref_mod) {
			exist->action = ref->action;
			mod = -exist->ref_mod;
			exist->ref_mod = ref->ref_mod;
			if (ref->action == BTRFS_ADD_DELAYED_REF)
				list_add_tail(&exist->add_list,
					      &href->ref_add_list);
			else if (ref->action == BTRFS_DROP_DELAYED_REF) {
				ASSERT(!list_empty(&exist->add_list));
				list_del(&exist->add_list);
			} else {
				ASSERT(0);
			}
		} else
			mod = -ref->ref_mod;
	}
	exist->ref_mod += mod;

	/* Remove the existing tail if its ref_mod is zero. */
	if (exist->ref_mod == 0)
		drop_delayed_ref(trans->fs_info, root, href, exist);
	spin_unlock(&href->lock);
	return true;
}

/*
 * Helper function to update the accounting in the head ref.
 * The existing and update heads must have the same bytenr.
 */
static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
					      struct btrfs_delayed_ref_head *existing,
					      struct btrfs_delayed_ref_head *update)
{
	struct btrfs_delayed_ref_root *delayed_refs =
		&trans->transaction->delayed_refs;
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int old_ref_mod;

	BUG_ON(existing->is_data != update->is_data);

	spin_lock(&existing->lock);

	/*
	 * When freeing an extent, we may not know the owning root when we
	 * first create the head_ref. However, some deref before the last deref
	 * will know it, so we just need to update the head_ref accordingly.
	 */
	if (!existing->owning_root)
		existing->owning_root = update->owning_root;

	if (update->must_insert_reserved) {
		/*
		 * If the extent was freed and then reallocated before the
		 * delayed ref entries were processed, we can end up with an
		 * existing head ref without the must_insert_reserved flag set.
		 * Set it again here.
		 */
		existing->must_insert_reserved = update->must_insert_reserved;
		existing->owning_root = update->owning_root;

		/*
		 * Update the num_bytes so we make sure the accounting is done
		 * correctly.
		 */
		existing->num_bytes = update->num_bytes;
	}

	if (update->extent_op) {
		if (!existing->extent_op) {
			existing->extent_op = update->extent_op;
		} else {
			if (update->extent_op->update_key) {
				memcpy(&existing->extent_op->key,
				       &update->extent_op->key,
				       sizeof(update->extent_op->key));
				existing->extent_op->update_key = true;
			}
			if (update->extent_op->update_flags) {
				existing->extent_op->flags_to_set |=
					update->extent_op->flags_to_set;
				existing->extent_op->update_flags = true;
			}
			btrfs_free_delayed_extent_op(update->extent_op);
		}
	}
	/*
	 * Update the reference mod on the head to reflect this new operation.
	 * We only need the lock for this case because the head could be
	 * processed concurrently; for refs we just added we know we're a-ok.
	 */
	old_ref_mod = existing->total_ref_mod;
	existing->ref_mod += update->ref_mod;
	existing->total_ref_mod += update->ref_mod;

	/*
	 * If we are going from a positive ref mod to a negative or vice
	 * versa we need to make sure to adjust pending_csums accordingly.
	 * We reserve bytes for csum deletion when adding or updating a ref
	 * head, see add_delayed_ref_head() for more details.
	 */
	if (existing->is_data) {
		u64 csum_leaves =
			btrfs_csum_bytes_to_leaves(fs_info,
						   existing->num_bytes);

		if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
			delayed_refs->pending_csums -= existing->num_bytes;
			btrfs_delayed_refs_rsv_release(fs_info, 0, csum_leaves);
		}
		if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
			delayed_refs->pending_csums += existing->num_bytes;
			trans->delayed_ref_csum_deletions += csum_leaves;
		}
	}

	spin_unlock(&existing->lock);
}

static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
				  struct btrfs_qgroup_extent_record *qrecord,
				  u64 bytenr, u64 num_bytes, u64 ref_root,
				  u64 reserved, int action, bool is_data,
				  bool is_system, u64 owning_root)
{
	int count_mod = 1;
	bool must_insert_reserved = false;

	/* If reserved is provided, it must be a data extent. */
	BUG_ON(!is_data && reserved);

	switch (action) {
	case BTRFS_UPDATE_DELAYED_HEAD:
		count_mod = 0;
		break;
	case BTRFS_DROP_DELAYED_REF:
		/*
		 * The head node stores the sum of all the mods, so dropping a
		 * ref should drop the sum in the head node by one.
		 */
		count_mod = -1;
		break;
	case BTRFS_ADD_DELAYED_EXTENT:
		/*
		 * BTRFS_ADD_DELAYED_EXTENT means that we need to update the
		 * reserved accounting when the extent is finally added, or if a
		 * later modification deletes the delayed ref without ever
		 * inserting the extent into the extent allocation tree.
		 * ref->must_insert_reserved is the flag used to record that
		 * accounting mods are required.
		 *
		 * Once we record must_insert_reserved, switch the action to
		 * BTRFS_ADD_DELAYED_REF because other special casing is not
		 * required.
		 */
		must_insert_reserved = true;
		break;
	}

	refcount_set(&head_ref->refs, 1);
	head_ref->bytenr = bytenr;
	head_ref->num_bytes = num_bytes;
	head_ref->ref_mod = count_mod;
	head_ref->reserved_bytes = reserved;
	head_ref->must_insert_reserved = must_insert_reserved;
	head_ref->owning_root = owning_root;
	head_ref->is_data = is_data;
	head_ref->is_system = is_system;
	head_ref->ref_tree = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&head_ref->ref_add_list);
	RB_CLEAR_NODE(&head_ref->href_node);
	head_ref->processing = false;
	head_ref->total_ref_mod = count_mod;
	spin_lock_init(&head_ref->lock);
	mutex_init(&head_ref->mutex);

	if (qrecord) {
		if (ref_root && reserved) {
			qrecord->data_rsv = reserved;
			qrecord->data_rsv_refroot = ref_root;
		}
		qrecord->bytenr = bytenr;
		qrecord->num_bytes = num_bytes;
		qrecord->old_roots = NULL;
	}
}

/*
 * Helper function to actually insert a head node into the rbtree.
 * This does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_head *head_ref,
		     struct btrfs_qgroup_extent_record *qrecord,
		     int action, bool *qrecord_inserted_ret)
{
	struct btrfs_delayed_ref_head *existing;
	struct btrfs_delayed_ref_root *delayed_refs;
	bool qrecord_inserted = false;

	delayed_refs = &trans->transaction->delayed_refs;

	/* Record qgroup extent info if provided. */
	if (qrecord) {
		if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
						     delayed_refs, qrecord))
			kfree(qrecord);
		else
			qrecord_inserted = true;
	}

	trace_add_delayed_ref_head(trans->fs_info, head_ref, action);

	existing = htree_insert(&delayed_refs->href_root,
				&head_ref->href_node);
	if (existing) {
		update_existing_head_ref(trans, existing, head_ref);
		/*
		 * We've updated the existing ref, free the newly
		 * allocated ref.
		 */
		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
		head_ref = existing;
	} else {
		/*
		 * We reserve the amount of bytes needed to delete csums when
		 * adding the ref head and not when adding individual drop refs
		 * since the csum items are deleted only after running the last
		 * delayed drop ref (the data extent's ref count drops to 0).
		 */
		if (head_ref->is_data && head_ref->ref_mod < 0) {
			delayed_refs->pending_csums += head_ref->num_bytes;
			trans->delayed_ref_csum_deletions +=
				btrfs_csum_bytes_to_leaves(trans->fs_info,
							   head_ref->num_bytes);
		}
		delayed_refs->num_heads++;
		delayed_refs->num_heads_ready++;
		atomic_inc(&delayed_refs->num_entries);
	}
	if (qrecord_inserted_ret)
		*qrecord_inserted_ret = qrecord_inserted;

	return head_ref;
}

/*
 * Initialize the structure which represents a modification to an extent.
 *
 * @fs_info:   The filesystem structure of the mounted filesystem.
 *
 * @ref:       The structure which is going to be initialized.
 *
 * @bytenr:    The logical address of the extent for which a modification is
 *             going to be recorded.
 *
 * @num_bytes: Size of the extent whose modification is being recorded.
 *
 * @ref_root:  The id of the root where this modification has originated, this
 *             can be either one of the well-known metadata trees or the
 *             subvolume id which references this extent.
 *
 * @action:    Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
 *             BTRFS_ADD_DELAYED_EXTENT.
 *
 * @ref_type:  Holds the type of the extent which is being recorded, can be
 *             one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
 *             when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
 *             BTRFS_EXTENT_DATA_REF_KEY when recording a data extent.
 */
static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
				    struct btrfs_delayed_ref_node *ref,
				    u64 bytenr, u64 num_bytes, u64 ref_root,
				    int action, u8 ref_type)
{
	u64 seq = 0;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	if (is_fstree(ref_root))
		seq = atomic64_read(&fs_info->tree_mod_seq);

	refcount_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->seq = seq;
	ref->type = ref_type;
	RB_CLEAR_NODE(&ref->ref_node);
	INIT_LIST_HEAD(&ref->add_list);
}

/*
 * Add a delayed tree ref. This does all of the accounting required to make
 * sure the delayed ref is eventually processed before this transaction
 * commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_ref *generic_ref,
			       struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	bool qrecord_inserted;
	bool is_system;
	bool merged;
	int action = generic_ref->action;
	int level = generic_ref->tree_ref.level;
	u64 bytenr = generic_ref->bytenr;
	u64 num_bytes = generic_ref->len;
	u64 parent = generic_ref->parent;
	u8 ref_type;

	is_system = (generic_ref->tree_ref.ref_root == BTRFS_CHUNK_TREE_OBJECTID);

	ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
		return -ENOMEM;
	}

	if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) {
		record = kzalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
			return -ENOMEM;
		}
	}

	if (parent)
		ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
	else
		ref_type = BTRFS_TREE_BLOCK_REF_KEY;

	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				generic_ref->tree_ref.ref_root, action,
				ref_type);
	ref->root = generic_ref->tree_ref.ref_root;
	ref->parent = parent;
	ref->level = level;

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
			      generic_ref->tree_ref.ref_root, 0, action,
			      false, is_system, generic_ref->owning_root);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * Insert both the head node and the new ref without dropping
	 * the spin lock.
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted);

	merged = insert_delayed_ref(trans, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);

	trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (merged)
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);

	if (qrecord_inserted)
		btrfs_qgroup_trace_extent_post(trans, record);

	return 0;
}

/*
 * Add a delayed data ref. It's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_ref *generic_ref,
			       u64 reserved)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	bool qrecord_inserted;
	int action = generic_ref->action;
	bool merged;
	u64 bytenr = generic_ref->bytenr;
	u64 num_bytes = generic_ref->len;
	u64 parent = generic_ref->parent;
	u64 ref_root = generic_ref->data_ref.ref_root;
	u64 owner = generic_ref->data_ref.ino;
	u64 offset = generic_ref->data_ref.offset;
	u8 ref_type;

	ASSERT(generic_ref->type == BTRFS_REF_DATA && action);
	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref_type = BTRFS_SHARED_DATA_REF_KEY;
	else
		ref_type = BTRFS_EXTENT_DATA_REF_KEY;
	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				ref_root, action, ref_type);
	ref->root = ref_root;
	ref->parent = parent;
	ref->objectid = owner;
	ref->offset = offset;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
		return -ENOMEM;
	}

	if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) {
		record = kzalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep,
					head_ref);
			return -ENOMEM;
		}
	}

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
			      reserved, action, true, false,
			      generic_ref->owning_root);
	head_ref->extent_op = NULL;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * Insert both the head node and the new ref without dropping
	 * the spin lock.
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted);

	merged = insert_delayed_ref(trans, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);

	trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (merged)
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);

	if (qrecord_inserted)
		return btrfs_qgroup_trace_extent_post(trans, record);
	return 0;
}

int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
				u64 bytenr, u64 num_bytes,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref)
		return -ENOMEM;

	init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
			      BTRFS_UPDATE_DELAYED_HEAD, false, false, 0);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
			     NULL);

	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);
	return 0;
}

/*
 * This does a simple search for the head node for a given extent. Returns the
 * head node if found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
	lockdep_assert_held(&delayed_refs->lock);

	return find_ref_head(delayed_refs, bytenr, false);
}

void __cold btrfs_delayed_ref_exit(void)
{
	kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
	kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

int __init btrfs_delayed_ref_init(void)
{
	btrfs_delayed_ref_head_cachep = kmem_cache_create(
				"btrfs_delayed_ref_head",
				sizeof(struct btrfs_delayed_ref_head), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_ref_head_cachep)
		goto fail;

	btrfs_delayed_tree_ref_cachep = kmem_cache_create(
				"btrfs_delayed_tree_ref",
				sizeof(struct btrfs_delayed_tree_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_tree_ref_cachep)
		goto fail;

	btrfs_delayed_data_ref_cachep = kmem_cache_create(
				"btrfs_delayed_data_ref",
				sizeof(struct btrfs_delayed_data_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_data_ref_cachep)
		goto fail;

	btrfs_delayed_extent_op_cachep = kmem_cache_create(
				"btrfs_delayed_extent_op",
				sizeof(struct btrfs_delayed_extent_op), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_extent_op_cachep)
		goto fail;

	return 0;
fail:
	btrfs_delayed_ref_exit();
	return -ENOMEM;
}