// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 */

#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "messages.h"
#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "bio.h"
#include "print-tree.h"
#include "compression.h"
#include "fs.h"
#include "accessors.h"
#include "file-item.h"
#include "super.h"

#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
				   sizeof(struct btrfs_item) * 2) / \
				  size) - 1))

#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
				       PAGE_SIZE))

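/*
 * Worked example (illustrative only; the numbers assume a 16 KiB nodesize,
 * a 101 byte struct btrfs_header and 4 byte crc32c checksums):
 *
 *	BTRFS_LEAF_DATA_SIZE	= 16384 - 101 = 16283
 *	__MAX_CSUM_ITEMS	= (16283 - 2 * 25) / 4 - 1 = 4057
 *
 * Reserving two item headers and subtracting one more checksum leaves slack
 * so a maximally sized csum item can still be split, and the min_t() against
 * PAGE_SIZE puts an additional upper bound on the count.
 */
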
/*
 * Set inode's size according to filesystem options.
 *
 * @inode:      inode we want to update the disk_i_size for
 * @new_i_size: i_size we want to set to, 0 if we use i_size
 *
 * With NO_HOLES set this simply sets the disk_i_size to whatever i_size_read()
 * returns as it is perfectly fine with a file that has holes without hole file
 * extent items.
 *
 * However without NO_HOLES we need to only return the area that is contiguous
 * from the 0 offset of the file. Otherwise we could end up adjusting i_size up
 * to an extent that has a gap in between.
 *
 * Finally new_i_size should only be set in the case of truncate where we're not
 * ready to use i_size_read() as the limiter yet.
 */
void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	u64 start, end, i_size;
	int ret;

	spin_lock(&inode->lock);
	i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
	if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
		inode->disk_i_size = i_size;
		goto out_unlock;
	}

	ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start,
					 &end, EXTENT_DIRTY);
	if (!ret && start == 0)
		i_size = min(i_size, end + 1);
	else
		i_size = 0;
	inode->disk_i_size = i_size;
out_unlock:
	spin_unlock(&inode->lock);
}

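/*
 * Illustration (hypothetical layout, not from the original code): with file
 * extent items covering [0, 64K) and [128K, 192K) but a hole in between and
 * NO_HOLES not set, the contiguous EXTENT_DIRTY search from offset 0 ends at
 * 64K - 1, so disk_i_size is clamped to 64K even if i_size is 192K.
 */
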
/*
 * Mark range within a file as having a new extent inserted.
 *
 * @inode: inode being modified
 * @start: start file offset of the file extent we've inserted
 * @len:   logical length of the file extent item
 *
 * Call this when inserting a new file extent where there was none before.
 * There is no need to call it when replacing an existing file extent, but if
 * unsure it is fine to call it multiple times.
 *
 * The start and len must match the file extent item, and thus must be
 * sectorsize aligned.
 */
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
				      u64 len)
{
	if (len == 0)
		return 0;

	ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize));

	if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
		return 0;
	return set_extent_bit(&inode->file_extent_tree, start, start + len - 1,
			      EXTENT_DIRTY, NULL);
}

/*
 * Mark an inode range as not having a backing extent.
 *
 * @inode: inode being modified
 * @start: start file offset of the file extent we've inserted
 * @len:   logical length of the file extent item
 *
 * Called when we drop a file extent, for example when we truncate. Doesn't
 * need to be called for cases where we're replacing a file extent, like when
 * we've COWed a file extent.
 *
 * The start and len must match the file extent item, and thus must be
 * sectorsize aligned.
 */
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
					u64 len)
{
	if (len == 0)
		return 0;

	ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) ||
	       len == (u64)-1);

	if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
		return 0;
	return clear_extent_bit(&inode->file_extent_tree, start,
				start + len - 1, EXTENT_DIRTY, NULL);
}

static size_t bytes_to_csum_size(const struct btrfs_fs_info *fs_info, u32 bytes)
{
	ASSERT(IS_ALIGNED(bytes, fs_info->sectorsize));

	return (bytes >> fs_info->sectorsize_bits) * fs_info->csum_size;
}

static size_t csum_size_to_bytes(const struct btrfs_fs_info *fs_info, u32 csum_size)
{
	ASSERT(IS_ALIGNED(csum_size, fs_info->csum_size));

	return (csum_size / fs_info->csum_size) << fs_info->sectorsize_bits;
}

static inline u32 max_ordered_sum_bytes(const struct btrfs_fs_info *fs_info)
{
	u32 max_csum_size = round_down(PAGE_SIZE - sizeof(struct btrfs_ordered_sum),
				       fs_info->csum_size);

	return csum_size_to_bytes(fs_info, max_csum_size);
}

/*
 * Calculate the total size needed to allocate for an ordered sum structure
 * spanning @bytes in the file.
 */
static int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info, unsigned long bytes)
{
	return sizeof(struct btrfs_ordered_sum) + bytes_to_csum_size(fs_info, bytes);
}

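/*
 * For example (assuming 4 KiB sectors and 4 byte crc32c checksums), a 1 MiB
 * range needs 256 checksums, so btrfs_ordered_sum_size() returns
 * sizeof(struct btrfs_ordered_sum) + 256 * 4 bytes.
 */
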
int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     u64 objectid, u64 pos, u64 num_bytes)
{
	int ret = 0;
	struct btrfs_file_extent_item *item;
	struct btrfs_key file_key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	file_key.objectid = objectid;
	file_key.offset = pos;
	file_key.type = BTRFS_EXTENT_DATA_KEY;

	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
				      sizeof(*item));
	if (ret < 0)
		goto out;
	BUG_ON(ret); /* Can't happen */
	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0],
			      struct btrfs_file_extent_item);
	btrfs_set_file_extent_disk_bytenr(leaf, item, 0);
	btrfs_set_file_extent_disk_num_bytes(leaf, item, 0);
	btrfs_set_file_extent_offset(leaf, item, 0);
	btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
	btrfs_set_file_extent_ram_bytes(leaf, item, num_bytes);
	btrfs_set_file_extent_generation(leaf, item, trans->transid);
	btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
	btrfs_set_file_extent_compression(leaf, item, 0);
	btrfs_set_file_extent_encryption(leaf, item, 0);
	btrfs_set_file_extent_other_encoding(leaf, item, 0);

	btrfs_mark_buffer_dirty(trans, leaf);
out:
	btrfs_free_path(path);
	return ret;
}

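/*
 * Note on the resulting item: a hole is encoded as a regular file extent
 * whose disk_bytenr and disk_num_bytes are both 0, with num_bytes/ram_bytes
 * covering the hole's length, which is how readers distinguish it from an
 * extent backed by allocated space.
 */
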
static struct btrfs_csum_item *
btrfs_lookup_csum(struct btrfs_trans_handle *trans,
		  struct btrfs_root *root,
		  struct btrfs_path *path,
		  u64 bytenr, int cow)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;
	struct btrfs_key file_key;
	struct btrfs_key found_key;
	struct btrfs_csum_item *item;
	struct extent_buffer *leaf;
	u64 csum_offset = 0;
	const u32 csum_size = fs_info->csum_size;
	int csums_in_item;

	file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	file_key.offset = bytenr;
	file_key.type = BTRFS_EXTENT_CSUM_KEY;
	ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
	if (ret < 0)
		goto fail;
	leaf = path->nodes[0];
	if (ret > 0) {
		ret = 1;
		if (path->slots[0] == 0)
			goto fail;
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.type != BTRFS_EXTENT_CSUM_KEY)
			goto fail;

		csum_offset = (bytenr - found_key.offset) >>
			      fs_info->sectorsize_bits;
		csums_in_item = btrfs_item_size(leaf, path->slots[0]);
		csums_in_item /= csum_size;

		if (csum_offset == csums_in_item) {
			ret = -EFBIG;
			goto fail;
		} else if (csum_offset > csums_in_item) {
			goto fail;
		}
	}
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
	item = (struct btrfs_csum_item *)((unsigned char *)item +
					  csum_offset * csum_size);
	return item;
fail:
	if (ret > 0)
		ret = -ENOENT;
	return ERR_PTR(ret);
}

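/*
 * Return contract, spelled out: -EFBIG means the previous csum item ends
 * exactly at @bytenr (csum_offset == csums_in_item), i.e. the item exists but
 * would have to grow to cover @bytenr; -ENOENT means no csum item covers
 * @bytenr at all.
 */
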
int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path, u64 objectid,
			     u64 offset, int mod)
{
	struct btrfs_key file_key;
	int ins_len = mod < 0 ? -1 : 0;
	int cow = mod != 0;

	file_key.objectid = objectid;
	file_key.offset = offset;
	file_key.type = BTRFS_EXTENT_DATA_KEY;

	return btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
}

/*
 * Find checksums for the logical bytenr range [disk_bytenr, disk_bytenr + len)
 * and store the result in @dst.
 *
 * Return >0 for the number of sectors we found.
 * Return 0 if the range [disk_bytenr, disk_bytenr + sectorsize) has no csum;
 * the caller may want to try the next sector until one range is hit.
 * Return <0 for fatal errors.
 */
static int search_csum_tree(struct btrfs_fs_info *fs_info,
			    struct btrfs_path *path, u64 disk_bytenr,
			    u64 len, u8 *dst)
{
	struct btrfs_root *csum_root;
	struct btrfs_csum_item *item = NULL;
	struct btrfs_key key;
	const u32 sectorsize = fs_info->sectorsize;
	const u32 csum_size = fs_info->csum_size;
	u32 itemsize;
	int ret;
	u64 csum_start;
	u64 csum_len;

	ASSERT(IS_ALIGNED(disk_bytenr, sectorsize) &&
	       IS_ALIGNED(len, sectorsize));

	/* Check if the current csum item covers disk_bytenr */
	if (path->nodes[0]) {
		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_csum_item);
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		itemsize = btrfs_item_size(path->nodes[0], path->slots[0]);

		csum_start = key.offset;
		csum_len = (itemsize / csum_size) * sectorsize;

		if (in_range(disk_bytenr, csum_start, csum_len))
			goto found;
	}

	/* Current item doesn't contain the desired range, search again */
	btrfs_release_path(path);
	csum_root = btrfs_csum_root(fs_info, disk_bytenr);
	item = btrfs_lookup_csum(NULL, csum_root, path, disk_bytenr, 0);
	if (IS_ERR(item)) {
		ret = PTR_ERR(item);
		goto out;
	}
	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	itemsize = btrfs_item_size(path->nodes[0], path->slots[0]);

	csum_start = key.offset;
	csum_len = (itemsize / csum_size) * sectorsize;
	ASSERT(in_range(disk_bytenr, csum_start, csum_len));

found:
	ret = (min(csum_start + csum_len, disk_bytenr + len) -
	       disk_bytenr) >> fs_info->sectorsize_bits;
	read_extent_buffer(path->nodes[0], dst, (unsigned long)item,
			   ret * csum_size);
out:
	if (ret == -ENOENT || ret == -EFBIG)
		ret = 0;
	return ret;
}

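/*
 * Copy-length sketch: if the csum item covers [X, X + 64K) and we are asked
 * for disk_bytenr = X + 16K with len = 128K (4 KiB sectors assumed), the
 * min() above clamps the copy to the item's end, so ret = (64K - 16K) / 4K =
 * 12 sectors' worth of checksums, and the caller continues from X + 64K.
 */
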
/*
 * Lookup the checksum for the read bio in the csum tree.
 *
 * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
 */
blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
{
	struct btrfs_inode *inode = bbio->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct bio *bio = &bbio->bio;
	struct btrfs_path *path;
	const u32 sectorsize = fs_info->sectorsize;
	const u32 csum_size = fs_info->csum_size;
	u32 orig_len = bio->bi_iter.bi_size;
	u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
	const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
	blk_status_t ret = BLK_STS_OK;
	u32 bio_offset = 0;

	if ((inode->flags & BTRFS_INODE_NODATASUM) ||
	    test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
		return BLK_STS_OK;

	/*
	 * This function is only called for read bios.
	 *
	 * This means two things:
	 * - All our csums should only be in the csum tree
	 *   No ordered extents csums, as ordered extents are only for the
	 *   write path.
	 * - No need to bother with any other info from the bvecs
	 *   Since we're looking up csums, the only important info is the
	 *   disk_bytenr and the length, which can be extracted from bi_iter
	 *   directly.
	 */
	ASSERT(bio_op(bio) == REQ_OP_READ);
	path = btrfs_alloc_path();
	if (!path)
		return BLK_STS_RESOURCE;

	if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
		bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
		if (!bbio->csum) {
			btrfs_free_path(path);
			return BLK_STS_RESOURCE;
		}
	} else {
		bbio->csum = bbio->csum_inline;
	}

	/*
	 * If the requested number of sectors is larger than one leaf can
	 * contain, kick the readahead for the csum tree.
	 */
	if (nblocks > fs_info->csums_per_leaf)
		path->reada = READA_FORWARD;

	/*
	 * The free space stuff is only read when it hasn't been updated in
	 * the current transaction, so we can safely read from the commit
	 * root and sidestep a nasty deadlock between reading the free space
	 * cache and updating the csum tree.
	 */
	if (btrfs_is_free_space_inode(inode)) {
		path->search_commit_root = 1;
		path->skip_locking = 1;
	}

	while (bio_offset < orig_len) {
		int count;
		u64 cur_disk_bytenr = orig_disk_bytenr + bio_offset;
		u8 *csum_dst = bbio->csum +
			(bio_offset >> fs_info->sectorsize_bits) * csum_size;

		count = search_csum_tree(fs_info, path, cur_disk_bytenr,
					 orig_len - bio_offset, csum_dst);
		if (count < 0) {
			ret = errno_to_blk_status(count);
			if (bbio->csum != bbio->csum_inline)
				kfree(bbio->csum);
			bbio->csum = NULL;
			break;
		}

		/*
		 * We didn't find a csum for this range. We need to make sure
		 * we complain loudly about this, because we are not NODATASUM.
		 *
		 * However for the DATA_RELOC inode we could potentially be
		 * relocating data extents for a NODATASUM inode, so the inode
		 * itself won't be marked with NODATASUM, but the extent we're
		 * copying is in fact NODATASUM. If we don't find a csum we
		 * assume this is the case.
		 */
		if (count == 0) {
			memset(csum_dst, 0, csum_size);
			count = 1;

			if (inode->root->root_key.objectid ==
			    BTRFS_DATA_RELOC_TREE_OBJECTID) {
				u64 file_offset = bbio->file_offset + bio_offset;

				set_extent_bit(&inode->io_tree, file_offset,
					       file_offset + sectorsize - 1,
					       EXTENT_NODATASUM, NULL);
			} else {
				btrfs_warn_rl(fs_info,
			"csum hole found for disk bytenr range [%llu, %llu)",
					cur_disk_bytenr, cur_disk_bytenr + sectorsize);
			}
		}
		bio_offset += count * sectorsize;
	}

	btrfs_free_path(path);
	return ret;
}

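/*
 * Sizing sketch (assuming BTRFS_BIO_INLINE_CSUM_SIZE is 64 bytes and 4 byte
 * crc32c checksums): a 64 KiB read bio with 4 KiB sectors needs 16 * 4 = 64
 * bytes of checksums and fits the inline array, while anything larger takes
 * the kmalloc_array() path in btrfs_lookup_bio_sums().
 */
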
int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
			    struct list_head *list, int search_commit,
			    bool nowait)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_ordered_sum *sums;
	struct btrfs_csum_item *item;
	LIST_HEAD(tmplist);
	int ret;

	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
	       IS_ALIGNED(end + 1, fs_info->sectorsize));

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->nowait = nowait;
	if (search_commit) {
		path->skip_locking = 1;
		path->reada = READA_FORWARD;
		path->search_commit_root = 1;
	}

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.offset = start;
	key.type = BTRFS_EXTENT_CSUM_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto fail;
	if (ret > 0 && path->slots[0] > 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);

		/*
		 * There are two cases we can hit here for the previous csum
		 * item:
		 *
		 *		|<- search range ->|
		 *	|<- csum item ->|
		 *
		 * Or
		 *				|<- search range ->|
		 *	|<- csum item ->|
		 *
		 * Check if the previous csum item covers the leading part of
		 * the search range. If so we have to start from previous csum
		 * item.
		 */
		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY) {
			if (bytes_to_csum_size(fs_info, start - key.offset) <
			    btrfs_item_size(leaf, path->slots[0] - 1))
				path->slots[0]--;
		}
	}

	while (start <= end) {
		u64 csum_end;

		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto fail;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    key.type != BTRFS_EXTENT_CSUM_KEY ||
		    key.offset > end)
			break;

		if (key.offset > start)
			start = key.offset;

		csum_end = key.offset + csum_size_to_bytes(fs_info,
					btrfs_item_size(leaf, path->slots[0]));
		if (csum_end <= start) {
			path->slots[0]++;
			continue;
		}

		csum_end = min(csum_end, end + 1);
		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_csum_item);
		while (start < csum_end) {
			unsigned long offset;
			size_t size;

			size = min_t(size_t, csum_end - start,
				     max_ordered_sum_bytes(fs_info));
			sums = kzalloc(btrfs_ordered_sum_size(fs_info, size),
				       GFP_NOFS);
			if (!sums) {
				ret = -ENOMEM;
				goto fail;
			}

			sums->logical = start;
			sums->len = size;

			offset = bytes_to_csum_size(fs_info, start - key.offset);

			read_extent_buffer(path->nodes[0],
					   sums->sums,
					   ((unsigned long)item) + offset,
					   bytes_to_csum_size(fs_info, size));

			start += size;
			list_add_tail(&sums->list, &tmplist);
		}
		path->slots[0]++;
	}
	ret = 0;
fail:
	while (ret < 0 && !list_empty(&tmplist)) {
		sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list);
		list_del(&sums->list);
		kfree(sums);
	}
	list_splice_tail(&tmplist, list);

	btrfs_free_path(path);
	return ret;
}

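/*
 * Allocation-splitting note: each btrfs_ordered_sum allocated above is capped
 * at max_ordered_sum_bytes(), which keeps a single allocation within one
 * page, so a large range simply produces several entries on @list rather
 * than one huge buffer.
 */
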
/*
 * Do the same work as btrfs_lookup_csums_list(), the difference is in how
 * we return the result.
 *
 * This version will set the corresponding bits in @csum_bitmap to represent
 * that there is a csum found.
 * Each bit represents a sector. Thus the caller should ensure the @csum_buf
 * passed in is large enough to contain all csums.
 */
int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
			      u64 start, u64 end, u8 *csum_buf,
			      unsigned long *csum_bitmap)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_csum_item *item;
	const u64 orig_start = start;
	bool free_path = false;
	int ret;

	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
	       IS_ALIGNED(end + 1, fs_info->sectorsize));

	if (!path) {
		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
		free_path = true;
	}

	/* Check if we can reuse the previous path. */
	if (path->nodes[0]) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY &&
		    key.offset <= start)
			goto search_forward;
		btrfs_release_path(path);
	}

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;
	key.offset = start;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto fail;
	if (ret > 0 && path->slots[0] > 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);

		/*
		 * There are two cases we can hit here for the previous csum
		 * item:
		 *
		 *		|<- search range ->|
		 *	|<- csum item ->|
		 *
		 * Or
		 *				|<- search range ->|
		 *	|<- csum item ->|
		 *
		 * Check if the previous csum item covers the leading part of
		 * the search range. If so we have to start from previous csum
		 * item.
		 */
		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY) {
			if (bytes_to_csum_size(fs_info, start - key.offset) <
			    btrfs_item_size(leaf, path->slots[0] - 1))
				path->slots[0]--;
		}
	}

search_forward:
	while (start <= end) {
		u64 csum_end;

		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto fail;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    key.type != BTRFS_EXTENT_CSUM_KEY ||
		    key.offset > end)
			break;

		if (key.offset > start)
			start = key.offset;

		csum_end = key.offset + csum_size_to_bytes(fs_info,
					btrfs_item_size(leaf, path->slots[0]));
		if (csum_end <= start) {
			path->slots[0]++;
			continue;
		}

		csum_end = min(csum_end, end + 1);
		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_csum_item);
		while (start < csum_end) {
			unsigned long offset;
			size_t size;
			u8 *csum_dest = csum_buf + bytes_to_csum_size(fs_info,
						start - orig_start);

			size = min_t(size_t, csum_end - start, end + 1 - start);

			offset = bytes_to_csum_size(fs_info, start - key.offset);

			read_extent_buffer(path->nodes[0], csum_dest,
					   ((unsigned long)item) + offset,
					   bytes_to_csum_size(fs_info, size));

			bitmap_set(csum_bitmap,
				(start - orig_start) >> fs_info->sectorsize_bits,
				size >> fs_info->sectorsize_bits);

			start += size;
		}
		path->slots[0]++;
	}
	ret = 0;
fail:
	if (free_path)
		btrfs_free_path(path);
	return ret;
}

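/*
 * Caller sizing sketch: for a range [start, end], @csum_buf must hold
 * bytes_to_csum_size(fs_info, end + 1 - start) bytes and @csum_bitmap must
 * hold (end + 1 - start) >> sectorsize_bits bits, one bit per sector, since
 * the copy and bitmap_set() above both index by (start - orig_start).
 */
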
/*
 * Calculate checksums of the data contained inside a bio.
 */
blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
{
	struct btrfs_ordered_extent *ordered = bbio->ordered;
	struct btrfs_inode *inode = bbio->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	struct bio *bio = &bbio->bio;
	struct btrfs_ordered_sum *sums;
	char *data;
	struct bvec_iter iter;
	struct bio_vec bvec;
	int index;
	unsigned int blockcount;
	int i;
	unsigned nofs_flag;

	nofs_flag = memalloc_nofs_save();
	sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
			GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);

	if (!sums)
		return BLK_STS_RESOURCE;

	sums->len = bio->bi_iter.bi_size;
	INIT_LIST_HEAD(&sums->list);

	sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
	index = 0;

	shash->tfm = fs_info->csum_shash;

	bio_for_each_segment(bvec, bio, iter) {
		blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
						 bvec.bv_len + fs_info->sectorsize
						 - 1);

		for (i = 0; i < blockcount; i++) {
			data = bvec_kmap_local(&bvec);
			crypto_shash_digest(shash,
					    data + (i * fs_info->sectorsize),
					    fs_info->sectorsize,
					    sums->sums + index);
			kunmap_local(data);
			index += fs_info->csum_size;
		}
	}

	bbio->sums = sums;
	btrfs_add_ordered_sum(ordered, sums);
	return 0;
}

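/*
 * Per-segment arithmetic note: blockcount rounds bvec.bv_len up to a whole
 * number of sectors, so e.g. a 12 KiB bvec with 4 KiB sectors yields
 * blockcount = 3 and advances index by 3 * csum_size after the inner loop.
 */
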
/*
 * Nodatasum I/O on zoned file systems still requires a btrfs_ordered_sum to
 * record the updated logical address on Zone Append completion.
 * Allocate just the structure with an empty sums array here for that case.
 */
blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio)
{
	bbio->sums = kmalloc(sizeof(*bbio->sums), GFP_NOFS);
	if (!bbio->sums)
		return BLK_STS_RESOURCE;
	bbio->sums->len = bbio->bio.bi_iter.bi_size;
	bbio->sums->logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
	btrfs_add_ordered_sum(bbio->ordered, bbio->sums);
	return 0;
}

/*
 * Remove one checksum overlapping a range.
 *
 * This expects the key to describe the csum pointed to by the path, and it
 * expects the csum to overlap the range [bytenr, bytenr + len).
 *
 * The csum should not be entirely contained in the range and the range should
 * not be entirely contained in the csum.
 *
 * This calls btrfs_truncate_item() with the correct args based on the overlap,
 * and fixes up the key as required.
 */
static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
				       struct btrfs_path *path,
				       struct btrfs_key *key,
				       u64 bytenr, u64 len)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct extent_buffer *leaf;
	const u32 csum_size = fs_info->csum_size;
	u64 csum_end;
	u64 end_byte = bytenr + len;
	u32 blocksize_bits = fs_info->sectorsize_bits;

	leaf = path->nodes[0];
	csum_end = btrfs_item_size(leaf, path->slots[0]) / csum_size;
	csum_end <<= blocksize_bits;
	csum_end += key->offset;

	if (key->offset < bytenr && csum_end <= end_byte) {
		/*
		 *         [ bytenr - len ]
		 *         [   ]
		 *   [csum     ]
		 *   A simple truncate off the end of the item
		 */
		u32 new_size = (bytenr - key->offset) >> blocksize_bits;
		new_size *= csum_size;
		btrfs_truncate_item(trans, path, new_size, 1);
	} else if (key->offset >= bytenr && csum_end > end_byte &&
		   end_byte > key->offset) {
		/*
		 *         [ bytenr - len ]
		 *                 [ ]
		 *                 [csum     ]
		 * we need to truncate from the beginning of the csum
		 */
		u32 new_size = (csum_end - end_byte) >> blocksize_bits;
		new_size *= csum_size;

		btrfs_truncate_item(trans, path, new_size, 0);

		key->offset = end_byte;
		btrfs_set_item_key_safe(trans, path, key);
	} else {
		BUG();
	}
}

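/*
 * Example of the first branch (illustrative numbers, 4 KiB sectors assumed):
 * a csum item covering [0, 256K) with bytenr = 128K and len = 256K has
 * key->offset < bytenr and csum_end (256K) <= end_byte (384K), so the item is
 * truncated to the 32 checksums covering [0, 128K).
 */
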
/*
 * Delete the csum items from the csum tree for a given range of bytes.
 */
int btrfs_del_csums(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, u64 bytenr, u64 len)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_path *path;
	struct btrfs_key key;
	u64 end_byte = bytenr + len;
	u64 csum_end;
	struct extent_buffer *leaf;
	int ret = 0;
	const u32 csum_size = fs_info->csum_size;
	u32 blocksize_bits = fs_info->sectorsize_bits;

	ASSERT(root->root_key.objectid == BTRFS_CSUM_TREE_OBJECTID ||
	       root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while (1) {
		key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
		key.offset = end_byte - 1;
		key.type = BTRFS_EXTENT_CSUM_KEY;

		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0) {
			ret = 0;
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		} else if (ret < 0) {
			break;
		}

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    key.type != BTRFS_EXTENT_CSUM_KEY) {
			break;
		}

		if (key.offset >= end_byte)
			break;

		csum_end = btrfs_item_size(leaf, path->slots[0]) / csum_size;
		csum_end <<= blocksize_bits;
		csum_end += key.offset;

		/* this csum ends before we start, we're done */
		if (csum_end <= bytenr)
			break;

		/* delete the entire item, it is inside our range */
		if (key.offset >= bytenr && csum_end <= end_byte) {
			int del_nr = 1;

			/*
			 * Check how many csum items preceding this one in this
			 * leaf correspond to our range and then delete them all
			 * at once.
			 */
			if (key.offset > bytenr && path->slots[0] > 0) {
				int slot = path->slots[0] - 1;

				while (slot >= 0) {
					struct btrfs_key pk;

					btrfs_item_key_to_cpu(leaf, &pk, slot);
					if (pk.offset < bytenr ||
					    pk.type != BTRFS_EXTENT_CSUM_KEY ||
					    pk.objectid !=
					    BTRFS_EXTENT_CSUM_OBJECTID)
						break;
					path->slots[0] = slot;
					del_nr++;
					key.offset = pk.offset;
					slot--;
				}
			}
			ret = btrfs_del_items(trans, root, path,
					      path->slots[0], del_nr);
			if (ret)
				break;
			if (key.offset == bytenr)
				break;
		} else if (key.offset < bytenr && csum_end > end_byte) {
			unsigned long offset;
			unsigned long shift_len;
			unsigned long item_offset;
			/*
			 *        [ bytenr - len ]
			 *     [csum                ]
			 *
			 * Our bytes are in the middle of the csum,
			 * we need to split this item and insert a new one.
			 *
			 * But we can't drop the path because the
			 * csum could change, get removed, extended etc.
			 *
			 * The trick here is the max size of a csum item leaves
			 * enough room in the tree block for a single
			 * item header. So, we split the item in place,
			 * adding a new header pointing to the existing
			 * bytes. Then we loop around again and we have
			 * a nicely formed csum item that we can neatly
			 * truncate.
			 */
			offset = (bytenr - key.offset) >> blocksize_bits;
			offset *= csum_size;

			shift_len = (len >> blocksize_bits) * csum_size;

			item_offset = btrfs_item_ptr_offset(leaf,
							    path->slots[0]);

			memzero_extent_buffer(leaf, item_offset + offset,
					      shift_len);
			key.offset = bytenr;

			/*
			 * btrfs_split_item returns -EAGAIN when the
			 * item changed size or key
			 */
			ret = btrfs_split_item(trans, root, path, &key, offset);
			if (ret && ret != -EAGAIN) {
				btrfs_abort_transaction(trans, ret);
				break;
			}
			ret = 0;

			key.offset = end_byte - 1;
		} else {
			truncate_one_csum(trans, path, &key, bytenr, len);
			if (key.offset < bytenr)
				break;
		}
		btrfs_release_path(path);
	}
	btrfs_free_path(path);
	return ret;
}

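/*
 * Flow note for the middle-split case above: after btrfs_split_item() the
 * checksums for [bytenr, bytenr + len) have been zeroed and the item split at
 * @bytenr, so the next loop iteration (searching for end_byte - 1 again)
 * finds a well-formed tail item that truncate_one_csum() can trim from its
 * front.
 */
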
static int find_next_csum_offset(struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 *next_offset)
{
	const u32 nritems = btrfs_header_nritems(path->nodes[0]);
	struct btrfs_key found_key;
	int slot = path->slots[0] + 1;
	int ret;

	if (nritems == 0 || slot >= nritems) {
		ret = btrfs_next_leaf(root, path);
		if (ret < 0) {
			return ret;
		} else if (ret > 0) {
			*next_offset = (u64)-1;
			return 0;
		}
		slot = path->slots[0];
	}

	btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);

	if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
	    found_key.type != BTRFS_EXTENT_CSUM_KEY)
		*next_offset = (u64)-1;
	else
		*next_offset = found_key.offset;

	return 0;
}

int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_ordered_sum *sums)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key file_key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	struct btrfs_csum_item *item;
	struct btrfs_csum_item *item_end;
	struct extent_buffer *leaf = NULL;
	u64 next_offset;
	u64 total_bytes = 0;
	u64 csum_offset;
	u64 bytenr;
	u32 ins_size;
	int index = 0;
	int found_next;
	int ret;
	const u32 csum_size = fs_info->csum_size;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
again:
	next_offset = (u64)-1;
	found_next = 0;
	bytenr = sums->logical + total_bytes;
	file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	file_key.offset = bytenr;
	file_key.type = BTRFS_EXTENT_CSUM_KEY;

	item = btrfs_lookup_csum(trans, root, path, bytenr, 1);
	if (!IS_ERR(item)) {
		ret = 0;
		leaf = path->nodes[0];
		item_end = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_csum_item);
		item_end = (struct btrfs_csum_item *)((char *)item_end +
			   btrfs_item_size(leaf, path->slots[0]));
		goto found;
	}
	ret = PTR_ERR(item);
	if (ret != -EFBIG && ret != -ENOENT)
		goto out;

	if (ret == -EFBIG) {
		u32 item_size;
		/* we found one, but it isn't big enough yet */
		leaf = path->nodes[0];
		item_size = btrfs_item_size(leaf, path->slots[0]);
		if ((item_size / csum_size) >=
		    MAX_CSUM_ITEMS(fs_info, csum_size)) {
			/* already at max size, make a new one */
			goto insert;
		}
	} else {
		/* We didn't find a csum item, insert one. */
		ret = find_next_csum_offset(root, path, &next_offset);
		if (ret < 0)
			goto out;
		found_next = 1;
		goto insert;
	}

	/*
	 * At this point, we know the tree has a checksum item that ends at an
	 * offset matching the start of the checksum range we want to insert.
	 * We try to extend that item as much as possible and then add as many
	 * checksums to it as they fit.
	 *
	 * First check if the leaf has enough free space for at least one
	 * checksum. If it has, go directly to the item extension code,
	 * otherwise release the path and do a search for insertion before the
	 * extension.
	 */
	if (btrfs_leaf_free_space(leaf) >= csum_size) {
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		csum_offset = (bytenr - found_key.offset) >>
			      fs_info->sectorsize_bits;
		goto extend_csum;
	}

	btrfs_release_path(path);
	path->search_for_extension = 1;
	ret = btrfs_search_slot(trans, root, &file_key, path,
				csum_size, 1);
	path->search_for_extension = 0;
	if (ret < 0)
		goto out;

	if (ret > 0) {
		if (path->slots[0] == 0)
			goto insert;
		path->slots[0]--;
	}

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
	csum_offset = (bytenr - found_key.offset) >> fs_info->sectorsize_bits;

	if (found_key.type != BTRFS_EXTENT_CSUM_KEY ||
	    found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
	    csum_offset >= MAX_CSUM_ITEMS(fs_info, csum_size)) {
		goto insert;
	}

extend_csum:
	if (csum_offset == btrfs_item_size(leaf, path->slots[0]) /
	    csum_size) {
		int extend_nr;
		u64 tmp;
		u32 diff;

		tmp = sums->len - total_bytes;
		tmp >>= fs_info->sectorsize_bits;
		WARN_ON(tmp < 1);
		extend_nr = max_t(int, 1, tmp);

		/*
		 * A log tree can already have checksum items with a subset of
		 * the checksums we are trying to log. This can happen after
		 * doing a sequence of partial writes into prealloc extents and
		 * fsyncs in between, with a full fsync logging a larger subrange
		 * of an extent for which a previous fast fsync logged a smaller
		 * subrange. And this happens in particular due to merging file
		 * extent items when we complete an ordered extent for a range
		 * covered by a prealloc extent - this is done at
		 * btrfs_mark_extent_written().
		 *
		 * So if we try to extend the previous checksum item, which has
		 * a range that ends at the start of the range we want to insert,
		 * make sure we don't extend beyond the start offset of the next
		 * checksum item. If we are at the last item in the leaf, then
		 * forget the optimization of extending and add a new checksum
		 * item - it is not worth the complexity of releasing the path,
		 * getting the first key for the next leaf, repeat the btree
		 * search, etc, because log trees are temporary anyway and it
		 * would only save a few bytes of leaf space.
		 */
		if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
			if (path->slots[0] + 1 >=
			    btrfs_header_nritems(path->nodes[0])) {
				ret = find_next_csum_offset(root, path, &next_offset);
				if (ret < 0)
					goto out;
				found_next = 1;
				goto insert;
			}

			ret = find_next_csum_offset(root, path, &next_offset);
			if (ret < 0)
				goto out;

			tmp = (next_offset - bytenr) >> fs_info->sectorsize_bits;
			if (tmp <= INT_MAX)
				extend_nr = min_t(int, extend_nr, tmp);
		}

		diff = (csum_offset + extend_nr) * csum_size;
		diff = min(diff,
			   MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);

		diff = diff - btrfs_item_size(leaf, path->slots[0]);
		diff = min_t(u32, btrfs_leaf_free_space(leaf), diff);
		diff /= csum_size;
		diff *= csum_size;

		btrfs_extend_item(trans, path, diff);
		ret = 0;
		goto csum;
	}

insert:
	btrfs_release_path(path);
	csum_offset = 0;
	if (found_next) {
		u64 tmp;

		tmp = sums->len - total_bytes;
		tmp >>= fs_info->sectorsize_bits;
		tmp = min(tmp, (next_offset - file_key.offset) >>
			       fs_info->sectorsize_bits);

		tmp = max_t(u64, 1, tmp);
		tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size));
		ins_size = csum_size * tmp;
	} else {
		ins_size = csum_size;
	}
	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
				      ins_size);
	if (ret < 0)
		goto out;
	if (WARN_ON(ret != 0))
		goto out;
	leaf = path->nodes[0];
csum:
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
	item_end = (struct btrfs_csum_item *)((unsigned char *)item +
				      btrfs_item_size(leaf, path->slots[0]));
	item = (struct btrfs_csum_item *)((unsigned char *)item +
					  csum_offset * csum_size);
found:
	ins_size = (u32)(sums->len - total_bytes) >> fs_info->sectorsize_bits;
	ins_size *= csum_size;
	ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item,
			 ins_size);
	write_extent_buffer(leaf, sums->sums + index, (unsigned long)item,
			    ins_size);

	index += ins_size;
	ins_size /= csum_size;
	total_bytes += ins_size * fs_info->sectorsize;

	btrfs_mark_buffer_dirty(trans, path->nodes[0]);
	if (total_bytes < sums->len) {
		btrfs_release_path(path);
		cond_resched();
		goto again;
	}
out:
	btrfs_free_path(path);
	return ret;
}

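/*
 * High level flow recap: each pass of the 'again' loop either (a) finds an
 * existing item covering @bytenr and overwrites into it, (b) extends the
 * previous item that ends exactly at @bytenr, or (c) inserts a fresh item,
 * then copies as many checksums as fit and loops until sums->len is consumed.
 */
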
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
				     const struct btrfs_path *path,
				     struct btrfs_file_extent_item *fi,
				     struct extent_map *em)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct btrfs_root *root = inode->root;
	struct extent_buffer *leaf = path->nodes[0];
	const int slot = path->slots[0];
	struct btrfs_key key;
	u64 extent_start, extent_end;
	u64 bytenr;
	u8 type = btrfs_file_extent_type(leaf, fi);
	int compress_type = btrfs_file_extent_compression(leaf, fi);

	btrfs_item_key_to_cpu(leaf, &key, slot);
	extent_start = key.offset;
	extent_end = btrfs_file_extent_end(path);
	em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
	em->generation = btrfs_file_extent_generation(leaf, fi);
	if (type == BTRFS_FILE_EXTENT_REG ||
	    type == BTRFS_FILE_EXTENT_PREALLOC) {
		em->start = extent_start;
		em->len = extent_end - extent_start;
		em->orig_start = extent_start -
			btrfs_file_extent_offset(leaf, fi);
		em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		if (bytenr == 0) {
			em->block_start = EXTENT_MAP_HOLE;
			return;
		}
		if (compress_type != BTRFS_COMPRESS_NONE) {
			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
			em->compress_type = compress_type;
			em->block_start = bytenr;
			em->block_len = em->orig_block_len;
		} else {
			bytenr += btrfs_file_extent_offset(leaf, fi);
			em->block_start = bytenr;
			em->block_len = em->len;
			if (type == BTRFS_FILE_EXTENT_PREALLOC)
				set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		}
	} else if (type == BTRFS_FILE_EXTENT_INLINE) {
		em->block_start = EXTENT_MAP_INLINE;
		em->start = extent_start;
		em->len = extent_end - extent_start;
		/*
		 * Initialize orig_start and block_len with the same values
		 * as in inode.c:btrfs_get_extent().
		 */
		em->orig_start = EXTENT_MAP_HOLE;
		em->block_len = (u64)-1;
		em->compress_type = compress_type;
		if (compress_type != BTRFS_COMPRESS_NONE)
			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
	} else {
		btrfs_err(fs_info,
			  "unknown file extent item type %d, inode %llu, offset %llu, root %llu",
			  type, btrfs_ino(inode), extent_start,
			  root->root_key.objectid);
	}
}

/*
 * Returns the end offset (non inclusive) of the file extent item the given
 * path points to. If it points to an inline extent, the returned offset is
 * rounded up to the sector size.
 */
u64 btrfs_file_extent_end(const struct btrfs_path *path)
{
	const struct extent_buffer *leaf = path->nodes[0];
	const int slot = path->slots[0];
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 end;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ASSERT(key.type == BTRFS_EXTENT_DATA_KEY);
	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);

	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
		end = btrfs_file_extent_ram_bytes(leaf, fi);
		end = ALIGN(key.offset + end, leaf->fs_info->sectorsize);
	} else {
		end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	}

	return end;
}

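/*
 * Rounding example for the inline case (assuming a 4 KiB sectorsize): an
 * inline extent at key.offset 0 with ram_bytes 500 yields
 * end = ALIGN(0 + 500, 4096) = 4096.
 */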