// SPDX-License-Identifier: GPL-2.0-only
/*
 * extent_map.c
 *
 * Block/Cluster mapping functions
 *
 * Copyright (C) 2004 Oracle. All rights reserved.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/fiemap.h>

#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
#include "super.h"
#include "symlink.h"
#include "aops.h"
#include "ocfs2_trace.h"

#include "buffer_head_io.h"

/*
 * The extent caching implementation is intentionally trivial.
 *
 * We only cache a small number of extents stored directly on the
 * inode, so linear order operations are acceptable. If we ever want
 * to increase the size of the extent map, then these algorithms must
 * get smarter.
 */
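
/*
 * A note on list order (descriptive, matching the code below): em_list
 * is kept in most-recently-used order. Lookup hits and merges move an
 * item to the head of the list, and once the map holds
 * OCFS2_MAX_EXTENT_MAP_ITEMS entries the tail item - the least
 * recently used - is overwritten in place.
 */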

void ocfs2_extent_map_init(struct inode *inode)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	oi->ip_extent_map.em_num_items = 0;
	INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
}

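/*
 * Find the cached item covering cpos and, on a hit, move it to the
 * front of em_list so recently used extents stay cheap to find again.
 * *ret_emi is set to NULL on a miss. The caller must hold ip_lock.
 */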
static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
				      unsigned int cpos,
				      struct ocfs2_extent_map_item **ret_emi)
{
	unsigned int range;
	struct ocfs2_extent_map_item *emi;

	*ret_emi = NULL;

	list_for_each_entry(emi, &em->em_list, ei_list) {
		range = emi->ei_cpos + emi->ei_clusters;

		if (cpos >= emi->ei_cpos && cpos < range) {
			list_move(&emi->ei_list, &em->em_list);

			*ret_emi = emi;
			break;
		}
	}
}

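/*
 * Cached lookup under ip_lock. On a hit, returns 0 and fills *phys
 * with the physical cluster backing cpos; when non-NULL, *len gets the
 * number of clusters from cpos to the end of the cached extent and
 * *flags gets the extent flags. Returns -ENOENT on a cache miss.
 */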
static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
				   unsigned int *phys, unsigned int *len,
				   unsigned int *flags)
{
	unsigned int coff;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map_item *emi;

	spin_lock(&oi->ip_lock);

	__ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
	if (emi) {
		coff = cpos - emi->ei_cpos;
		*phys = emi->ei_phys + coff;
		if (len)
			*len = emi->ei_clusters - coff;
		if (flags)
			*flags = emi->ei_flags;
	}

	spin_unlock(&oi->ip_lock);

	if (emi == NULL)
		return -ENOENT;

	return 0;
}

/*
 * Forget about all clusters equal to or greater than cpos.
 */
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
{
	struct ocfs2_extent_map_item *emi, *n;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	LIST_HEAD(tmp_list);
	unsigned int range;

	spin_lock(&oi->ip_lock);
	list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
		if (emi->ei_cpos >= cpos) {
			/* Full truncate of this record. */
			list_move(&emi->ei_list, &tmp_list);
			BUG_ON(em->em_num_items == 0);
			em->em_num_items--;
			continue;
		}

		range = emi->ei_cpos + emi->ei_clusters;
		if (range > cpos) {
			/* Partial truncate */
			emi->ei_clusters = cpos - emi->ei_cpos;
		}
	}
	spin_unlock(&oi->ip_lock);

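	/*
	 * The truncated items were detached onto tmp_list above so
	 * they can be freed here, after ip_lock has been dropped,
	 * keeping the time spent inside the spinlock short.
	 */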
	list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
		list_del(&emi->ei_list);
		kfree(emi);
	}
}

/*
 * Is any part of emi2 contained within emi1?
 */
static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
				 struct ocfs2_extent_map_item *emi2)
{
	unsigned int range1, range2;

	/*
	 * Check if logical start of emi2 is inside emi1
	 */
	range1 = emi1->ei_cpos + emi1->ei_clusters;
	if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
		return 1;

	/*
	 * Check if logical end of emi2 is inside emi1
	 */
	range2 = emi2->ei_cpos + emi2->ei_clusters;
	if (range2 > emi1->ei_cpos && range2 <= range1)
		return 1;

	return 0;
}

static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
				  struct ocfs2_extent_map_item *src)
{
	dest->ei_cpos = src->ei_cpos;
	dest->ei_phys = src->ei_phys;
	dest->ei_clusters = src->ei_clusters;
	dest->ei_flags = src->ei_flags;
}

/*
 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
 * otherwise.
 */
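/*
 * For example (illustrative values): if emi covers logical clusters
 * 10-14 at physical clusters 100-104, an ins starting at cpos 15,
 * phys 105 with matching flags is appended, growing emi to cover
 * clusters 10-19; the mirror case prepends in the same way.
 */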
static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
					 struct ocfs2_extent_map_item *ins)
{
	/*
	 * Handle contiguousness
	 */
	if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
	    ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
	    ins->ei_flags == emi->ei_flags) {
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	} else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
		   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
		   ins->ei_flags == emi->ei_flags) {
		emi->ei_phys = ins->ei_phys;
		emi->ei_cpos = ins->ei_cpos;
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	}

	/*
	 * Overlapping extents - this shouldn't happen unless we've
	 * split an extent to change its flags. That is exceedingly
	 * rare, so there's no sense in trying to optimize it yet.
	 */
	if (ocfs2_ei_is_contained(emi, ins) ||
	    ocfs2_ei_is_contained(ins, emi)) {
		ocfs2_copy_emi_fields(emi, ins);
		return 1;
	}

	/* No merge was possible. */
	return 0;
}

/*
 * In order to reduce complexity on the caller, this insert function
 * is intentionally liberal in what it will accept.
 *
 * The only rule is that the truncate call *must* be used whenever
 * records have been deleted. This avoids inserting overlapping
 * records with different physical mappings.
 */
void ocfs2_extent_map_insert_rec(struct inode *inode,
				 struct ocfs2_extent_rec *rec)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	struct ocfs2_extent_map_item *emi, *new_emi = NULL;
	struct ocfs2_extent_map_item ins;

	ins.ei_cpos = le32_to_cpu(rec->e_cpos);
	ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
					       le64_to_cpu(rec->e_blkno));
	ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
	ins.ei_flags = rec->e_flags;

search:
	spin_lock(&oi->ip_lock);

	list_for_each_entry(emi, &em->em_list, ei_list) {
		if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
			list_move(&emi->ei_list, &em->em_list);
			spin_unlock(&oi->ip_lock);
			goto out;
		}
	}

	/*
	 * No item could be merged.
	 *
	 * Either allocate and add a new item, or overwrite the least
	 * recently used one.
	 */

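	/*
	 * Allocation note: ip_lock is a spinlock and a GFP_NOFS
	 * allocation may sleep, so the lock is dropped before calling
	 * kmalloc() and the merge search is restarted from scratch,
	 * since the list may have changed while the lock was not held.
	 */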
	if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
		if (new_emi == NULL) {
			spin_unlock(&oi->ip_lock);

			new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
			if (new_emi == NULL)
				goto out;

			goto search;
		}

		ocfs2_copy_emi_fields(new_emi, &ins);
		list_add(&new_emi->ei_list, &em->em_list);
		em->em_num_items++;
		new_emi = NULL;
	} else {
		BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
		emi = list_entry(em->em_list.prev,
				 struct ocfs2_extent_map_item, ei_list);
		list_move(&emi->ei_list, &em->em_list);
		ocfs2_copy_emi_fields(emi, &ins);
	}

	spin_unlock(&oi->ip_lock);

out:
	kfree(new_emi);
}

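/*
 * Check whether the rightmost extent block of the inode is effectively
 * empty: returns 1 when the leaf has no records (or only the implicit
 * empty extent in slot 0), 0 when it has real records, and a negative
 * errno on error.
 */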
static int ocfs2_last_eb_is_empty(struct inode *inode,
				  struct ocfs2_dinode *di)
{
	int ret, next_free;
	u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	eb = (struct ocfs2_extent_block *) eb_bh->b_data;
	el = &eb->h_list;

	if (el->l_tree_depth) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has non zero tree depth in leaf block %llu\n",
			    inode->i_ino,
			    (unsigned long long)eb_bh->b_blocknr);
		ret = -EROFS;
		goto out;
	}

	next_free = le16_to_cpu(el->l_next_free_rec);

	if (next_free == 0 ||
	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
		ret = 1;

out:
	brelse(eb_bh);
	return ret;
}

/*
 * Return the 1st index within el which contains an extent start
 * larger than v_cluster.
 */
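/*
 * For example (illustrative values): with records starting at cpos 0
 * and cpos 10, a v_cluster of 3 returns index 1, the first record
 * starting past cluster 3. If no record starts past v_cluster, the
 * value of l_next_free_rec is returned.
 */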
static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
				       u32 v_cluster)
{
	int i;
	struct ocfs2_extent_rec *rec;

	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
		rec = &el->l_recs[i];

		if (v_cluster < le32_to_cpu(rec->e_cpos))
			break;
	}

	return i;
}

/*
 * Figure out the size of a hole which starts at v_cluster within the given
 * extent list.
 *
 * If there is no more allocation past v_cluster, we return the maximum
 * cluster offset (UINT_MAX) minus v_cluster.
 *
 * If we have in-inode extents, then el points to the dinode list and
 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 * containing el.
 */
int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
			       struct ocfs2_extent_list *el,
			       struct buffer_head *eb_bh,
			       u32 v_cluster,
			       u32 *num_clusters)
{
	int ret, i;
	struct buffer_head *next_eb_bh = NULL;
	struct ocfs2_extent_block *eb, *next_eb;

	i = ocfs2_search_for_hole_index(el, v_cluster);

	if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
		eb = (struct ocfs2_extent_block *)eb_bh->b_data;

		/*
		 * Check the next leaf for any extents.
		 */

		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
			goto no_more_extents;

		ret = ocfs2_read_extent_block(ci,
					      le64_to_cpu(eb->h_next_leaf_blk),
					      &next_eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
		el = &next_eb->h_list;
		i = ocfs2_search_for_hole_index(el, v_cluster);
	}

no_more_extents:
	if (i == le16_to_cpu(el->l_next_free_rec)) {
		/*
		 * We're at the end of our existing allocation. Just
		 * return the maximum number of clusters we could
		 * possibly allocate.
		 */
		*num_clusters = UINT_MAX - v_cluster;
	} else {
		*num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
	}

	ret = 0;
out:
	brelse(next_eb_bh);
	return ret;
}

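/*
 * Read the extent record covering v_cluster straight from the on-disk
 * tree, bypassing the extent map cache. On success *ret_rec holds the
 * raw record (zeroed, with e_blkno == 0, for a hole), *hole_len gets
 * the hole size when one is hit, and *is_last is set when the record
 * is the rightmost one in the tree. hole_len and is_last may be NULL.
 */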
static int ocfs2_get_clusters_nocache(struct inode *inode,
				      struct buffer_head *di_bh,
				      u32 v_cluster, unsigned int *hole_len,
				      struct ocfs2_extent_rec *ret_rec,
				      unsigned int *is_last)
{
	int i, ret, tree_height, len;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;
	struct buffer_head *eb_bh = NULL;

	memset(ret_rec, 0, sizeof(*ret_rec));
	if (is_last)
		*is_last = 0;

	di = (struct ocfs2_dinode *) di_bh->b_data;
	el = &di->id2.i_list;
	tree_height = le16_to_cpu(el->l_tree_depth);

	if (tree_height > 0) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		/*
		 * Holes can be larger than the maximum size of an
		 * extent, so we return their lengths in a separate
		 * field.
		 */
		if (hole_len) {
			ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
							 el, eb_bh,
							 v_cluster, &len);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			*hole_len = len;
		}
		goto out_hole;
	}

	rec = &el->l_recs[i];

	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

	if (!rec->e_blkno) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has bad extent record (%u, %u, 0)\n",
			    inode->i_ino,
			    le32_to_cpu(rec->e_cpos),
			    ocfs2_rec_clusters(el, rec));
		ret = -EROFS;
		goto out;
	}

	*ret_rec = *rec;

	/*
	 * Checking for last extent is potentially expensive - we
	 * might have to look at the next leaf over to see if it's
	 * empty.
	 *
	 * The first two checks are to see whether the caller even
	 * cares for this information, and if the extent is at least
	 * the last in its list.
	 *
	 * If those hold true, then the extent is last if any of the
	 * additional conditions hold true:
	 * - Extent list is in-inode
	 * - Extent list is right-most
	 * - Extent list is 2nd to rightmost, with empty right-most
	 */
	if (is_last) {
		if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
			if (tree_height == 0)
				*is_last = 1;
			else if (eb->h_blkno == di->i_last_eb_blk)
				*is_last = 1;
			else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
				ret = ocfs2_last_eb_is_empty(inode, di);
				if (ret < 0) {
					mlog_errno(ret);
					goto out;
				}
				if (ret == 1)
					*is_last = 1;
			}
		}
	}

out_hole:
	ret = 0;
out:
	brelse(eb_bh);
	return ret;
}

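/*
 * Translate an extent record into the physical cluster backing
 * v_cluster: coff is the offset of v_cluster into the record, so the
 * physical cluster is the record's start block converted to clusters
 * plus coff, and the remaining length is the record's cluster count
 * minus coff.
 */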
static void ocfs2_relative_extent_offsets(struct super_block *sb,
					  u32 v_cluster,
					  struct ocfs2_extent_rec *rec,
					  u32 *p_cluster, u32 *num_clusters)
{
	u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);

	*p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
	*p_cluster = *p_cluster + coff;

	if (num_clusters)
		*num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
}

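/*
 * Like ocfs2_get_clusters(), but walks an xattr extent list rather
 * than the inode data tree. There is no extent map caching here, and
 * a lookup that finds no record is treated as on-disk corruption
 * (-EROFS) rather than as a hole.
 */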
int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
			     u32 *p_cluster, u32 *num_clusters,
			     struct ocfs2_extent_list *el,
			     unsigned int *extent_flags)
{
	int ret = 0, i;
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_rec *rec;
	u32 coff;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		ret = -EROFS;
		mlog_errno(ret);
		goto out;
	} else {
		rec = &el->l_recs[i];
		BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

		if (!rec->e_blkno) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
				    inode->i_ino,
				    le32_to_cpu(rec->e_cpos),
				    ocfs2_rec_clusters(el, rec));
			ret = -EROFS;
			goto out;
		}
		coff = v_cluster - le32_to_cpu(rec->e_cpos);
		*p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
						      le64_to_cpu(rec->e_blkno));
		*p_cluster = *p_cluster + coff;
		if (num_clusters)
			*num_clusters = ocfs2_rec_clusters(el, rec) - coff;

		if (extent_flags)
			*extent_flags = rec->e_flags;
	}
out:
	brelse(eb_bh);
	return ret;
}

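/*
 * The main logical-to-physical mapping entry point for inode data.
 * The cached extent map is consulted first; on a miss the on-disk
 * tree is searched and the result is inserted back into the cache.
 * A hole is reported as *p_cluster == 0 with *num_clusters set to the
 * hole size. Inline-data inodes have no cluster mapping, hence
 * -ERANGE.
 *
 * A typical call (illustrative):
 *
 *	u32 p_cluster, num_clusters;
 *	unsigned int ext_flags;
 *	int ret = ocfs2_get_clusters(inode, v_cluster, &p_cluster,
 *				     &num_clusters, &ext_flags);
 */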
int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
		       u32 *p_cluster, u32 *num_clusters,
		       unsigned int *extent_flags)
{
	int ret;
	unsigned int hole_len, flags = 0;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = -ERANGE;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
				      num_clusters, extent_flags);
	if (ret == 0)
		goto out;

	ret = ocfs2_read_inode_block(inode, &di_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
					 &rec, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (rec.e_blkno == 0ULL) {
		/*
		 * A hole was found. Return some canned values that
		 * callers can key on. If asked for, num_clusters will
		 * be populated with the size of the hole.
		 */
		*p_cluster = 0;
		if (num_clusters) {
			*num_clusters = hole_len;
		}
	} else {
		ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
					      p_cluster, num_clusters);
		flags = rec.e_flags;

		ocfs2_extent_map_insert_rec(inode, &rec);
	}

	if (extent_flags)
		*extent_flags = flags;

out:
	brelse(di_bh);
	return ret;
}

/*
 * This expects alloc_sem to be held. The allocation cannot change at
 * all while the map is in the process of being updated.
 */
int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
				u64 *ret_count, unsigned int *extent_flags)
{
	int ret;
	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 cpos, num_clusters, p_cluster;
	u64 boff = 0;

	cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);

	ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
				 extent_flags);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * p_cluster == 0 indicates a hole.
	 */
	if (p_cluster) {
		boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		boff += (v_blkno & (u64)(bpc - 1));
	}

	*p_blkno = boff;

	if (ret_count) {
		*ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
		*ret_count -= v_blkno & (u64)(bpc - 1);
	}

out:
	return ret;
}

/*
 * The name ocfs2_fiemap_inline() may be a little misleading: it
 * handles not only the fiemap for inline-data files, but also fast
 * symlinks, because the two are identical as far as extent mapping
 * is concerned.
 */
static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
			       struct fiemap_extent_info *fieinfo,
			       u64 map_start)
{
	int ret;
	unsigned int id_count;
	struct ocfs2_dinode *di;
	u64 phys;
	u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	di = (struct ocfs2_dinode *)di_bh->b_data;
	if (ocfs2_inode_is_fast_symlink(inode))
		id_count = ocfs2_fast_symlink_chars(inode->i_sb);
	else
		id_count = le16_to_cpu(di->id2.i_data.id_count);

	if (map_start < id_count) {
		phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
		if (ocfs2_inode_is_fast_symlink(inode))
			phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
		else
			phys += offsetof(struct ocfs2_dinode,
					 id2.i_data.id_data);

		ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
					      flags);
		if (ret < 0)
			return ret;
	}

	return 0;
}

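/*
 * Walk the inode's extents under a shared cluster lock and
 * ip_alloc_sem, reporting each allocated extent to fiemap. Holes are
 * simply skipped, unwritten and refcounted extents are flagged, and
 * the walk stops at the last extent or at the end of the requested
 * range, whichever comes first.
 */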
int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 map_start, u64 map_len)
{
	int ret, is_last;
	u32 mapping_end, cpos;
	unsigned int hole_size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u64 len_bytes, phys_bytes, virt_bytes;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	ret = fiemap_prep(inode, fieinfo, map_start, &map_len, 0);
	if (ret)
		return ret;

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/*
	 * Handle inline-data and fast symlink separately.
	 */
	if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
	    ocfs2_inode_is_fast_symlink(inode)) {
		ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
		goto out_unlock;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		u32 fe_flags;

		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 &hole_size, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		if (rec.e_blkno == 0ULL) {
			cpos += hole_size;
			continue;
		}

		fe_flags = 0;
		if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
			fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			fe_flags |= FIEMAP_EXTENT_SHARED;
		if (is_last)
			fe_flags |= FIEMAP_EXTENT_LAST;
		len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
		phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
		virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;

		ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
					      len_bytes, fe_flags);
		if (ret)
			break;

		cpos = le32_to_cpu(rec.e_cpos) + le16_to_cpu(rec.e_leaf_clusters);
	}

	if (ret > 0)
		ret = 0;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:

	return ret;
}

/* Is IO overwriting allocated blocks? */
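/*
 * Returns 0 when the whole byte range is backed by allocated,
 * non-refcounted extents, so an in-place overwrite is possible, and
 * -EAGAIN when a hole or a refcounted extent is found in the range.
 * For inline-data inodes the answer depends only on whether the end
 * of the write still fits in the inline data area.
 */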
int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
		       u64 map_start, u64 map_len)
{
	int ret = 0, is_last;
	u32 mapping_end, cpos;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_rec rec;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
			return ret;
		else
			return -EAGAIN;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 NULL, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (rec.e_blkno == 0ULL)
			break;

		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			break;

		cpos = le32_to_cpu(rec.e_cpos) +
			le16_to_cpu(rec.e_leaf_clusters);
	}

	if (cpos < mapping_end)
		ret = -EAGAIN;
out:
	return ret;
}

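/*
 * Back end for the SEEK_DATA/SEEK_HOLE variants of llseek(). Walks
 * the extents from *offset forward, with unwritten extents counting
 * as holes. On success *offset points at the first matching region
 * at or after the original offset; an offset at or beyond i_size, or
 * a SEEK_DATA request that finds no more data, yields -ENXIO.
 * Inline-data inodes are treated as all data up to i_size.
 */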
int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int ret;
	unsigned int is_last = 0, is_data = 0;
	u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	u32 cpos, cend, clen, hole_size;
	u64 extoff, extlen;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	if (*offset >= i_size_read(inode)) {
		ret = -ENXIO;
		goto out_unlock;
	}

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (whence == SEEK_HOLE)
			*offset = i_size_read(inode);
		goto out_unlock;
	}

	clen = 0;
	cpos = *offset >> cs_bits;
	cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));

	while (cpos < cend && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
						 &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		extoff = cpos;
		extoff <<= cs_bits;

		if (rec.e_blkno == 0ULL) {
			clen = hole_size;
			is_data = 0;
		} else {
			clen = le16_to_cpu(rec.e_leaf_clusters) -
				(cpos - le32_to_cpu(rec.e_cpos));
			is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1;
		}

		if ((!is_data && whence == SEEK_HOLE) ||
		    (is_data && whence == SEEK_DATA)) {
			if (extoff > *offset)
				*offset = extoff;
			goto out_unlock;
		}

		if (!is_last)
			cpos += clen;
	}

	if (whence == SEEK_HOLE) {
		extoff = cpos;
		extoff <<= cs_bits;
		extlen = clen;
		extlen <<= cs_bits;

		if ((extoff + extlen) > i_size_read(inode))
			extlen = i_size_read(inode) - extoff;
		extoff += extlen;
		if (extoff > *offset)
			*offset = extoff;
		goto out_unlock;
	}

	ret = -ENXIO;

out_unlock:

	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:
	return ret;
}

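/*
 * Read nr blocks starting at logical block v_block into bhs[]. Each
 * pass maps the next logical run to its physical start and issues a
 * single read for the contiguous span. Hitting a hole is an error
 * (-EIO), and a read extending past i_size is only tolerated for
 * readahead requests, which are silently dropped.
 */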
int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
			   struct buffer_head *bhs[], int flags,
			   int (*validate)(struct super_block *sb,
					   struct buffer_head *bh))
{
	int rc = 0;
	u64 p_block, p_count;
	int i, count, done = 0;

	trace_ocfs2_read_virt_blocks(
	     inode, (unsigned long long)v_block, nr, bhs, flags, validate);

	if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
	    i_size_read(inode)) {
		BUG_ON(!(flags & OCFS2_BH_READAHEAD));
		goto out;
	}

	while (done < nr) {
		down_read(&OCFS2_I(inode)->ip_alloc_sem);
		rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
						 &p_block, &p_count, NULL);
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		if (rc) {
			mlog_errno(rc);
			break;
		}

		if (!p_block) {
			rc = -EIO;
			mlog(ML_ERROR,
			     "Inode #%llu contains a hole at offset %llu\n",
			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			     (unsigned long long)(v_block + done) <<
			     inode->i_sb->s_blocksize_bits);
			break;
		}

		count = nr - done;
		if (p_count < count)
			count = p_count;

		/*
		 * If the caller passed us bhs, they should have come
		 * from a previous readahead call to this function. Thus,
		 * they should have the right b_blocknr.
		 */
		for (i = 0; i < count; i++) {
			if (!bhs[done + i])
				continue;
			BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
		}

		rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
				       bhs + done, flags, validate);
		if (rc) {
			mlog_errno(rc);
			break;
		}
		done += count;
	}

out:
	return rc;
}