1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * suballoc.c |
4 | * |
5 | * metadata alloc and free |
6 | * Inspired by ext3 block groups. |
7 | * |
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. |
9 | */ |
10 | |
11 | #include <linux/fs.h> |
12 | #include <linux/types.h> |
13 | #include <linux/slab.h> |
14 | #include <linux/highmem.h> |
15 | |
16 | #include <cluster/masklog.h> |
17 | |
18 | #include "ocfs2.h" |
19 | |
20 | #include "alloc.h" |
21 | #include "blockcheck.h" |
22 | #include "dlmglue.h" |
23 | #include "inode.h" |
24 | #include "journal.h" |
25 | #include "localalloc.h" |
26 | #include "suballoc.h" |
27 | #include "super.h" |
28 | #include "sysfile.h" |
29 | #include "uptodate.h" |
30 | #include "ocfs2_trace.h" |
31 | |
32 | #include "buffer_head_io.h" |
33 | |
/*
 * Flags for suballocator reservations.  ALLOC_NEW_GROUP is what
 * ocfs2_reserve_suballoc_bits() tests before it will add a block group;
 * the whole flag word is also handed on to the cluster reservation made
 * for that new group.
 */
#define NOT_ALLOC_NEW_GROUP		0
#define ALLOC_NEW_GROUP			0x1
#define ALLOC_GROUPS_FROM_GLOBAL	0x2

/* Ceiling the stolen-allocation counter is compared against before stealing. */
#define OCFS2_MAX_TO_STEAL		1024
39 | |
40 | struct ocfs2_suballoc_result { |
41 | u64 sr_bg_blkno; /* The bg we allocated from. Set |
42 | to 0 when a block group is |
43 | contiguous. */ |
44 | u64 sr_bg_stable_blkno; /* |
45 | * Doesn't change, always |
46 | * set to target block |
47 | * group descriptor |
48 | * block. |
49 | */ |
50 | u64 sr_blkno; /* The first allocated block */ |
51 | unsigned int sr_bit_offset; /* The bit in the bg */ |
52 | unsigned int sr_bits; /* How many bits we claimed */ |
53 | }; |
54 | |
55 | static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res) |
56 | { |
57 | if (res->sr_blkno == 0) |
58 | return 0; |
59 | |
60 | if (res->sr_bg_blkno) |
61 | return res->sr_bg_blkno; |
62 | |
63 | return ocfs2_which_suballoc_group(block: res->sr_blkno, bit: res->sr_bit_offset); |
64 | } |
65 | |
66 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); |
67 | static int ocfs2_block_group_fill(handle_t *handle, |
68 | struct inode *alloc_inode, |
69 | struct buffer_head *bg_bh, |
70 | u64 group_blkno, |
71 | unsigned int group_clusters, |
72 | u16 my_chain, |
73 | struct ocfs2_chain_list *cl); |
74 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, |
75 | struct inode *alloc_inode, |
76 | struct buffer_head *bh, |
77 | u64 max_block, |
78 | u64 *last_alloc_group, |
79 | int flags); |
80 | |
81 | static int ocfs2_cluster_group_search(struct inode *inode, |
82 | struct buffer_head *group_bh, |
83 | u32 bits_wanted, u32 min_bits, |
84 | u64 max_block, |
85 | struct ocfs2_suballoc_result *res); |
86 | static int ocfs2_block_group_search(struct inode *inode, |
87 | struct buffer_head *group_bh, |
88 | u32 bits_wanted, u32 min_bits, |
89 | u64 max_block, |
90 | struct ocfs2_suballoc_result *res); |
91 | static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, |
92 | handle_t *handle, |
93 | u32 bits_wanted, |
94 | u32 min_bits, |
95 | struct ocfs2_suballoc_result *res); |
96 | static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, |
97 | int nr); |
98 | static int ocfs2_relink_block_group(handle_t *handle, |
99 | struct inode *alloc_inode, |
100 | struct buffer_head *fe_bh, |
101 | struct buffer_head *bg_bh, |
102 | struct buffer_head *prev_bg_bh, |
103 | u16 chain); |
104 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, |
105 | u32 wanted); |
106 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, |
107 | u64 bg_blkno, |
108 | u16 bg_bit_off); |
109 | static inline void ocfs2_block_to_cluster_group(struct inode *inode, |
110 | u64 data_blkno, |
111 | u64 *bg_blkno, |
112 | u16 *bg_bit_off); |
113 | static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, |
114 | u32 bits_wanted, u64 max_block, |
115 | int flags, |
116 | struct ocfs2_alloc_context **ac); |
117 | |
118 | void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) |
119 | { |
120 | struct inode *inode = ac->ac_inode; |
121 | |
122 | if (inode) { |
123 | if (ac->ac_which != OCFS2_AC_USE_LOCAL) |
124 | ocfs2_inode_unlock(inode, ex: 1); |
125 | |
126 | inode_unlock(inode); |
127 | |
128 | iput(inode); |
129 | ac->ac_inode = NULL; |
130 | } |
131 | brelse(bh: ac->ac_bh); |
132 | ac->ac_bh = NULL; |
133 | ac->ac_resv = NULL; |
134 | kfree(objp: ac->ac_find_loc_priv); |
135 | ac->ac_find_loc_priv = NULL; |
136 | } |
137 | |
/*
 * Tear down an allocation context completely: release what it holds via
 * ocfs2_free_ac_resource(), then free the structure.  @ac must not be
 * used afterwards.
 */
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
{
	ocfs2_free_ac_resource(ac);

	kfree(ac);
}
143 | |
144 | static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) |
145 | { |
146 | return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); |
147 | } |
148 | |
/*
 * Report a bad group descriptor from inside a validator.  Expects `resize`
 * and `sb` to be in scope where it is expanded.  When resize is non-zero
 * the problem is only logged and execution continues after the macro;
 * otherwise the ENCLOSING FUNCTION returns the result of ocfs2_error().
 */
#define do_error(fmt, ...)						\
do {									\
	if (!resize)							\
		return ocfs2_error(sb, fmt, ##__VA_ARGS__);		\
	mlog(ML_ERROR, fmt, ##__VA_ARGS__);				\
} while (0)
156 | |
157 | static int ocfs2_validate_gd_self(struct super_block *sb, |
158 | struct buffer_head *bh, |
159 | int resize) |
160 | { |
161 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; |
162 | |
163 | if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { |
164 | do_error("Group descriptor #%llu has bad signature %.*s\n" , |
165 | (unsigned long long)bh->b_blocknr, 7, |
166 | gd->bg_signature); |
167 | } |
168 | |
169 | if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) { |
170 | do_error("Group descriptor #%llu has an invalid bg_blkno of %llu\n" , |
171 | (unsigned long long)bh->b_blocknr, |
172 | (unsigned long long)le64_to_cpu(gd->bg_blkno)); |
173 | } |
174 | |
175 | if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) { |
176 | do_error("Group descriptor #%llu has an invalid fs_generation of #%u\n" , |
177 | (unsigned long long)bh->b_blocknr, |
178 | le32_to_cpu(gd->bg_generation)); |
179 | } |
180 | |
181 | if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { |
182 | do_error("Group descriptor #%llu has bit count %u but claims that %u are free\n" , |
183 | (unsigned long long)bh->b_blocknr, |
184 | le16_to_cpu(gd->bg_bits), |
185 | le16_to_cpu(gd->bg_free_bits_count)); |
186 | } |
187 | |
188 | if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { |
189 | do_error("Group descriptor #%llu has bit count %u but max bitmap bits of %u\n" , |
190 | (unsigned long long)bh->b_blocknr, |
191 | le16_to_cpu(gd->bg_bits), |
192 | 8 * le16_to_cpu(gd->bg_size)); |
193 | } |
194 | |
195 | return 0; |
196 | } |
197 | |
198 | static int ocfs2_validate_gd_parent(struct super_block *sb, |
199 | struct ocfs2_dinode *di, |
200 | struct buffer_head *bh, |
201 | int resize) |
202 | { |
203 | unsigned int max_bits; |
204 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; |
205 | |
206 | if (di->i_blkno != gd->bg_parent_dinode) { |
207 | do_error("Group descriptor #%llu has bad parent pointer (%llu, expected %llu)\n" , |
208 | (unsigned long long)bh->b_blocknr, |
209 | (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), |
210 | (unsigned long long)le64_to_cpu(di->i_blkno)); |
211 | } |
212 | |
213 | max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); |
214 | if (le16_to_cpu(gd->bg_bits) > max_bits) { |
215 | do_error("Group descriptor #%llu has bit count of %u\n" , |
216 | (unsigned long long)bh->b_blocknr, |
217 | le16_to_cpu(gd->bg_bits)); |
218 | } |
219 | |
220 | /* In resize, we may meet the case bg_chain == cl_next_free_rec. */ |
221 | if ((le16_to_cpu(gd->bg_chain) > |
222 | le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) || |
223 | ((le16_to_cpu(gd->bg_chain) == |
224 | le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) && !resize)) { |
225 | do_error("Group descriptor #%llu has bad chain %u\n" , |
226 | (unsigned long long)bh->b_blocknr, |
227 | le16_to_cpu(gd->bg_chain)); |
228 | } |
229 | |
230 | return 0; |
231 | } |
232 | |
233 | #undef do_error |
234 | |
235 | /* |
236 | * This version only prints errors. It does not fail the filesystem, and |
237 | * exists only for resize. |
238 | */ |
239 | int ocfs2_check_group_descriptor(struct super_block *sb, |
240 | struct ocfs2_dinode *di, |
241 | struct buffer_head *bh) |
242 | { |
243 | int rc; |
244 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; |
245 | |
246 | BUG_ON(!buffer_uptodate(bh)); |
247 | |
248 | /* |
249 | * If the ecc fails, we return the error but otherwise |
250 | * leave the filesystem running. We know any error is |
251 | * local to this block. |
252 | */ |
253 | rc = ocfs2_validate_meta_ecc(sb, data: bh->b_data, bc: &gd->bg_check); |
254 | if (rc) { |
255 | mlog(ML_ERROR, |
256 | "Checksum failed for group descriptor %llu\n" , |
257 | (unsigned long long)bh->b_blocknr); |
258 | } else |
259 | rc = ocfs2_validate_gd_self(sb, bh, resize: 1); |
260 | if (!rc) |
261 | rc = ocfs2_validate_gd_parent(sb, di, bh, resize: 1); |
262 | |
263 | return rc; |
264 | } |
265 | |
266 | static int ocfs2_validate_group_descriptor(struct super_block *sb, |
267 | struct buffer_head *bh) |
268 | { |
269 | int rc; |
270 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; |
271 | |
272 | trace_ocfs2_validate_group_descriptor( |
273 | num: (unsigned long long)bh->b_blocknr); |
274 | |
275 | BUG_ON(!buffer_uptodate(bh)); |
276 | |
277 | /* |
278 | * If the ecc fails, we return the error but otherwise |
279 | * leave the filesystem running. We know any error is |
280 | * local to this block. |
281 | */ |
282 | rc = ocfs2_validate_meta_ecc(sb, data: bh->b_data, bc: &gd->bg_check); |
283 | if (rc) |
284 | return rc; |
285 | |
286 | /* |
287 | * Errors after here are fatal. |
288 | */ |
289 | |
290 | return ocfs2_validate_gd_self(sb, bh, resize: 0); |
291 | } |
292 | |
293 | int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, |
294 | u64 gd_blkno, struct buffer_head **bh) |
295 | { |
296 | int rc; |
297 | struct buffer_head *tmp = *bh; |
298 | |
299 | rc = ocfs2_read_block(ci: INODE_CACHE(inode), off: gd_blkno, bh: &tmp, |
300 | validate: ocfs2_validate_group_descriptor); |
301 | if (rc) |
302 | goto out; |
303 | |
304 | rc = ocfs2_validate_gd_parent(sb: inode->i_sb, di, bh: tmp, resize: 0); |
305 | if (rc) { |
306 | brelse(bh: tmp); |
307 | goto out; |
308 | } |
309 | |
310 | /* If ocfs2_read_block() got us a new bh, pass it up. */ |
311 | if (!*bh) |
312 | *bh = tmp; |
313 | |
314 | out: |
315 | return rc; |
316 | } |
317 | |
318 | static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, |
319 | struct ocfs2_group_desc *bg, |
320 | struct ocfs2_chain_list *cl, |
321 | u64 p_blkno, unsigned int clusters) |
322 | { |
323 | struct ocfs2_extent_list *el = &bg->bg_list; |
324 | struct ocfs2_extent_rec *rec; |
325 | |
326 | BUG_ON(!ocfs2_supports_discontig_bg(osb)); |
327 | if (!el->l_next_free_rec) |
328 | el->l_count = cpu_to_le16(ocfs2_extent_recs_per_gd(osb->sb)); |
329 | rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec)]; |
330 | rec->e_blkno = cpu_to_le64(p_blkno); |
331 | rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / |
332 | le16_to_cpu(cl->cl_bpc)); |
333 | rec->e_leaf_clusters = cpu_to_le16(clusters); |
334 | le16_add_cpu(var: &bg->bg_bits, val: clusters * le16_to_cpu(cl->cl_bpc)); |
335 | le16_add_cpu(var: &bg->bg_free_bits_count, |
336 | val: clusters * le16_to_cpu(cl->cl_bpc)); |
337 | le16_add_cpu(var: &el->l_next_free_rec, val: 1); |
338 | } |
339 | |
340 | static int ocfs2_block_group_fill(handle_t *handle, |
341 | struct inode *alloc_inode, |
342 | struct buffer_head *bg_bh, |
343 | u64 group_blkno, |
344 | unsigned int group_clusters, |
345 | u16 my_chain, |
346 | struct ocfs2_chain_list *cl) |
347 | { |
348 | int status = 0; |
349 | struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); |
350 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; |
351 | struct super_block * sb = alloc_inode->i_sb; |
352 | |
353 | if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) { |
354 | status = ocfs2_error(alloc_inode->i_sb, |
355 | "group block (%llu) != b_blocknr (%llu)\n" , |
356 | (unsigned long long)group_blkno, |
357 | (unsigned long long) bg_bh->b_blocknr); |
358 | goto bail; |
359 | } |
360 | |
361 | status = ocfs2_journal_access_gd(handle, |
362 | ci: INODE_CACHE(inode: alloc_inode), |
363 | bh: bg_bh, |
364 | OCFS2_JOURNAL_ACCESS_CREATE); |
365 | if (status < 0) { |
366 | mlog_errno(status); |
367 | goto bail; |
368 | } |
369 | |
370 | memset(bg, 0, sb->s_blocksize); |
371 | strcpy(p: bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); |
372 | bg->bg_generation = cpu_to_le32(osb->fs_generation); |
373 | bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1, |
374 | osb->s_feature_incompat)); |
375 | bg->bg_chain = cpu_to_le16(my_chain); |
376 | bg->bg_next_group = cl->cl_recs[my_chain].c_blkno; |
377 | bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno); |
378 | bg->bg_blkno = cpu_to_le64(group_blkno); |
379 | if (group_clusters == le16_to_cpu(cl->cl_cpg)) |
380 | bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl)); |
381 | else |
382 | ocfs2_bg_discontig_add_extent(osb, bg, cl, p_blkno: group_blkno, |
383 | clusters: group_clusters); |
384 | |
385 | /* set the 1st bit in the bitmap to account for the descriptor block */ |
386 | ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap); |
387 | bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1); |
388 | |
389 | ocfs2_journal_dirty(handle, bh: bg_bh); |
390 | |
391 | /* There is no need to zero out or otherwise initialize the |
392 | * other blocks in a group - All valid FS metadata in a block |
393 | * group stores the superblock fs_generation value at |
394 | * allocation time. */ |
395 | |
396 | bail: |
397 | if (status) |
398 | mlog_errno(status); |
399 | return status; |
400 | } |
401 | |
402 | static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl) |
403 | { |
404 | u16 curr, best; |
405 | |
406 | best = curr = 0; |
407 | while (curr < le16_to_cpu(cl->cl_count)) { |
408 | if (le32_to_cpu(cl->cl_recs[best].c_total) > |
409 | le32_to_cpu(cl->cl_recs[curr].c_total)) |
410 | best = curr; |
411 | curr++; |
412 | } |
413 | return best; |
414 | } |
415 | |
416 | static struct buffer_head * |
417 | ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle, |
418 | struct inode *alloc_inode, |
419 | struct ocfs2_alloc_context *ac, |
420 | struct ocfs2_chain_list *cl) |
421 | { |
422 | int status; |
423 | u32 bit_off, num_bits; |
424 | u64 bg_blkno; |
425 | struct buffer_head *bg_bh; |
426 | unsigned int alloc_rec = ocfs2_find_smallest_chain(cl); |
427 | |
428 | status = ocfs2_claim_clusters(handle, ac, |
429 | le16_to_cpu(cl->cl_cpg), cluster_start: &bit_off, |
430 | num_clusters: &num_bits); |
431 | if (status < 0) { |
432 | if (status != -ENOSPC) |
433 | mlog_errno(status); |
434 | goto bail; |
435 | } |
436 | |
437 | /* setup the group */ |
438 | bg_blkno = ocfs2_clusters_to_blocks(sb: osb->sb, clusters: bit_off); |
439 | trace_ocfs2_block_group_alloc_contig( |
440 | val1: (unsigned long long)bg_blkno, val2: alloc_rec); |
441 | |
442 | bg_bh = sb_getblk(sb: osb->sb, block: bg_blkno); |
443 | if (!bg_bh) { |
444 | status = -ENOMEM; |
445 | mlog_errno(status); |
446 | goto bail; |
447 | } |
448 | ocfs2_set_new_buffer_uptodate(ci: INODE_CACHE(inode: alloc_inode), bh: bg_bh); |
449 | |
450 | status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh, |
451 | group_blkno: bg_blkno, group_clusters: num_bits, my_chain: alloc_rec, cl); |
452 | if (status < 0) { |
453 | brelse(bh: bg_bh); |
454 | mlog_errno(status); |
455 | } |
456 | |
457 | bail: |
458 | return status ? ERR_PTR(error: status) : bg_bh; |
459 | } |
460 | |
461 | static int ocfs2_block_group_claim_bits(struct ocfs2_super *osb, |
462 | handle_t *handle, |
463 | struct ocfs2_alloc_context *ac, |
464 | unsigned int min_bits, |
465 | u32 *bit_off, u32 *num_bits) |
466 | { |
467 | int status = 0; |
468 | |
469 | while (min_bits) { |
470 | status = ocfs2_claim_clusters(handle, ac, min_clusters: min_bits, |
471 | cluster_start: bit_off, num_clusters: num_bits); |
472 | if (status != -ENOSPC) |
473 | break; |
474 | |
475 | min_bits >>= 1; |
476 | } |
477 | |
478 | return status; |
479 | } |
480 | |
481 | static int ocfs2_block_group_grow_discontig(handle_t *handle, |
482 | struct inode *alloc_inode, |
483 | struct buffer_head *bg_bh, |
484 | struct ocfs2_alloc_context *ac, |
485 | struct ocfs2_chain_list *cl, |
486 | unsigned int min_bits) |
487 | { |
488 | int status; |
489 | struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); |
490 | struct ocfs2_group_desc *bg = |
491 | (struct ocfs2_group_desc *)bg_bh->b_data; |
492 | unsigned int needed = le16_to_cpu(cl->cl_cpg) - |
493 | le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc); |
494 | u32 p_cpos, clusters; |
495 | u64 p_blkno; |
496 | struct ocfs2_extent_list *el = &bg->bg_list; |
497 | |
498 | status = ocfs2_journal_access_gd(handle, |
499 | ci: INODE_CACHE(inode: alloc_inode), |
500 | bh: bg_bh, |
501 | OCFS2_JOURNAL_ACCESS_CREATE); |
502 | if (status < 0) { |
503 | mlog_errno(status); |
504 | goto bail; |
505 | } |
506 | |
507 | while ((needed > 0) && (le16_to_cpu(el->l_next_free_rec) < |
508 | le16_to_cpu(el->l_count))) { |
509 | if (min_bits > needed) |
510 | min_bits = needed; |
511 | status = ocfs2_block_group_claim_bits(osb, handle, ac, |
512 | min_bits, bit_off: &p_cpos, |
513 | num_bits: &clusters); |
514 | if (status < 0) { |
515 | if (status != -ENOSPC) |
516 | mlog_errno(status); |
517 | goto bail; |
518 | } |
519 | p_blkno = ocfs2_clusters_to_blocks(sb: osb->sb, clusters: p_cpos); |
520 | ocfs2_bg_discontig_add_extent(osb, bg, cl, p_blkno, |
521 | clusters); |
522 | |
523 | min_bits = clusters; |
524 | needed = le16_to_cpu(cl->cl_cpg) - |
525 | le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc); |
526 | } |
527 | |
528 | if (needed > 0) { |
529 | /* |
530 | * We have used up all the extent rec but can't fill up |
531 | * the cpg. So bail out. |
532 | */ |
533 | status = -ENOSPC; |
534 | goto bail; |
535 | } |
536 | |
537 | ocfs2_journal_dirty(handle, bh: bg_bh); |
538 | |
539 | bail: |
540 | return status; |
541 | } |
542 | |
543 | static void ocfs2_bg_alloc_cleanup(handle_t *handle, |
544 | struct ocfs2_alloc_context *cluster_ac, |
545 | struct inode *alloc_inode, |
546 | struct buffer_head *bg_bh) |
547 | { |
548 | int i, ret; |
549 | struct ocfs2_group_desc *bg; |
550 | struct ocfs2_extent_list *el; |
551 | struct ocfs2_extent_rec *rec; |
552 | |
553 | if (!bg_bh) |
554 | return; |
555 | |
556 | bg = (struct ocfs2_group_desc *)bg_bh->b_data; |
557 | el = &bg->bg_list; |
558 | for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { |
559 | rec = &el->l_recs[i]; |
560 | ret = ocfs2_free_clusters(handle, bitmap_inode: cluster_ac->ac_inode, |
561 | bitmap_bh: cluster_ac->ac_bh, |
562 | le64_to_cpu(rec->e_blkno), |
563 | le16_to_cpu(rec->e_leaf_clusters)); |
564 | if (ret) |
565 | mlog_errno(ret); |
566 | /* Try all the clusters to free */ |
567 | } |
568 | |
569 | ocfs2_remove_from_cache(ci: INODE_CACHE(inode: alloc_inode), bh: bg_bh); |
570 | brelse(bh: bg_bh); |
571 | } |
572 | |
573 | static struct buffer_head * |
574 | ocfs2_block_group_alloc_discontig(handle_t *handle, |
575 | struct inode *alloc_inode, |
576 | struct ocfs2_alloc_context *ac, |
577 | struct ocfs2_chain_list *cl) |
578 | { |
579 | int status; |
580 | u32 bit_off, num_bits; |
581 | u64 bg_blkno; |
582 | unsigned int min_bits = le16_to_cpu(cl->cl_cpg) >> 1; |
583 | struct buffer_head *bg_bh = NULL; |
584 | unsigned int alloc_rec = ocfs2_find_smallest_chain(cl); |
585 | struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); |
586 | |
587 | if (!ocfs2_supports_discontig_bg(osb)) { |
588 | status = -ENOSPC; |
589 | goto bail; |
590 | } |
591 | |
592 | status = ocfs2_extend_trans(handle, |
593 | nblocks: ocfs2_calc_bg_discontig_credits(sb: osb->sb)); |
594 | if (status) { |
595 | mlog_errno(status); |
596 | goto bail; |
597 | } |
598 | |
599 | /* |
600 | * We're going to be grabbing from multiple cluster groups. |
601 | * We don't have enough credits to relink them all, and the |
602 | * cluster groups will be staying in cache for the duration of |
603 | * this operation. |
604 | */ |
605 | ac->ac_disable_chain_relink = 1; |
606 | |
607 | /* Claim the first region */ |
608 | status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits, |
609 | bit_off: &bit_off, num_bits: &num_bits); |
610 | if (status < 0) { |
611 | if (status != -ENOSPC) |
612 | mlog_errno(status); |
613 | goto bail; |
614 | } |
615 | min_bits = num_bits; |
616 | |
617 | /* setup the group */ |
618 | bg_blkno = ocfs2_clusters_to_blocks(sb: osb->sb, clusters: bit_off); |
619 | trace_ocfs2_block_group_alloc_discontig( |
620 | val1: (unsigned long long)bg_blkno, val2: alloc_rec); |
621 | |
622 | bg_bh = sb_getblk(sb: osb->sb, block: bg_blkno); |
623 | if (!bg_bh) { |
624 | status = -ENOMEM; |
625 | mlog_errno(status); |
626 | goto bail; |
627 | } |
628 | ocfs2_set_new_buffer_uptodate(ci: INODE_CACHE(inode: alloc_inode), bh: bg_bh); |
629 | |
630 | status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh, |
631 | group_blkno: bg_blkno, group_clusters: num_bits, my_chain: alloc_rec, cl); |
632 | if (status < 0) { |
633 | mlog_errno(status); |
634 | goto bail; |
635 | } |
636 | |
637 | status = ocfs2_block_group_grow_discontig(handle, alloc_inode, |
638 | bg_bh, ac, cl, min_bits); |
639 | if (status) |
640 | mlog_errno(status); |
641 | |
642 | bail: |
643 | if (status) |
644 | ocfs2_bg_alloc_cleanup(handle, cluster_ac: ac, alloc_inode, bg_bh); |
645 | return status ? ERR_PTR(error: status) : bg_bh; |
646 | } |
647 | |
648 | /* |
649 | * We expect the block group allocator to already be locked. |
650 | */ |
651 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, |
652 | struct inode *alloc_inode, |
653 | struct buffer_head *bh, |
654 | u64 max_block, |
655 | u64 *last_alloc_group, |
656 | int flags) |
657 | { |
658 | int status, credits; |
659 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; |
660 | struct ocfs2_chain_list *cl; |
661 | struct ocfs2_alloc_context *ac = NULL; |
662 | handle_t *handle = NULL; |
663 | u16 alloc_rec; |
664 | struct buffer_head *bg_bh = NULL; |
665 | struct ocfs2_group_desc *bg; |
666 | |
667 | BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode)); |
668 | |
669 | cl = &fe->id2.i_chain; |
670 | status = ocfs2_reserve_clusters_with_limit(osb, |
671 | le16_to_cpu(cl->cl_cpg), |
672 | max_block, flags, ac: &ac); |
673 | if (status < 0) { |
674 | if (status != -ENOSPC) |
675 | mlog_errno(status); |
676 | goto bail; |
677 | } |
678 | |
679 | credits = ocfs2_calc_group_alloc_credits(sb: osb->sb, |
680 | le16_to_cpu(cl->cl_cpg)); |
681 | handle = ocfs2_start_trans(osb, max_buffs: credits); |
682 | if (IS_ERR(ptr: handle)) { |
683 | status = PTR_ERR(ptr: handle); |
684 | handle = NULL; |
685 | mlog_errno(status); |
686 | goto bail; |
687 | } |
688 | |
689 | if (last_alloc_group && *last_alloc_group != 0) { |
690 | trace_ocfs2_block_group_alloc( |
691 | num: (unsigned long long)*last_alloc_group); |
692 | ac->ac_last_group = *last_alloc_group; |
693 | } |
694 | |
695 | bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode, |
696 | ac, cl); |
697 | if (PTR_ERR(ptr: bg_bh) == -ENOSPC) |
698 | bg_bh = ocfs2_block_group_alloc_discontig(handle, |
699 | alloc_inode, |
700 | ac, cl); |
701 | if (IS_ERR(ptr: bg_bh)) { |
702 | status = PTR_ERR(ptr: bg_bh); |
703 | bg_bh = NULL; |
704 | if (status != -ENOSPC) |
705 | mlog_errno(status); |
706 | goto bail; |
707 | } |
708 | bg = (struct ocfs2_group_desc *) bg_bh->b_data; |
709 | |
710 | status = ocfs2_journal_access_di(handle, ci: INODE_CACHE(inode: alloc_inode), |
711 | bh, OCFS2_JOURNAL_ACCESS_WRITE); |
712 | if (status < 0) { |
713 | mlog_errno(status); |
714 | goto bail; |
715 | } |
716 | |
717 | alloc_rec = le16_to_cpu(bg->bg_chain); |
718 | le32_add_cpu(var: &cl->cl_recs[alloc_rec].c_free, |
719 | le16_to_cpu(bg->bg_free_bits_count)); |
720 | le32_add_cpu(var: &cl->cl_recs[alloc_rec].c_total, |
721 | le16_to_cpu(bg->bg_bits)); |
722 | cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno; |
723 | if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) |
724 | le16_add_cpu(var: &cl->cl_next_free_rec, val: 1); |
725 | |
726 | le32_add_cpu(var: &fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) - |
727 | le16_to_cpu(bg->bg_free_bits_count)); |
728 | le32_add_cpu(var: &fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits)); |
729 | le32_add_cpu(var: &fe->i_clusters, le16_to_cpu(cl->cl_cpg)); |
730 | |
731 | ocfs2_journal_dirty(handle, bh); |
732 | |
733 | spin_lock(lock: &OCFS2_I(inode: alloc_inode)->ip_lock); |
734 | OCFS2_I(inode: alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); |
735 | fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb, |
736 | le32_to_cpu(fe->i_clusters))); |
737 | spin_unlock(lock: &OCFS2_I(inode: alloc_inode)->ip_lock); |
738 | i_size_write(inode: alloc_inode, le64_to_cpu(fe->i_size)); |
739 | alloc_inode->i_blocks = ocfs2_inode_sector_count(inode: alloc_inode); |
740 | ocfs2_update_inode_fsync_trans(handle, inode: alloc_inode, datasync: 0); |
741 | |
742 | status = 0; |
743 | |
744 | /* save the new last alloc group so that the caller can cache it. */ |
745 | if (last_alloc_group) |
746 | *last_alloc_group = ac->ac_last_group; |
747 | |
748 | bail: |
749 | if (handle) |
750 | ocfs2_commit_trans(osb, handle); |
751 | |
752 | if (ac) |
753 | ocfs2_free_alloc_context(ac); |
754 | |
755 | brelse(bh: bg_bh); |
756 | |
757 | if (status) |
758 | mlog_errno(status); |
759 | return status; |
760 | } |
761 | |
762 | static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, |
763 | struct ocfs2_alloc_context *ac, |
764 | int type, |
765 | u32 slot, |
766 | u64 *last_alloc_group, |
767 | int flags) |
768 | { |
769 | int status; |
770 | u32 bits_wanted = ac->ac_bits_wanted; |
771 | struct inode *alloc_inode; |
772 | struct buffer_head *bh = NULL; |
773 | struct ocfs2_dinode *fe; |
774 | u32 free_bits; |
775 | |
776 | alloc_inode = ocfs2_get_system_file_inode(osb, type, slot); |
777 | if (!alloc_inode) { |
778 | mlog_errno(-EINVAL); |
779 | return -EINVAL; |
780 | } |
781 | |
782 | inode_lock(inode: alloc_inode); |
783 | |
784 | status = ocfs2_inode_lock(alloc_inode, &bh, 1); |
785 | if (status < 0) { |
786 | inode_unlock(inode: alloc_inode); |
787 | iput(alloc_inode); |
788 | |
789 | mlog_errno(status); |
790 | return status; |
791 | } |
792 | |
793 | ac->ac_inode = alloc_inode; |
794 | ac->ac_alloc_slot = slot; |
795 | |
796 | fe = (struct ocfs2_dinode *) bh->b_data; |
797 | |
798 | /* The bh was validated by the inode read inside |
799 | * ocfs2_inode_lock(). Any corruption is a code bug. */ |
800 | BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); |
801 | |
802 | if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { |
803 | status = ocfs2_error(alloc_inode->i_sb, |
804 | "Invalid chain allocator %llu\n" , |
805 | (unsigned long long)le64_to_cpu(fe->i_blkno)); |
806 | goto bail; |
807 | } |
808 | |
809 | free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) - |
810 | le32_to_cpu(fe->id1.bitmap1.i_used); |
811 | |
812 | if (bits_wanted > free_bits) { |
813 | /* cluster bitmap never grows */ |
814 | if (ocfs2_is_cluster_bitmap(inode: alloc_inode)) { |
815 | trace_ocfs2_reserve_suballoc_bits_nospc(val1: bits_wanted, |
816 | val2: free_bits); |
817 | status = -ENOSPC; |
818 | goto bail; |
819 | } |
820 | |
821 | if (!(flags & ALLOC_NEW_GROUP)) { |
822 | trace_ocfs2_reserve_suballoc_bits_no_new_group( |
823 | value1: slot, value2: bits_wanted, value3: free_bits); |
824 | status = -ENOSPC; |
825 | goto bail; |
826 | } |
827 | |
828 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh, |
829 | max_block: ac->ac_max_block, |
830 | last_alloc_group, flags); |
831 | if (status < 0) { |
832 | if (status != -ENOSPC) |
833 | mlog_errno(status); |
834 | goto bail; |
835 | } |
836 | atomic_inc(v: &osb->alloc_stats.bg_extends); |
837 | |
838 | /* You should never ask for this much metadata */ |
839 | BUG_ON(bits_wanted > |
840 | (le32_to_cpu(fe->id1.bitmap1.i_total) |
841 | - le32_to_cpu(fe->id1.bitmap1.i_used))); |
842 | } |
843 | |
844 | get_bh(bh); |
845 | ac->ac_bh = bh; |
846 | bail: |
847 | brelse(bh); |
848 | |
849 | if (status) |
850 | mlog_errno(status); |
851 | return status; |
852 | } |
853 | |
854 | static void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) |
855 | { |
856 | spin_lock(lock: &osb->osb_lock); |
857 | osb->s_inode_steal_slot = OCFS2_INVALID_SLOT; |
858 | spin_unlock(lock: &osb->osb_lock); |
859 | atomic_set(v: &osb->s_num_inodes_stolen, i: 0); |
860 | } |
861 | |
862 | static void ocfs2_init_meta_steal_slot(struct ocfs2_super *osb) |
863 | { |
864 | spin_lock(lock: &osb->osb_lock); |
865 | osb->s_meta_steal_slot = OCFS2_INVALID_SLOT; |
866 | spin_unlock(lock: &osb->osb_lock); |
867 | atomic_set(v: &osb->s_num_meta_stolen, i: 0); |
868 | } |
869 | |
/* Reset both steal slots (inode and metadata) and their counters. */
void ocfs2_init_steal_slots(struct ocfs2_super *osb)
{
	ocfs2_init_inode_steal_slot(osb);

	ocfs2_init_meta_steal_slot(osb);
}
875 | |
876 | static void __ocfs2_set_steal_slot(struct ocfs2_super *osb, int slot, int type) |
877 | { |
878 | spin_lock(lock: &osb->osb_lock); |
879 | if (type == INODE_ALLOC_SYSTEM_INODE) |
880 | osb->s_inode_steal_slot = (u16)slot; |
881 | else if (type == EXTENT_ALLOC_SYSTEM_INODE) |
882 | osb->s_meta_steal_slot = (u16)slot; |
883 | spin_unlock(lock: &osb->osb_lock); |
884 | } |
885 | |
886 | static int __ocfs2_get_steal_slot(struct ocfs2_super *osb, int type) |
887 | { |
888 | int slot = OCFS2_INVALID_SLOT; |
889 | |
890 | spin_lock(lock: &osb->osb_lock); |
891 | if (type == INODE_ALLOC_SYSTEM_INODE) |
892 | slot = osb->s_inode_steal_slot; |
893 | else if (type == EXTENT_ALLOC_SYSTEM_INODE) |
894 | slot = osb->s_meta_steal_slot; |
895 | spin_unlock(lock: &osb->osb_lock); |
896 | |
897 | return slot; |
898 | } |
899 | |
900 | static int ocfs2_get_inode_steal_slot(struct ocfs2_super *osb) |
901 | { |
902 | return __ocfs2_get_steal_slot(osb, type: INODE_ALLOC_SYSTEM_INODE); |
903 | } |
904 | |
905 | static int ocfs2_get_meta_steal_slot(struct ocfs2_super *osb) |
906 | { |
907 | return __ocfs2_get_steal_slot(osb, type: EXTENT_ALLOC_SYSTEM_INODE); |
908 | } |
909 | |
910 | static int ocfs2_steal_resource(struct ocfs2_super *osb, |
911 | struct ocfs2_alloc_context *ac, |
912 | int type) |
913 | { |
914 | int i, status = -ENOSPC; |
915 | int slot = __ocfs2_get_steal_slot(osb, type); |
916 | |
917 | /* Start to steal resource from the first slot after ours. */ |
918 | if (slot == OCFS2_INVALID_SLOT) |
919 | slot = osb->slot_num + 1; |
920 | |
921 | for (i = 0; i < osb->max_slots; i++, slot++) { |
922 | if (slot == osb->max_slots) |
923 | slot = 0; |
924 | |
925 | if (slot == osb->slot_num) |
926 | continue; |
927 | |
928 | status = ocfs2_reserve_suballoc_bits(osb, ac, |
929 | type, |
930 | slot: (u32)slot, NULL, |
931 | NOT_ALLOC_NEW_GROUP); |
932 | if (status >= 0) { |
933 | __ocfs2_set_steal_slot(osb, slot, type); |
934 | break; |
935 | } |
936 | |
937 | ocfs2_free_ac_resource(ac); |
938 | } |
939 | |
940 | return status; |
941 | } |
942 | |
943 | static int ocfs2_steal_inode(struct ocfs2_super *osb, |
944 | struct ocfs2_alloc_context *ac) |
945 | { |
946 | return ocfs2_steal_resource(osb, ac, type: INODE_ALLOC_SYSTEM_INODE); |
947 | } |
948 | |
949 | static int ocfs2_steal_meta(struct ocfs2_super *osb, |
950 | struct ocfs2_alloc_context *ac) |
951 | { |
952 | return ocfs2_steal_resource(osb, ac, type: EXTENT_ALLOC_SYSTEM_INODE); |
953 | } |
954 | |
955 | int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, |
956 | int blocks, |
957 | struct ocfs2_alloc_context **ac) |
958 | { |
959 | int status; |
960 | int slot = ocfs2_get_meta_steal_slot(osb); |
961 | |
962 | *ac = kzalloc(size: sizeof(struct ocfs2_alloc_context), GFP_KERNEL); |
963 | if (!(*ac)) { |
964 | status = -ENOMEM; |
965 | mlog_errno(status); |
966 | goto bail; |
967 | } |
968 | |
969 | (*ac)->ac_bits_wanted = blocks; |
970 | (*ac)->ac_which = OCFS2_AC_USE_META; |
971 | (*ac)->ac_group_search = ocfs2_block_group_search; |
972 | |
973 | if (slot != OCFS2_INVALID_SLOT && |
974 | atomic_read(v: &osb->s_num_meta_stolen) < OCFS2_MAX_TO_STEAL) |
975 | goto extent_steal; |
976 | |
977 | atomic_set(v: &osb->s_num_meta_stolen, i: 0); |
978 | status = ocfs2_reserve_suballoc_bits(osb, ac: (*ac), |
979 | type: EXTENT_ALLOC_SYSTEM_INODE, |
980 | slot: (u32)osb->slot_num, NULL, |
981 | ALLOC_GROUPS_FROM_GLOBAL|ALLOC_NEW_GROUP); |
982 | |
983 | |
984 | if (status >= 0) { |
985 | status = 0; |
986 | if (slot != OCFS2_INVALID_SLOT) |
987 | ocfs2_init_meta_steal_slot(osb); |
988 | goto bail; |
989 | } else if (status < 0 && status != -ENOSPC) { |
990 | mlog_errno(status); |
991 | goto bail; |
992 | } |
993 | |
994 | ocfs2_free_ac_resource(ac: *ac); |
995 | |
996 | extent_steal: |
997 | status = ocfs2_steal_meta(osb, ac: *ac); |
998 | atomic_inc(v: &osb->s_num_meta_stolen); |
999 | if (status < 0) { |
1000 | if (status != -ENOSPC) |
1001 | mlog_errno(status); |
1002 | goto bail; |
1003 | } |
1004 | |
1005 | status = 0; |
1006 | bail: |
1007 | if ((status < 0) && *ac) { |
1008 | ocfs2_free_alloc_context(ac: *ac); |
1009 | *ac = NULL; |
1010 | } |
1011 | |
1012 | if (status) |
1013 | mlog_errno(status); |
1014 | return status; |
1015 | } |
1016 | |
/*
 * Reserve as many metadata blocks as ocfs2_extend_meta_needed() reports
 * for @root_el; see ocfs2_reserve_new_metadata_blocks() for the return
 * value and the handling of *@ac.
 */
int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
			       struct ocfs2_extent_list *root_el,
			       struct ocfs2_alloc_context **ac)
{
	int blocks = ocfs2_extend_meta_needed(root_el);

	return ocfs2_reserve_new_metadata_blocks(osb, blocks, ac);
}
1025 | |
1026 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, |
1027 | struct ocfs2_alloc_context **ac) |
1028 | { |
1029 | int status; |
1030 | int slot = ocfs2_get_inode_steal_slot(osb); |
1031 | u64 alloc_group; |
1032 | |
1033 | *ac = kzalloc(size: sizeof(struct ocfs2_alloc_context), GFP_KERNEL); |
1034 | if (!(*ac)) { |
1035 | status = -ENOMEM; |
1036 | mlog_errno(status); |
1037 | goto bail; |
1038 | } |
1039 | |
1040 | (*ac)->ac_bits_wanted = 1; |
1041 | (*ac)->ac_which = OCFS2_AC_USE_INODE; |
1042 | |
1043 | (*ac)->ac_group_search = ocfs2_block_group_search; |
1044 | |
1045 | /* |
1046 | * stat(2) can't handle i_ino > 32bits, so we tell the |
1047 | * lower levels not to allocate us a block group past that |
1048 | * limit. The 'inode64' mount option avoids this behavior. |
1049 | */ |
1050 | if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64)) |
1051 | (*ac)->ac_max_block = (u32)~0U; |
1052 | |
1053 | /* |
1054 | * slot is set when we successfully steal inode from other nodes. |
1055 | * It is reset in 3 places: |
1056 | * 1. when we flush the truncate log |
1057 | * 2. when we complete local alloc recovery. |
1058 | * 3. when we successfully allocate from our own slot. |
1059 | * After it is set, we will go on stealing inodes until we find the |
1060 | * need to check our slots to see whether there is some space for us. |
1061 | */ |
1062 | if (slot != OCFS2_INVALID_SLOT && |
1063 | atomic_read(v: &osb->s_num_inodes_stolen) < OCFS2_MAX_TO_STEAL) |
1064 | goto inode_steal; |
1065 | |
1066 | atomic_set(v: &osb->s_num_inodes_stolen, i: 0); |
1067 | alloc_group = osb->osb_inode_alloc_group; |
1068 | status = ocfs2_reserve_suballoc_bits(osb, ac: *ac, |
1069 | type: INODE_ALLOC_SYSTEM_INODE, |
1070 | slot: (u32)osb->slot_num, |
1071 | last_alloc_group: &alloc_group, |
1072 | ALLOC_NEW_GROUP | |
1073 | ALLOC_GROUPS_FROM_GLOBAL); |
1074 | if (status >= 0) { |
1075 | status = 0; |
1076 | |
1077 | spin_lock(lock: &osb->osb_lock); |
1078 | osb->osb_inode_alloc_group = alloc_group; |
1079 | spin_unlock(lock: &osb->osb_lock); |
1080 | trace_ocfs2_reserve_new_inode_new_group( |
1081 | num: (unsigned long long)alloc_group); |
1082 | |
1083 | /* |
1084 | * Some inodes must be freed by us, so try to allocate |
1085 | * from our own next time. |
1086 | */ |
1087 | if (slot != OCFS2_INVALID_SLOT) |
1088 | ocfs2_init_inode_steal_slot(osb); |
1089 | goto bail; |
1090 | } else if (status < 0 && status != -ENOSPC) { |
1091 | mlog_errno(status); |
1092 | goto bail; |
1093 | } |
1094 | |
1095 | ocfs2_free_ac_resource(ac: *ac); |
1096 | |
1097 | inode_steal: |
1098 | status = ocfs2_steal_inode(osb, ac: *ac); |
1099 | atomic_inc(v: &osb->s_num_inodes_stolen); |
1100 | if (status < 0) { |
1101 | if (status != -ENOSPC) |
1102 | mlog_errno(status); |
1103 | goto bail; |
1104 | } |
1105 | |
1106 | status = 0; |
1107 | bail: |
1108 | if ((status < 0) && *ac) { |
1109 | ocfs2_free_alloc_context(ac: *ac); |
1110 | *ac = NULL; |
1111 | } |
1112 | |
1113 | if (status) |
1114 | mlog_errno(status); |
1115 | return status; |
1116 | } |
1117 | |
1118 | /* local alloc code has to do the same thing, so rather than do this |
1119 | * twice.. */ |
1120 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, |
1121 | struct ocfs2_alloc_context *ac) |
1122 | { |
1123 | int status; |
1124 | |
1125 | ac->ac_which = OCFS2_AC_USE_MAIN; |
1126 | ac->ac_group_search = ocfs2_cluster_group_search; |
1127 | |
1128 | status = ocfs2_reserve_suballoc_bits(osb, ac, |
1129 | type: GLOBAL_BITMAP_SYSTEM_INODE, |
1130 | OCFS2_INVALID_SLOT, NULL, |
1131 | ALLOC_NEW_GROUP); |
1132 | if (status < 0 && status != -ENOSPC) |
1133 | mlog_errno(status); |
1134 | |
1135 | return status; |
1136 | } |
1137 | |
1138 | /* Callers don't need to care which bitmap (local alloc or main) to |
1139 | * use so we figure it out for them, but unfortunately this clutters |
1140 | * things a bit. */ |
1141 | static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, |
1142 | u32 bits_wanted, u64 max_block, |
1143 | int flags, |
1144 | struct ocfs2_alloc_context **ac) |
1145 | { |
1146 | int status, ret = 0; |
1147 | int retried = 0; |
1148 | |
1149 | *ac = kzalloc(size: sizeof(struct ocfs2_alloc_context), GFP_KERNEL); |
1150 | if (!(*ac)) { |
1151 | status = -ENOMEM; |
1152 | mlog_errno(status); |
1153 | goto bail; |
1154 | } |
1155 | |
1156 | (*ac)->ac_bits_wanted = bits_wanted; |
1157 | (*ac)->ac_max_block = max_block; |
1158 | |
1159 | status = -ENOSPC; |
1160 | if (!(flags & ALLOC_GROUPS_FROM_GLOBAL) && |
1161 | ocfs2_alloc_should_use_local(osb, bits: bits_wanted)) { |
1162 | status = ocfs2_reserve_local_alloc_bits(osb, |
1163 | bits_wanted, |
1164 | ac: *ac); |
1165 | if ((status < 0) && (status != -ENOSPC)) { |
1166 | mlog_errno(status); |
1167 | goto bail; |
1168 | } |
1169 | } |
1170 | |
1171 | if (status == -ENOSPC) { |
1172 | retry: |
1173 | status = ocfs2_reserve_cluster_bitmap_bits(osb, ac: *ac); |
1174 | /* Retry if there is sufficient space cached in truncate log */ |
1175 | if (status == -ENOSPC && !retried) { |
1176 | retried = 1; |
1177 | ocfs2_inode_unlock(inode: (*ac)->ac_inode, ex: 1); |
1178 | inode_unlock(inode: (*ac)->ac_inode); |
1179 | |
1180 | ret = ocfs2_try_to_free_truncate_log(osb, needed: bits_wanted); |
1181 | if (ret == 1) { |
1182 | iput((*ac)->ac_inode); |
1183 | (*ac)->ac_inode = NULL; |
1184 | goto retry; |
1185 | } |
1186 | |
1187 | if (ret < 0) |
1188 | mlog_errno(ret); |
1189 | |
1190 | inode_lock(inode: (*ac)->ac_inode); |
1191 | ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1); |
1192 | if (ret < 0) { |
1193 | mlog_errno(ret); |
1194 | inode_unlock(inode: (*ac)->ac_inode); |
1195 | iput((*ac)->ac_inode); |
1196 | (*ac)->ac_inode = NULL; |
1197 | goto bail; |
1198 | } |
1199 | } |
1200 | if (status < 0) { |
1201 | if (status != -ENOSPC) |
1202 | mlog_errno(status); |
1203 | goto bail; |
1204 | } |
1205 | } |
1206 | |
1207 | status = 0; |
1208 | bail: |
1209 | if ((status < 0) && *ac) { |
1210 | ocfs2_free_alloc_context(ac: *ac); |
1211 | *ac = NULL; |
1212 | } |
1213 | |
1214 | if (status) |
1215 | mlog_errno(status); |
1216 | return status; |
1217 | } |
1218 | |
1219 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, |
1220 | u32 bits_wanted, |
1221 | struct ocfs2_alloc_context **ac) |
1222 | { |
1223 | return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, max_block: 0, |
1224 | ALLOC_NEW_GROUP, ac); |
1225 | } |
1226 | |
1227 | /* |
1228 | * More or less lifted from ext3. I'll leave their description below: |
1229 | * |
1230 | * "For ext3 allocations, we must not reuse any blocks which are |
1231 | * allocated in the bitmap buffer's "last committed data" copy. This |
1232 | * prevents deletes from freeing up the page for reuse until we have |
1233 | * committed the delete transaction. |
1234 | * |
1235 | * If we didn't do this, then deleting something and reallocating it as |
1236 | * data would allow the old block to be overwritten before the |
1237 | * transaction committed (because we force data to disk before commit). |
1238 | * This would lead to corruption if we crashed between overwriting the |
1239 | * data and committing the delete. |
1240 | * |
1241 | * @@@ We may want to make this allocation behaviour conditional on |
1242 | * data-writes at some point, and disable it for metadata allocations or |
1243 | * sync-data inodes." |
1244 | * |
1245 | * Note: OCFS2 already does this differently for metadata vs data |
1246 | * allocations, as those bitmaps are separate and undo access is never |
1247 | * called on a metadata group descriptor. |
1248 | */ |
1249 | static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, |
1250 | int nr) |
1251 | { |
1252 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; |
1253 | struct journal_head *jh; |
1254 | int ret; |
1255 | |
1256 | if (ocfs2_test_bit(nr, addr: (unsigned long *)bg->bg_bitmap)) |
1257 | return 0; |
1258 | |
1259 | jh = jbd2_journal_grab_journal_head(bh: bg_bh); |
1260 | if (!jh) |
1261 | return 1; |
1262 | |
1263 | spin_lock(lock: &jh->b_state_lock); |
1264 | bg = (struct ocfs2_group_desc *) jh->b_committed_data; |
1265 | if (bg) |
1266 | ret = !ocfs2_test_bit(nr, addr: (unsigned long *)bg->bg_bitmap); |
1267 | else |
1268 | ret = 1; |
1269 | spin_unlock(lock: &jh->b_state_lock); |
1270 | jbd2_journal_put_journal_head(jh); |
1271 | |
1272 | return ret; |
1273 | } |
1274 | |
1275 | static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, |
1276 | struct buffer_head *bg_bh, |
1277 | unsigned int bits_wanted, |
1278 | unsigned int total_bits, |
1279 | struct ocfs2_suballoc_result *res) |
1280 | { |
1281 | void *bitmap; |
1282 | u16 best_offset, best_size; |
1283 | int offset, start, found, status = 0; |
1284 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; |
1285 | |
1286 | /* Callers got this descriptor from |
1287 | * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ |
1288 | BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); |
1289 | |
1290 | found = start = best_offset = best_size = 0; |
1291 | bitmap = bg->bg_bitmap; |
1292 | |
1293 | while((offset = ocfs2_find_next_zero_bit(addr: bitmap, size: total_bits, offset: start)) != -1) { |
1294 | if (offset == total_bits) |
1295 | break; |
1296 | |
1297 | if (!ocfs2_test_bg_bit_allocatable(bg_bh, nr: offset)) { |
1298 | /* We found a zero, but we can't use it as it |
1299 | * hasn't been put to disk yet! */ |
1300 | found = 0; |
1301 | start = offset + 1; |
1302 | } else if (offset == start) { |
1303 | /* we found a zero */ |
1304 | found++; |
1305 | /* move start to the next bit to test */ |
1306 | start++; |
1307 | } else { |
1308 | /* got a zero after some ones */ |
1309 | found = 1; |
1310 | start = offset + 1; |
1311 | } |
1312 | if (found > best_size) { |
1313 | best_size = found; |
1314 | best_offset = start - found; |
1315 | } |
1316 | /* we got everything we needed */ |
1317 | if (found == bits_wanted) { |
1318 | /* mlog(0, "Found it all!\n"); */ |
1319 | break; |
1320 | } |
1321 | } |
1322 | |
1323 | if (best_size) { |
1324 | res->sr_bit_offset = best_offset; |
1325 | res->sr_bits = best_size; |
1326 | } else { |
1327 | status = -ENOSPC; |
1328 | /* No error log here -- see the comment above |
1329 | * ocfs2_test_bg_bit_allocatable */ |
1330 | } |
1331 | |
1332 | return status; |
1333 | } |
1334 | |
1335 | int ocfs2_block_group_set_bits(handle_t *handle, |
1336 | struct inode *alloc_inode, |
1337 | struct ocfs2_group_desc *bg, |
1338 | struct buffer_head *group_bh, |
1339 | unsigned int bit_off, |
1340 | unsigned int num_bits) |
1341 | { |
1342 | int status; |
1343 | void *bitmap = bg->bg_bitmap; |
1344 | int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; |
1345 | |
1346 | /* All callers get the descriptor via |
1347 | * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ |
1348 | BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); |
1349 | BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); |
1350 | |
1351 | trace_ocfs2_block_group_set_bits(val1: bit_off, val2: num_bits); |
1352 | |
1353 | if (ocfs2_is_cluster_bitmap(inode: alloc_inode)) |
1354 | journal_type = OCFS2_JOURNAL_ACCESS_UNDO; |
1355 | |
1356 | status = ocfs2_journal_access_gd(handle, |
1357 | ci: INODE_CACHE(inode: alloc_inode), |
1358 | bh: group_bh, |
1359 | type: journal_type); |
1360 | if (status < 0) { |
1361 | mlog_errno(status); |
1362 | goto bail; |
1363 | } |
1364 | |
1365 | le16_add_cpu(var: &bg->bg_free_bits_count, val: -num_bits); |
1366 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { |
1367 | return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n" , |
1368 | (unsigned long long)le64_to_cpu(bg->bg_blkno), |
1369 | le16_to_cpu(bg->bg_bits), |
1370 | le16_to_cpu(bg->bg_free_bits_count), |
1371 | num_bits); |
1372 | } |
1373 | while(num_bits--) |
1374 | ocfs2_set_bit(bit_off++, bitmap); |
1375 | |
1376 | ocfs2_journal_dirty(handle, bh: group_bh); |
1377 | |
1378 | bail: |
1379 | return status; |
1380 | } |
1381 | |
1382 | /* find the one with the most empty bits */ |
1383 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl) |
1384 | { |
1385 | u16 curr, best; |
1386 | |
1387 | BUG_ON(!cl->cl_next_free_rec); |
1388 | |
1389 | best = curr = 0; |
1390 | while (curr < le16_to_cpu(cl->cl_next_free_rec)) { |
1391 | if (le32_to_cpu(cl->cl_recs[curr].c_free) > |
1392 | le32_to_cpu(cl->cl_recs[best].c_free)) |
1393 | best = curr; |
1394 | curr++; |
1395 | } |
1396 | |
1397 | BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec)); |
1398 | return best; |
1399 | } |
1400 | |
1401 | static int ocfs2_relink_block_group(handle_t *handle, |
1402 | struct inode *alloc_inode, |
1403 | struct buffer_head *fe_bh, |
1404 | struct buffer_head *bg_bh, |
1405 | struct buffer_head *prev_bg_bh, |
1406 | u16 chain) |
1407 | { |
1408 | int status; |
1409 | /* there is a really tiny chance the journal calls could fail, |
1410 | * but we wouldn't want inconsistent blocks in *any* case. */ |
1411 | u64 bg_ptr, prev_bg_ptr; |
1412 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; |
1413 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; |
1414 | struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; |
1415 | |
1416 | /* The caller got these descriptors from |
1417 | * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ |
1418 | BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); |
1419 | BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg)); |
1420 | |
1421 | trace_ocfs2_relink_block_group( |
1422 | i_blkno: (unsigned long long)le64_to_cpu(fe->i_blkno), chain, |
1423 | bg_blkno: (unsigned long long)le64_to_cpu(bg->bg_blkno), |
1424 | prev_blkno: (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); |
1425 | |
1426 | bg_ptr = le64_to_cpu(bg->bg_next_group); |
1427 | prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); |
1428 | |
1429 | status = ocfs2_journal_access_gd(handle, ci: INODE_CACHE(inode: alloc_inode), |
1430 | bh: prev_bg_bh, |
1431 | OCFS2_JOURNAL_ACCESS_WRITE); |
1432 | if (status < 0) |
1433 | goto out; |
1434 | |
1435 | prev_bg->bg_next_group = bg->bg_next_group; |
1436 | ocfs2_journal_dirty(handle, bh: prev_bg_bh); |
1437 | |
1438 | status = ocfs2_journal_access_gd(handle, ci: INODE_CACHE(inode: alloc_inode), |
1439 | bh: bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
1440 | if (status < 0) |
1441 | goto out_rollback_prev_bg; |
1442 | |
1443 | bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; |
1444 | ocfs2_journal_dirty(handle, bh: bg_bh); |
1445 | |
1446 | status = ocfs2_journal_access_di(handle, ci: INODE_CACHE(inode: alloc_inode), |
1447 | bh: fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
1448 | if (status < 0) |
1449 | goto out_rollback_bg; |
1450 | |
1451 | fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; |
1452 | ocfs2_journal_dirty(handle, bh: fe_bh); |
1453 | |
1454 | out: |
1455 | if (status < 0) |
1456 | mlog_errno(status); |
1457 | return status; |
1458 | |
1459 | out_rollback_bg: |
1460 | bg->bg_next_group = cpu_to_le64(bg_ptr); |
1461 | out_rollback_prev_bg: |
1462 | prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); |
1463 | goto out; |
1464 | } |
1465 | |
1466 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, |
1467 | u32 wanted) |
1468 | { |
1469 | return le16_to_cpu(bg->bg_free_bits_count) > wanted; |
1470 | } |
1471 | |
1472 | /* return 0 on success, -ENOSPC to keep searching and any other < 0 |
1473 | * value on error. */ |
1474 | static int ocfs2_cluster_group_search(struct inode *inode, |
1475 | struct buffer_head *group_bh, |
1476 | u32 bits_wanted, u32 min_bits, |
1477 | u64 max_block, |
1478 | struct ocfs2_suballoc_result *res) |
1479 | { |
1480 | int search = -ENOSPC; |
1481 | int ret; |
1482 | u64 blkoff; |
1483 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; |
1484 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1485 | unsigned int max_bits, gd_cluster_off; |
1486 | |
1487 | BUG_ON(!ocfs2_is_cluster_bitmap(inode)); |
1488 | |
1489 | if (gd->bg_free_bits_count) { |
1490 | max_bits = le16_to_cpu(gd->bg_bits); |
1491 | |
1492 | /* Tail groups in cluster bitmaps which aren't cpg |
1493 | * aligned are prone to partial extension by a failed |
1494 | * fs resize. If the file system resize never got to |
1495 | * update the dinode cluster count, then we don't want |
1496 | * to trust any clusters past it, regardless of what |
1497 | * the group descriptor says. */ |
1498 | gd_cluster_off = ocfs2_blocks_to_clusters(sb: inode->i_sb, |
1499 | le64_to_cpu(gd->bg_blkno)); |
1500 | if ((gd_cluster_off + max_bits) > |
1501 | OCFS2_I(inode)->ip_clusters) { |
1502 | max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off; |
1503 | trace_ocfs2_cluster_group_search_wrong_max_bits( |
1504 | ull: (unsigned long long)le64_to_cpu(gd->bg_blkno), |
1505 | le16_to_cpu(gd->bg_bits), |
1506 | value2: OCFS2_I(inode)->ip_clusters, value3: max_bits); |
1507 | } |
1508 | |
1509 | ret = ocfs2_block_group_find_clear_bits(osb, |
1510 | bg_bh: group_bh, bits_wanted, |
1511 | total_bits: max_bits, res); |
1512 | if (ret) |
1513 | return ret; |
1514 | |
1515 | if (max_block) { |
1516 | blkoff = ocfs2_clusters_to_blocks(sb: inode->i_sb, |
1517 | clusters: gd_cluster_off + |
1518 | res->sr_bit_offset + |
1519 | res->sr_bits); |
1520 | trace_ocfs2_cluster_group_search_max_block( |
1521 | val1: (unsigned long long)blkoff, |
1522 | val2: (unsigned long long)max_block); |
1523 | if (blkoff > max_block) |
1524 | return -ENOSPC; |
1525 | } |
1526 | |
1527 | /* ocfs2_block_group_find_clear_bits() might |
1528 | * return success, but we still want to return |
1529 | * -ENOSPC unless it found the minimum number |
1530 | * of bits. */ |
1531 | if (min_bits <= res->sr_bits) |
1532 | search = 0; /* success */ |
1533 | else if (res->sr_bits) { |
1534 | /* |
1535 | * Don't show bits which we'll be returning |
1536 | * for allocation to the local alloc bitmap. |
1537 | */ |
1538 | ocfs2_local_alloc_seen_free_bits(osb, num_clusters: res->sr_bits); |
1539 | } |
1540 | } |
1541 | |
1542 | return search; |
1543 | } |
1544 | |
1545 | static int ocfs2_block_group_search(struct inode *inode, |
1546 | struct buffer_head *group_bh, |
1547 | u32 bits_wanted, u32 min_bits, |
1548 | u64 max_block, |
1549 | struct ocfs2_suballoc_result *res) |
1550 | { |
1551 | int ret = -ENOSPC; |
1552 | u64 blkoff; |
1553 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; |
1554 | |
1555 | BUG_ON(min_bits != 1); |
1556 | BUG_ON(ocfs2_is_cluster_bitmap(inode)); |
1557 | |
1558 | if (bg->bg_free_bits_count) { |
1559 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), |
1560 | bg_bh: group_bh, bits_wanted, |
1561 | le16_to_cpu(bg->bg_bits), |
1562 | res); |
1563 | if (!ret && max_block) { |
1564 | blkoff = le64_to_cpu(bg->bg_blkno) + |
1565 | res->sr_bit_offset + res->sr_bits; |
1566 | trace_ocfs2_block_group_search_max_block( |
1567 | val1: (unsigned long long)blkoff, |
1568 | val2: (unsigned long long)max_block); |
1569 | if (blkoff > max_block) |
1570 | ret = -ENOSPC; |
1571 | } |
1572 | } |
1573 | |
1574 | return ret; |
1575 | } |
1576 | |
1577 | int ocfs2_alloc_dinode_update_counts(struct inode *inode, |
1578 | handle_t *handle, |
1579 | struct buffer_head *di_bh, |
1580 | u32 num_bits, |
1581 | u16 chain) |
1582 | { |
1583 | int ret; |
1584 | u32 tmp_used; |
1585 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; |
1586 | struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; |
1587 | |
1588 | ret = ocfs2_journal_access_di(handle, ci: INODE_CACHE(inode), bh: di_bh, |
1589 | OCFS2_JOURNAL_ACCESS_WRITE); |
1590 | if (ret < 0) { |
1591 | mlog_errno(ret); |
1592 | goto out; |
1593 | } |
1594 | |
1595 | tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); |
1596 | di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); |
1597 | le32_add_cpu(var: &cl->cl_recs[chain].c_free, val: -num_bits); |
1598 | ocfs2_journal_dirty(handle, bh: di_bh); |
1599 | |
1600 | out: |
1601 | return ret; |
1602 | } |
1603 | |
1604 | void ocfs2_rollback_alloc_dinode_counts(struct inode *inode, |
1605 | struct buffer_head *di_bh, |
1606 | u32 num_bits, |
1607 | u16 chain) |
1608 | { |
1609 | u32 tmp_used; |
1610 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; |
1611 | struct ocfs2_chain_list *cl; |
1612 | |
1613 | cl = (struct ocfs2_chain_list *)&di->id2.i_chain; |
1614 | tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); |
1615 | di->id1.bitmap1.i_used = cpu_to_le32(tmp_used - num_bits); |
1616 | le32_add_cpu(var: &cl->cl_recs[chain].c_free, val: num_bits); |
1617 | } |
1618 | |
1619 | static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res, |
1620 | struct ocfs2_extent_rec *rec, |
1621 | struct ocfs2_chain_list *cl) |
1622 | { |
1623 | unsigned int bpc = le16_to_cpu(cl->cl_bpc); |
1624 | unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc; |
1625 | unsigned int bitcount = le16_to_cpu(rec->e_leaf_clusters) * bpc; |
1626 | |
1627 | if (res->sr_bit_offset < bitoff) |
1628 | return 0; |
1629 | if (res->sr_bit_offset >= (bitoff + bitcount)) |
1630 | return 0; |
1631 | res->sr_blkno = le64_to_cpu(rec->e_blkno) + |
1632 | (res->sr_bit_offset - bitoff); |
1633 | if ((res->sr_bit_offset + res->sr_bits) > (bitoff + bitcount)) |
1634 | res->sr_bits = (bitoff + bitcount) - res->sr_bit_offset; |
1635 | return 1; |
1636 | } |
1637 | |
1638 | static void ocfs2_bg_discontig_fix_result(struct ocfs2_alloc_context *ac, |
1639 | struct ocfs2_group_desc *bg, |
1640 | struct ocfs2_suballoc_result *res) |
1641 | { |
1642 | int i; |
1643 | u64 bg_blkno = res->sr_bg_blkno; /* Save off */ |
1644 | struct ocfs2_extent_rec *rec; |
1645 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data; |
1646 | struct ocfs2_chain_list *cl = &di->id2.i_chain; |
1647 | |
1648 | if (ocfs2_is_cluster_bitmap(inode: ac->ac_inode)) { |
1649 | res->sr_blkno = 0; |
1650 | return; |
1651 | } |
1652 | |
1653 | res->sr_blkno = res->sr_bg_blkno + res->sr_bit_offset; |
1654 | res->sr_bg_blkno = 0; /* Clear it for contig block groups */ |
1655 | if (!ocfs2_supports_discontig_bg(OCFS2_SB(ac->ac_inode->i_sb)) || |
1656 | !bg->bg_list.l_next_free_rec) |
1657 | return; |
1658 | |
1659 | for (i = 0; i < le16_to_cpu(bg->bg_list.l_next_free_rec); i++) { |
1660 | rec = &bg->bg_list.l_recs[i]; |
1661 | if (ocfs2_bg_discontig_fix_by_rec(res, rec, cl)) { |
1662 | res->sr_bg_blkno = bg_blkno; /* Restore */ |
1663 | break; |
1664 | } |
1665 | } |
1666 | } |
1667 | |
1668 | static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, |
1669 | handle_t *handle, |
1670 | u32 bits_wanted, |
1671 | u32 min_bits, |
1672 | struct ocfs2_suballoc_result *res, |
1673 | u16 *bits_left) |
1674 | { |
1675 | int ret; |
1676 | struct buffer_head *group_bh = NULL; |
1677 | struct ocfs2_group_desc *gd; |
1678 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data; |
1679 | struct inode *alloc_inode = ac->ac_inode; |
1680 | |
1681 | ret = ocfs2_read_group_descriptor(inode: alloc_inode, di, |
1682 | gd_blkno: res->sr_bg_blkno, bh: &group_bh); |
1683 | if (ret < 0) { |
1684 | mlog_errno(ret); |
1685 | return ret; |
1686 | } |
1687 | |
1688 | gd = (struct ocfs2_group_desc *) group_bh->b_data; |
1689 | ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, |
1690 | ac->ac_max_block, res); |
1691 | if (ret < 0) { |
1692 | if (ret != -ENOSPC) |
1693 | mlog_errno(ret); |
1694 | goto out; |
1695 | } |
1696 | |
1697 | if (!ret) |
1698 | ocfs2_bg_discontig_fix_result(ac, bg: gd, res); |
1699 | |
1700 | /* |
1701 | * sr_bg_blkno might have been changed by |
1702 | * ocfs2_bg_discontig_fix_result |
1703 | */ |
1704 | res->sr_bg_stable_blkno = group_bh->b_blocknr; |
1705 | |
1706 | if (ac->ac_find_loc_only) |
1707 | goto out_loc_only; |
1708 | |
1709 | ret = ocfs2_alloc_dinode_update_counts(inode: alloc_inode, handle, di_bh: ac->ac_bh, |
1710 | num_bits: res->sr_bits, |
1711 | le16_to_cpu(gd->bg_chain)); |
1712 | if (ret < 0) { |
1713 | mlog_errno(ret); |
1714 | goto out; |
1715 | } |
1716 | |
1717 | ret = ocfs2_block_group_set_bits(handle, alloc_inode, bg: gd, group_bh, |
1718 | bit_off: res->sr_bit_offset, num_bits: res->sr_bits); |
1719 | if (ret < 0) { |
1720 | ocfs2_rollback_alloc_dinode_counts(inode: alloc_inode, di_bh: ac->ac_bh, |
1721 | num_bits: res->sr_bits, |
1722 | le16_to_cpu(gd->bg_chain)); |
1723 | mlog_errno(ret); |
1724 | } |
1725 | |
1726 | out_loc_only: |
1727 | *bits_left = le16_to_cpu(gd->bg_free_bits_count); |
1728 | |
1729 | out: |
1730 | brelse(bh: group_bh); |
1731 | |
1732 | return ret; |
1733 | } |
1734 | |
1735 | static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, |
1736 | handle_t *handle, |
1737 | u32 bits_wanted, |
1738 | u32 min_bits, |
1739 | struct ocfs2_suballoc_result *res, |
1740 | u16 *bits_left) |
1741 | { |
1742 | int status; |
1743 | u16 chain; |
1744 | u64 next_group; |
1745 | struct inode *alloc_inode = ac->ac_inode; |
1746 | struct buffer_head *group_bh = NULL; |
1747 | struct buffer_head *prev_group_bh = NULL; |
1748 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; |
1749 | struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; |
1750 | struct ocfs2_group_desc *bg; |
1751 | |
1752 | chain = ac->ac_chain; |
1753 | trace_ocfs2_search_chain_begin( |
1754 | val1: (unsigned long long)OCFS2_I(inode: alloc_inode)->ip_blkno, |
1755 | val2: bits_wanted, val3: chain); |
1756 | |
1757 | status = ocfs2_read_group_descriptor(inode: alloc_inode, di: fe, |
1758 | le64_to_cpu(cl->cl_recs[chain].c_blkno), |
1759 | bh: &group_bh); |
1760 | if (status < 0) { |
1761 | mlog_errno(status); |
1762 | goto bail; |
1763 | } |
1764 | bg = (struct ocfs2_group_desc *) group_bh->b_data; |
1765 | |
1766 | status = -ENOSPC; |
1767 | /* for now, the chain search is a bit simplistic. We just use |
1768 | * the 1st group with any empty bits. */ |
1769 | while ((status = ac->ac_group_search(alloc_inode, group_bh, |
1770 | bits_wanted, min_bits, |
1771 | ac->ac_max_block, |
1772 | res)) == -ENOSPC) { |
1773 | if (!bg->bg_next_group) |
1774 | break; |
1775 | |
1776 | brelse(bh: prev_group_bh); |
1777 | prev_group_bh = NULL; |
1778 | |
1779 | next_group = le64_to_cpu(bg->bg_next_group); |
1780 | prev_group_bh = group_bh; |
1781 | group_bh = NULL; |
1782 | status = ocfs2_read_group_descriptor(inode: alloc_inode, di: fe, |
1783 | gd_blkno: next_group, bh: &group_bh); |
1784 | if (status < 0) { |
1785 | mlog_errno(status); |
1786 | goto bail; |
1787 | } |
1788 | bg = (struct ocfs2_group_desc *) group_bh->b_data; |
1789 | } |
1790 | if (status < 0) { |
1791 | if (status != -ENOSPC) |
1792 | mlog_errno(status); |
1793 | goto bail; |
1794 | } |
1795 | |
1796 | trace_ocfs2_search_chain_succ( |
1797 | val1: (unsigned long long)le64_to_cpu(bg->bg_blkno), val2: res->sr_bits); |
1798 | |
1799 | res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno); |
1800 | |
1801 | BUG_ON(res->sr_bits == 0); |
1802 | if (!status) |
1803 | ocfs2_bg_discontig_fix_result(ac, bg, res); |
1804 | |
1805 | /* |
1806 | * sr_bg_blkno might have been changed by |
1807 | * ocfs2_bg_discontig_fix_result |
1808 | */ |
1809 | res->sr_bg_stable_blkno = group_bh->b_blocknr; |
1810 | |
1811 | /* |
1812 | * Keep track of previous block descriptor read. When |
1813 | * we find a target, if we have read more than X |
1814 | * number of descriptors, and the target is reasonably |
1815 | * empty, relink him to top of his chain. |
1816 | * |
1817 | * We've read 0 extra blocks and only send one more to |
1818 | * the transaction, yet the next guy to search has a |
1819 | * much easier time. |
1820 | * |
1821 | * Do this *after* figuring out how many bits we're taking out |
1822 | * of our target group. |
1823 | */ |
1824 | if (!ac->ac_disable_chain_relink && |
1825 | (prev_group_bh) && |
1826 | (ocfs2_block_group_reasonably_empty(bg, wanted: res->sr_bits))) { |
1827 | status = ocfs2_relink_block_group(handle, alloc_inode, |
1828 | fe_bh: ac->ac_bh, bg_bh: group_bh, |
1829 | prev_bg_bh: prev_group_bh, chain); |
1830 | if (status < 0) { |
1831 | mlog_errno(status); |
1832 | goto bail; |
1833 | } |
1834 | } |
1835 | |
1836 | if (ac->ac_find_loc_only) |
1837 | goto out_loc_only; |
1838 | |
1839 | status = ocfs2_alloc_dinode_update_counts(inode: alloc_inode, handle, |
1840 | di_bh: ac->ac_bh, num_bits: res->sr_bits, |
1841 | chain); |
1842 | if (status) { |
1843 | mlog_errno(status); |
1844 | goto bail; |
1845 | } |
1846 | |
1847 | status = ocfs2_block_group_set_bits(handle, |
1848 | alloc_inode, |
1849 | bg, |
1850 | group_bh, |
1851 | bit_off: res->sr_bit_offset, |
1852 | num_bits: res->sr_bits); |
1853 | if (status < 0) { |
1854 | ocfs2_rollback_alloc_dinode_counts(inode: alloc_inode, |
1855 | di_bh: ac->ac_bh, num_bits: res->sr_bits, chain); |
1856 | mlog_errno(status); |
1857 | goto bail; |
1858 | } |
1859 | |
1860 | trace_ocfs2_search_chain_end( |
1861 | val1: (unsigned long long)le64_to_cpu(fe->i_blkno), |
1862 | val2: res->sr_bits); |
1863 | |
1864 | out_loc_only: |
1865 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); |
1866 | bail: |
1867 | brelse(bh: group_bh); |
1868 | brelse(bh: prev_group_bh); |
1869 | |
1870 | if (status) |
1871 | mlog_errno(status); |
1872 | return status; |
1873 | } |
1874 | |
1875 | /* will give out up to bits_wanted contiguous bits. */ |
1876 | static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, |
1877 | handle_t *handle, |
1878 | u32 bits_wanted, |
1879 | u32 min_bits, |
1880 | struct ocfs2_suballoc_result *res) |
1881 | { |
1882 | int status; |
1883 | u16 victim, i; |
1884 | u16 bits_left = 0; |
1885 | u64 hint = ac->ac_last_group; |
1886 | struct ocfs2_chain_list *cl; |
1887 | struct ocfs2_dinode *fe; |
1888 | |
1889 | BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); |
1890 | BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given)); |
1891 | BUG_ON(!ac->ac_bh); |
1892 | |
1893 | fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; |
1894 | |
1895 | /* The bh was validated by the inode read during |
1896 | * ocfs2_reserve_suballoc_bits(). Any corruption is a code bug. */ |
1897 | BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); |
1898 | |
1899 | if (le32_to_cpu(fe->id1.bitmap1.i_used) >= |
1900 | le32_to_cpu(fe->id1.bitmap1.i_total)) { |
1901 | status = ocfs2_error(ac->ac_inode->i_sb, |
1902 | "Chain allocator dinode %llu has %u used bits but only %u total\n" , |
1903 | (unsigned long long)le64_to_cpu(fe->i_blkno), |
1904 | le32_to_cpu(fe->id1.bitmap1.i_used), |
1905 | le32_to_cpu(fe->id1.bitmap1.i_total)); |
1906 | goto bail; |
1907 | } |
1908 | |
1909 | res->sr_bg_blkno = hint; |
1910 | if (res->sr_bg_blkno) { |
1911 | /* Attempt to short-circuit the usual search mechanism |
1912 | * by jumping straight to the most recently used |
1913 | * allocation group. This helps us maintain some |
1914 | * contiguousness across allocations. */ |
1915 | status = ocfs2_search_one_group(ac, handle, bits_wanted, |
1916 | min_bits, res, bits_left: &bits_left); |
1917 | if (!status) |
1918 | goto set_hint; |
1919 | if (status < 0 && status != -ENOSPC) { |
1920 | mlog_errno(status); |
1921 | goto bail; |
1922 | } |
1923 | } |
1924 | |
1925 | cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; |
1926 | |
1927 | victim = ocfs2_find_victim_chain(cl); |
1928 | ac->ac_chain = victim; |
1929 | |
1930 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, |
1931 | res, bits_left: &bits_left); |
1932 | if (!status) { |
1933 | if (ocfs2_is_cluster_bitmap(inode: ac->ac_inode)) |
1934 | hint = res->sr_bg_blkno; |
1935 | else |
1936 | hint = ocfs2_group_from_res(res); |
1937 | goto set_hint; |
1938 | } |
1939 | if (status < 0 && status != -ENOSPC) { |
1940 | mlog_errno(status); |
1941 | goto bail; |
1942 | } |
1943 | |
1944 | trace_ocfs2_claim_suballoc_bits(num: victim); |
1945 | |
1946 | /* If we didn't pick a good victim, then just default to |
1947 | * searching each chain in order. Don't allow chain relinking |
1948 | * because we only calculate enough journal credits for one |
1949 | * relink per alloc. */ |
1950 | ac->ac_disable_chain_relink = 1; |
1951 | for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { |
1952 | if (i == victim) |
1953 | continue; |
1954 | if (!cl->cl_recs[i].c_free) |
1955 | continue; |
1956 | |
1957 | ac->ac_chain = i; |
1958 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, |
1959 | res, bits_left: &bits_left); |
1960 | if (!status) { |
1961 | hint = ocfs2_group_from_res(res); |
1962 | break; |
1963 | } |
1964 | if (status < 0 && status != -ENOSPC) { |
1965 | mlog_errno(status); |
1966 | goto bail; |
1967 | } |
1968 | } |
1969 | |
1970 | set_hint: |
1971 | if (status != -ENOSPC) { |
1972 | /* If the next search of this group is not likely to |
1973 | * yield a suitable extent, then we reset the last |
1974 | * group hint so as to not waste a disk read */ |
1975 | if (bits_left < min_bits) |
1976 | ac->ac_last_group = 0; |
1977 | else |
1978 | ac->ac_last_group = hint; |
1979 | } |
1980 | |
1981 | bail: |
1982 | if (status) |
1983 | mlog_errno(status); |
1984 | return status; |
1985 | } |
1986 | |
1987 | int ocfs2_claim_metadata(handle_t *handle, |
1988 | struct ocfs2_alloc_context *ac, |
1989 | u32 bits_wanted, |
1990 | u64 *suballoc_loc, |
1991 | u16 *suballoc_bit_start, |
1992 | unsigned int *num_bits, |
1993 | u64 *blkno_start) |
1994 | { |
1995 | int status; |
1996 | struct ocfs2_suballoc_result res = { .sr_blkno = 0, }; |
1997 | |
1998 | BUG_ON(!ac); |
1999 | BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted)); |
2000 | BUG_ON(ac->ac_which != OCFS2_AC_USE_META); |
2001 | |
2002 | status = ocfs2_claim_suballoc_bits(ac, |
2003 | handle, |
2004 | bits_wanted, |
2005 | min_bits: 1, |
2006 | res: &res); |
2007 | if (status < 0) { |
2008 | mlog_errno(status); |
2009 | goto bail; |
2010 | } |
2011 | atomic_inc(v: &OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); |
2012 | |
2013 | *suballoc_loc = res.sr_bg_blkno; |
2014 | *suballoc_bit_start = res.sr_bit_offset; |
2015 | *blkno_start = res.sr_blkno; |
2016 | ac->ac_bits_given += res.sr_bits; |
2017 | *num_bits = res.sr_bits; |
2018 | status = 0; |
2019 | bail: |
2020 | if (status) |
2021 | mlog_errno(status); |
2022 | return status; |
2023 | } |
2024 | |
2025 | static void ocfs2_init_inode_ac_group(struct inode *dir, |
2026 | struct buffer_head *parent_di_bh, |
2027 | struct ocfs2_alloc_context *ac) |
2028 | { |
2029 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_di_bh->b_data; |
2030 | /* |
2031 | * Try to allocate inodes from some specific group. |
2032 | * |
2033 | * If the parent dir has recorded the last group used in allocation, |
2034 | * cool, use it. Otherwise if we try to allocate new inode from the |
2035 | * same slot the parent dir belongs to, use the same chunk. |
2036 | * |
2037 | * We are very careful here to avoid the mistake of setting |
2038 | * ac_last_group to a group descriptor from a different (unlocked) slot. |
2039 | */ |
2040 | if (OCFS2_I(inode: dir)->ip_last_used_group && |
2041 | OCFS2_I(inode: dir)->ip_last_used_slot == ac->ac_alloc_slot) |
2042 | ac->ac_last_group = OCFS2_I(inode: dir)->ip_last_used_group; |
2043 | else if (le16_to_cpu(di->i_suballoc_slot) == ac->ac_alloc_slot) { |
2044 | if (di->i_suballoc_loc) |
2045 | ac->ac_last_group = le64_to_cpu(di->i_suballoc_loc); |
2046 | else |
2047 | ac->ac_last_group = ocfs2_which_suballoc_group( |
2048 | le64_to_cpu(di->i_blkno), |
2049 | le16_to_cpu(di->i_suballoc_bit)); |
2050 | } |
2051 | } |
2052 | |
2053 | static inline void ocfs2_save_inode_ac_group(struct inode *dir, |
2054 | struct ocfs2_alloc_context *ac) |
2055 | { |
2056 | OCFS2_I(inode: dir)->ip_last_used_group = ac->ac_last_group; |
2057 | OCFS2_I(inode: dir)->ip_last_used_slot = ac->ac_alloc_slot; |
2058 | } |
2059 | |
2060 | int ocfs2_find_new_inode_loc(struct inode *dir, |
2061 | struct buffer_head *parent_fe_bh, |
2062 | struct ocfs2_alloc_context *ac, |
2063 | u64 *fe_blkno) |
2064 | { |
2065 | int ret; |
2066 | handle_t *handle = NULL; |
2067 | struct ocfs2_suballoc_result *res; |
2068 | |
2069 | BUG_ON(!ac); |
2070 | BUG_ON(ac->ac_bits_given != 0); |
2071 | BUG_ON(ac->ac_bits_wanted != 1); |
2072 | BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); |
2073 | |
2074 | res = kzalloc(size: sizeof(*res), GFP_NOFS); |
2075 | if (res == NULL) { |
2076 | ret = -ENOMEM; |
2077 | mlog_errno(ret); |
2078 | goto out; |
2079 | } |
2080 | |
2081 | ocfs2_init_inode_ac_group(dir, parent_di_bh: parent_fe_bh, ac); |
2082 | |
2083 | /* |
2084 | * The handle started here is for chain relink. Alternatively, |
2085 | * we could just disable relink for these calls. |
2086 | */ |
2087 | handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC); |
2088 | if (IS_ERR(ptr: handle)) { |
2089 | ret = PTR_ERR(ptr: handle); |
2090 | handle = NULL; |
2091 | mlog_errno(ret); |
2092 | goto out; |
2093 | } |
2094 | |
2095 | /* |
2096 | * This will instruct ocfs2_claim_suballoc_bits and |
2097 | * ocfs2_search_one_group to search but save actual allocation |
2098 | * for later. |
2099 | */ |
2100 | ac->ac_find_loc_only = 1; |
2101 | |
2102 | ret = ocfs2_claim_suballoc_bits(ac, handle, bits_wanted: 1, min_bits: 1, res); |
2103 | if (ret < 0) { |
2104 | mlog_errno(ret); |
2105 | goto out; |
2106 | } |
2107 | |
2108 | ac->ac_find_loc_priv = res; |
2109 | *fe_blkno = res->sr_blkno; |
2110 | ocfs2_update_inode_fsync_trans(handle, inode: dir, datasync: 0); |
2111 | out: |
2112 | if (handle) |
2113 | ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle); |
2114 | |
2115 | if (ret) |
2116 | kfree(objp: res); |
2117 | |
2118 | return ret; |
2119 | } |
2120 | |
2121 | int ocfs2_claim_new_inode_at_loc(handle_t *handle, |
2122 | struct inode *dir, |
2123 | struct ocfs2_alloc_context *ac, |
2124 | u64 *suballoc_loc, |
2125 | u16 *suballoc_bit, |
2126 | u64 di_blkno) |
2127 | { |
2128 | int ret; |
2129 | u16 chain; |
2130 | struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv; |
2131 | struct buffer_head *bg_bh = NULL; |
2132 | struct ocfs2_group_desc *bg; |
2133 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data; |
2134 | |
2135 | /* |
2136 | * Since di_blkno is being passed back in, we check for any |
2137 | * inconsistencies which may have happened between |
2138 | * calls. These are code bugs as di_blkno is not expected to |
2139 | * change once returned from ocfs2_find_new_inode_loc() |
2140 | */ |
2141 | BUG_ON(res->sr_blkno != di_blkno); |
2142 | |
2143 | ret = ocfs2_read_group_descriptor(inode: ac->ac_inode, di, |
2144 | gd_blkno: res->sr_bg_stable_blkno, bh: &bg_bh); |
2145 | if (ret) { |
2146 | mlog_errno(ret); |
2147 | goto out; |
2148 | } |
2149 | |
2150 | bg = (struct ocfs2_group_desc *) bg_bh->b_data; |
2151 | chain = le16_to_cpu(bg->bg_chain); |
2152 | |
2153 | ret = ocfs2_alloc_dinode_update_counts(inode: ac->ac_inode, handle, |
2154 | di_bh: ac->ac_bh, num_bits: res->sr_bits, |
2155 | chain); |
2156 | if (ret) { |
2157 | mlog_errno(ret); |
2158 | goto out; |
2159 | } |
2160 | |
2161 | ret = ocfs2_block_group_set_bits(handle, |
2162 | alloc_inode: ac->ac_inode, |
2163 | bg, |
2164 | group_bh: bg_bh, |
2165 | bit_off: res->sr_bit_offset, |
2166 | num_bits: res->sr_bits); |
2167 | if (ret < 0) { |
2168 | ocfs2_rollback_alloc_dinode_counts(inode: ac->ac_inode, |
2169 | di_bh: ac->ac_bh, num_bits: res->sr_bits, chain); |
2170 | mlog_errno(ret); |
2171 | goto out; |
2172 | } |
2173 | |
2174 | trace_ocfs2_claim_new_inode_at_loc(val1: (unsigned long long)di_blkno, |
2175 | val2: res->sr_bits); |
2176 | |
2177 | atomic_inc(v: &OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); |
2178 | |
2179 | BUG_ON(res->sr_bits != 1); |
2180 | |
2181 | *suballoc_loc = res->sr_bg_blkno; |
2182 | *suballoc_bit = res->sr_bit_offset; |
2183 | ac->ac_bits_given++; |
2184 | ocfs2_save_inode_ac_group(dir, ac); |
2185 | |
2186 | out: |
2187 | brelse(bh: bg_bh); |
2188 | |
2189 | return ret; |
2190 | } |
2191 | |
2192 | int ocfs2_claim_new_inode(handle_t *handle, |
2193 | struct inode *dir, |
2194 | struct buffer_head *parent_fe_bh, |
2195 | struct ocfs2_alloc_context *ac, |
2196 | u64 *suballoc_loc, |
2197 | u16 *suballoc_bit, |
2198 | u64 *fe_blkno) |
2199 | { |
2200 | int status; |
2201 | struct ocfs2_suballoc_result res; |
2202 | |
2203 | BUG_ON(!ac); |
2204 | BUG_ON(ac->ac_bits_given != 0); |
2205 | BUG_ON(ac->ac_bits_wanted != 1); |
2206 | BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); |
2207 | |
2208 | ocfs2_init_inode_ac_group(dir, parent_di_bh: parent_fe_bh, ac); |
2209 | |
2210 | status = ocfs2_claim_suballoc_bits(ac, |
2211 | handle, |
2212 | bits_wanted: 1, |
2213 | min_bits: 1, |
2214 | res: &res); |
2215 | if (status < 0) { |
2216 | mlog_errno(status); |
2217 | goto bail; |
2218 | } |
2219 | atomic_inc(v: &OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); |
2220 | |
2221 | BUG_ON(res.sr_bits != 1); |
2222 | |
2223 | *suballoc_loc = res.sr_bg_blkno; |
2224 | *suballoc_bit = res.sr_bit_offset; |
2225 | *fe_blkno = res.sr_blkno; |
2226 | ac->ac_bits_given++; |
2227 | ocfs2_save_inode_ac_group(dir, ac); |
2228 | status = 0; |
2229 | bail: |
2230 | if (status) |
2231 | mlog_errno(status); |
2232 | return status; |
2233 | } |
2234 | |
2235 | /* translate a group desc. blkno and it's bitmap offset into |
2236 | * disk cluster offset. */ |
2237 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, |
2238 | u64 bg_blkno, |
2239 | u16 bg_bit_off) |
2240 | { |
2241 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2242 | u32 cluster = 0; |
2243 | |
2244 | BUG_ON(!ocfs2_is_cluster_bitmap(inode)); |
2245 | |
2246 | if (bg_blkno != osb->first_cluster_group_blkno) |
2247 | cluster = ocfs2_blocks_to_clusters(sb: inode->i_sb, blocks: bg_blkno); |
2248 | cluster += (u32) bg_bit_off; |
2249 | return cluster; |
2250 | } |
2251 | |
2252 | /* given a cluster offset, calculate which block group it belongs to |
2253 | * and return that block offset. */ |
2254 | u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster) |
2255 | { |
2256 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2257 | u32 group_no; |
2258 | |
2259 | BUG_ON(!ocfs2_is_cluster_bitmap(inode)); |
2260 | |
2261 | group_no = cluster / osb->bitmap_cpg; |
2262 | if (!group_no) |
2263 | return osb->first_cluster_group_blkno; |
2264 | return ocfs2_clusters_to_blocks(sb: inode->i_sb, |
2265 | clusters: group_no * osb->bitmap_cpg); |
2266 | } |
2267 | |
2268 | /* given the block number of a cluster start, calculate which cluster |
2269 | * group and descriptor bitmap offset that corresponds to. */ |
2270 | static inline void ocfs2_block_to_cluster_group(struct inode *inode, |
2271 | u64 data_blkno, |
2272 | u64 *bg_blkno, |
2273 | u16 *bg_bit_off) |
2274 | { |
2275 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2276 | u32 data_cluster = ocfs2_blocks_to_clusters(sb: osb->sb, blocks: data_blkno); |
2277 | |
2278 | BUG_ON(!ocfs2_is_cluster_bitmap(inode)); |
2279 | |
2280 | *bg_blkno = ocfs2_which_cluster_group(inode, |
2281 | cluster: data_cluster); |
2282 | |
2283 | if (*bg_blkno == osb->first_cluster_group_blkno) |
2284 | *bg_bit_off = (u16) data_cluster; |
2285 | else |
2286 | *bg_bit_off = (u16) ocfs2_blocks_to_clusters(sb: osb->sb, |
2287 | blocks: data_blkno - *bg_blkno); |
2288 | } |
2289 | |
2290 | /* |
2291 | * min_bits - minimum contiguous chunk from this total allocation we |
2292 | * can handle. set to what we asked for originally for a full |
2293 | * contig. allocation, set to '1' to indicate we can deal with extents |
2294 | * of any size. |
2295 | */ |
2296 | int __ocfs2_claim_clusters(handle_t *handle, |
2297 | struct ocfs2_alloc_context *ac, |
2298 | u32 min_clusters, |
2299 | u32 max_clusters, |
2300 | u32 *cluster_start, |
2301 | u32 *num_clusters) |
2302 | { |
2303 | int status; |
2304 | unsigned int bits_wanted = max_clusters; |
2305 | struct ocfs2_suballoc_result res = { .sr_blkno = 0, }; |
2306 | struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb); |
2307 | |
2308 | BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); |
2309 | |
2310 | BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL |
2311 | && ac->ac_which != OCFS2_AC_USE_MAIN); |
2312 | |
2313 | if (ac->ac_which == OCFS2_AC_USE_LOCAL) { |
2314 | WARN_ON(min_clusters > 1); |
2315 | |
2316 | status = ocfs2_claim_local_alloc_bits(osb, |
2317 | handle, |
2318 | ac, |
2319 | bits_wanted, |
2320 | bit_off: cluster_start, |
2321 | num_bits: num_clusters); |
2322 | if (!status) |
2323 | atomic_inc(v: &osb->alloc_stats.local_data); |
2324 | } else { |
2325 | if (min_clusters > (osb->bitmap_cpg - 1)) { |
2326 | /* The only paths asking for contiguousness |
2327 | * should know about this already. */ |
2328 | mlog(ML_ERROR, "minimum allocation requested %u exceeds " |
2329 | "group bitmap size %u!\n" , min_clusters, |
2330 | osb->bitmap_cpg); |
2331 | status = -ENOSPC; |
2332 | goto bail; |
2333 | } |
2334 | /* clamp the current request down to a realistic size. */ |
2335 | if (bits_wanted > (osb->bitmap_cpg - 1)) |
2336 | bits_wanted = osb->bitmap_cpg - 1; |
2337 | |
2338 | status = ocfs2_claim_suballoc_bits(ac, |
2339 | handle, |
2340 | bits_wanted, |
2341 | min_bits: min_clusters, |
2342 | res: &res); |
2343 | if (!status) { |
2344 | BUG_ON(res.sr_blkno); /* cluster alloc can't set */ |
2345 | *cluster_start = |
2346 | ocfs2_desc_bitmap_to_cluster_off(inode: ac->ac_inode, |
2347 | bg_blkno: res.sr_bg_blkno, |
2348 | bg_bit_off: res.sr_bit_offset); |
2349 | atomic_inc(v: &osb->alloc_stats.bitmap_data); |
2350 | *num_clusters = res.sr_bits; |
2351 | } |
2352 | } |
2353 | if (status < 0) { |
2354 | if (status != -ENOSPC) |
2355 | mlog_errno(status); |
2356 | goto bail; |
2357 | } |
2358 | |
2359 | ac->ac_bits_given += *num_clusters; |
2360 | |
2361 | bail: |
2362 | if (status) |
2363 | mlog_errno(status); |
2364 | return status; |
2365 | } |
2366 | |
2367 | int ocfs2_claim_clusters(handle_t *handle, |
2368 | struct ocfs2_alloc_context *ac, |
2369 | u32 min_clusters, |
2370 | u32 *cluster_start, |
2371 | u32 *num_clusters) |
2372 | { |
2373 | unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; |
2374 | |
2375 | return __ocfs2_claim_clusters(handle, ac, min_clusters, |
2376 | max_clusters: bits_wanted, cluster_start, num_clusters); |
2377 | } |
2378 | |
2379 | static int ocfs2_block_group_clear_bits(handle_t *handle, |
2380 | struct inode *alloc_inode, |
2381 | struct ocfs2_group_desc *bg, |
2382 | struct buffer_head *group_bh, |
2383 | unsigned int bit_off, |
2384 | unsigned int num_bits, |
2385 | void (*undo_fn)(unsigned int bit, |
2386 | unsigned long *bmap)) |
2387 | { |
2388 | int status; |
2389 | unsigned int tmp; |
2390 | struct ocfs2_group_desc *undo_bg = NULL; |
2391 | struct journal_head *jh; |
2392 | |
2393 | /* The caller got this descriptor from |
2394 | * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ |
2395 | BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); |
2396 | |
2397 | trace_ocfs2_block_group_clear_bits(val1: bit_off, val2: num_bits); |
2398 | |
2399 | BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode)); |
2400 | status = ocfs2_journal_access_gd(handle, ci: INODE_CACHE(inode: alloc_inode), |
2401 | bh: group_bh, |
2402 | type: undo_fn ? |
2403 | OCFS2_JOURNAL_ACCESS_UNDO : |
2404 | OCFS2_JOURNAL_ACCESS_WRITE); |
2405 | if (status < 0) { |
2406 | mlog_errno(status); |
2407 | goto bail; |
2408 | } |
2409 | |
2410 | jh = bh2jh(bh: group_bh); |
2411 | if (undo_fn) { |
2412 | spin_lock(lock: &jh->b_state_lock); |
2413 | undo_bg = (struct ocfs2_group_desc *) jh->b_committed_data; |
2414 | BUG_ON(!undo_bg); |
2415 | } |
2416 | |
2417 | tmp = num_bits; |
2418 | while(tmp--) { |
2419 | ocfs2_clear_bit((bit_off + tmp), |
2420 | (unsigned long *) bg->bg_bitmap); |
2421 | if (undo_fn) |
2422 | undo_fn(bit_off + tmp, |
2423 | (unsigned long *) undo_bg->bg_bitmap); |
2424 | } |
2425 | le16_add_cpu(var: &bg->bg_free_bits_count, val: num_bits); |
2426 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { |
2427 | if (undo_fn) |
2428 | spin_unlock(lock: &jh->b_state_lock); |
2429 | return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n" , |
2430 | (unsigned long long)le64_to_cpu(bg->bg_blkno), |
2431 | le16_to_cpu(bg->bg_bits), |
2432 | le16_to_cpu(bg->bg_free_bits_count), |
2433 | num_bits); |
2434 | } |
2435 | |
2436 | if (undo_fn) |
2437 | spin_unlock(lock: &jh->b_state_lock); |
2438 | |
2439 | ocfs2_journal_dirty(handle, bh: group_bh); |
2440 | bail: |
2441 | return status; |
2442 | } |
2443 | |
2444 | /* |
2445 | * expects the suballoc inode to already be locked. |
2446 | */ |
2447 | static int _ocfs2_free_suballoc_bits(handle_t *handle, |
2448 | struct inode *alloc_inode, |
2449 | struct buffer_head *alloc_bh, |
2450 | unsigned int start_bit, |
2451 | u64 bg_blkno, |
2452 | unsigned int count, |
2453 | void (*undo_fn)(unsigned int bit, |
2454 | unsigned long *bitmap)) |
2455 | { |
2456 | int status = 0; |
2457 | u32 tmp_used; |
2458 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; |
2459 | struct ocfs2_chain_list *cl = &fe->id2.i_chain; |
2460 | struct buffer_head *group_bh = NULL; |
2461 | struct ocfs2_group_desc *group; |
2462 | |
2463 | /* The alloc_bh comes from ocfs2_free_dinode() or |
2464 | * ocfs2_free_clusters(). The callers have all locked the |
2465 | * allocator and gotten alloc_bh from the lock call. This |
2466 | * validates the dinode buffer. Any corruption that has happened |
2467 | * is a code bug. */ |
2468 | BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); |
2469 | BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); |
2470 | |
2471 | trace_ocfs2_free_suballoc_bits( |
2472 | inode: (unsigned long long)OCFS2_I(inode: alloc_inode)->ip_blkno, |
2473 | group: (unsigned long long)bg_blkno, |
2474 | start_bit, count); |
2475 | |
2476 | status = ocfs2_read_group_descriptor(inode: alloc_inode, di: fe, gd_blkno: bg_blkno, |
2477 | bh: &group_bh); |
2478 | if (status < 0) { |
2479 | mlog_errno(status); |
2480 | goto bail; |
2481 | } |
2482 | group = (struct ocfs2_group_desc *) group_bh->b_data; |
2483 | |
2484 | BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); |
2485 | |
2486 | status = ocfs2_block_group_clear_bits(handle, alloc_inode, |
2487 | bg: group, group_bh, |
2488 | bit_off: start_bit, num_bits: count, undo_fn); |
2489 | if (status < 0) { |
2490 | mlog_errno(status); |
2491 | goto bail; |
2492 | } |
2493 | |
2494 | status = ocfs2_journal_access_di(handle, ci: INODE_CACHE(inode: alloc_inode), |
2495 | bh: alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
2496 | if (status < 0) { |
2497 | mlog_errno(status); |
2498 | ocfs2_block_group_set_bits(handle, alloc_inode, bg: group, group_bh, |
2499 | bit_off: start_bit, num_bits: count); |
2500 | goto bail; |
2501 | } |
2502 | |
2503 | le32_add_cpu(var: &cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free, |
2504 | val: count); |
2505 | tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); |
2506 | fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); |
2507 | ocfs2_journal_dirty(handle, bh: alloc_bh); |
2508 | |
2509 | bail: |
2510 | brelse(bh: group_bh); |
2511 | return status; |
2512 | } |
2513 | |
2514 | int ocfs2_free_suballoc_bits(handle_t *handle, |
2515 | struct inode *alloc_inode, |
2516 | struct buffer_head *alloc_bh, |
2517 | unsigned int start_bit, |
2518 | u64 bg_blkno, |
2519 | unsigned int count) |
2520 | { |
2521 | return _ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh, |
2522 | start_bit, bg_blkno, count, NULL); |
2523 | } |
2524 | |
2525 | int ocfs2_free_dinode(handle_t *handle, |
2526 | struct inode *inode_alloc_inode, |
2527 | struct buffer_head *inode_alloc_bh, |
2528 | struct ocfs2_dinode *di) |
2529 | { |
2530 | u64 blk = le64_to_cpu(di->i_blkno); |
2531 | u16 bit = le16_to_cpu(di->i_suballoc_bit); |
2532 | u64 bg_blkno = ocfs2_which_suballoc_group(block: blk, bit); |
2533 | |
2534 | if (di->i_suballoc_loc) |
2535 | bg_blkno = le64_to_cpu(di->i_suballoc_loc); |
2536 | return ocfs2_free_suballoc_bits(handle, alloc_inode: inode_alloc_inode, |
2537 | alloc_bh: inode_alloc_bh, start_bit: bit, bg_blkno, count: 1); |
2538 | } |
2539 | |
2540 | static int _ocfs2_free_clusters(handle_t *handle, |
2541 | struct inode *bitmap_inode, |
2542 | struct buffer_head *bitmap_bh, |
2543 | u64 start_blk, |
2544 | unsigned int num_clusters, |
2545 | void (*undo_fn)(unsigned int bit, |
2546 | unsigned long *bitmap)) |
2547 | { |
2548 | int status; |
2549 | u16 bg_start_bit; |
2550 | u64 bg_blkno; |
2551 | |
2552 | /* You can't ever have a contiguous set of clusters |
2553 | * bigger than a block group bitmap so we never have to worry |
2554 | * about looping on them. |
2555 | * This is expensive. We can safely remove once this stuff has |
2556 | * gotten tested really well. */ |
2557 | BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, |
2558 | ocfs2_blocks_to_clusters(bitmap_inode->i_sb, |
2559 | start_blk))); |
2560 | |
2561 | |
2562 | ocfs2_block_to_cluster_group(inode: bitmap_inode, data_blkno: start_blk, bg_blkno: &bg_blkno, |
2563 | bg_bit_off: &bg_start_bit); |
2564 | |
2565 | trace_ocfs2_free_clusters(bg_blkno: (unsigned long long)bg_blkno, |
2566 | start_blk: (unsigned long long)start_blk, |
2567 | start_bit: bg_start_bit, count: num_clusters); |
2568 | |
2569 | status = _ocfs2_free_suballoc_bits(handle, alloc_inode: bitmap_inode, alloc_bh: bitmap_bh, |
2570 | start_bit: bg_start_bit, bg_blkno, |
2571 | count: num_clusters, undo_fn); |
2572 | if (status < 0) { |
2573 | mlog_errno(status); |
2574 | goto out; |
2575 | } |
2576 | |
2577 | ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb), |
2578 | num_clusters); |
2579 | |
2580 | out: |
2581 | return status; |
2582 | } |
2583 | |
2584 | int ocfs2_free_clusters(handle_t *handle, |
2585 | struct inode *bitmap_inode, |
2586 | struct buffer_head *bitmap_bh, |
2587 | u64 start_blk, |
2588 | unsigned int num_clusters) |
2589 | { |
2590 | return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh, |
2591 | start_blk, num_clusters, |
2592 | undo_fn: _ocfs2_set_bit); |
2593 | } |
2594 | |
2595 | /* |
2596 | * Give never-used clusters back to the global bitmap. We don't need |
2597 | * to protect these bits in the undo buffer. |
2598 | */ |
2599 | int ocfs2_release_clusters(handle_t *handle, |
2600 | struct inode *bitmap_inode, |
2601 | struct buffer_head *bitmap_bh, |
2602 | u64 start_blk, |
2603 | unsigned int num_clusters) |
2604 | { |
2605 | return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh, |
2606 | start_blk, num_clusters, |
2607 | undo_fn: _ocfs2_clear_bit); |
2608 | } |
2609 | |
2610 | /* |
2611 | * For a given allocation, determine which allocators will need to be |
2612 | * accessed, and lock them, reserving the appropriate number of bits. |
2613 | * |
2614 | * Sparse file systems call this from ocfs2_write_begin_nolock() |
2615 | * and ocfs2_allocate_unwritten_extents(). |
2616 | * |
2617 | * File systems which don't support holes call this from |
2618 | * ocfs2_extend_allocation(). |
2619 | */ |
2620 | int ocfs2_lock_allocators(struct inode *inode, |
2621 | struct ocfs2_extent_tree *et, |
2622 | u32 clusters_to_add, u32 extents_to_split, |
2623 | struct ocfs2_alloc_context **data_ac, |
2624 | struct ocfs2_alloc_context **meta_ac) |
2625 | { |
2626 | int ret = 0, num_free_extents; |
2627 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; |
2628 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2629 | |
2630 | *meta_ac = NULL; |
2631 | if (data_ac) |
2632 | *data_ac = NULL; |
2633 | |
2634 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); |
2635 | |
2636 | num_free_extents = ocfs2_num_free_extents(et); |
2637 | if (num_free_extents < 0) { |
2638 | ret = num_free_extents; |
2639 | mlog_errno(ret); |
2640 | goto out; |
2641 | } |
2642 | |
2643 | /* |
2644 | * Sparse allocation file systems need to be more conservative |
2645 | * with reserving room for expansion - the actual allocation |
2646 | * happens while we've got a journal handle open so re-taking |
2647 | * a cluster lock (because we ran out of room for another |
2648 | * extent) will violate ordering rules. |
2649 | * |
2650 | * Most of the time we'll only be seeing this 1 cluster at a time |
2651 | * anyway. |
2652 | * |
2653 | * Always lock for any unwritten extents - we might want to |
2654 | * add blocks during a split. |
2655 | */ |
2656 | if (!num_free_extents || |
2657 | (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { |
2658 | ret = ocfs2_reserve_new_metadata(osb, root_el: et->et_root_el, ac: meta_ac); |
2659 | if (ret < 0) { |
2660 | if (ret != -ENOSPC) |
2661 | mlog_errno(ret); |
2662 | goto out; |
2663 | } |
2664 | } |
2665 | |
2666 | if (clusters_to_add == 0) |
2667 | goto out; |
2668 | |
2669 | ret = ocfs2_reserve_clusters(osb, bits_wanted: clusters_to_add, ac: data_ac); |
2670 | if (ret < 0) { |
2671 | if (ret != -ENOSPC) |
2672 | mlog_errno(ret); |
2673 | goto out; |
2674 | } |
2675 | |
2676 | out: |
2677 | if (ret) { |
2678 | if (*meta_ac) { |
2679 | ocfs2_free_alloc_context(ac: *meta_ac); |
2680 | *meta_ac = NULL; |
2681 | } |
2682 | |
2683 | /* |
2684 | * We cannot have an error and a non null *data_ac. |
2685 | */ |
2686 | } |
2687 | |
2688 | return ret; |
2689 | } |
2690 | |
2691 | /* |
2692 | * Read the inode specified by blkno to get suballoc_slot and |
2693 | * suballoc_bit. |
2694 | */ |
2695 | static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, |
2696 | u16 *suballoc_slot, u64 *group_blkno, |
2697 | u16 *suballoc_bit) |
2698 | { |
2699 | int status; |
2700 | struct buffer_head *inode_bh = NULL; |
2701 | struct ocfs2_dinode *inode_fe; |
2702 | |
2703 | trace_ocfs2_get_suballoc_slot_bit(num: (unsigned long long)blkno); |
2704 | |
2705 | /* dirty read disk */ |
2706 | status = ocfs2_read_blocks_sync(osb, block: blkno, nr: 1, bhs: &inode_bh); |
2707 | if (status < 0) { |
2708 | mlog(ML_ERROR, "read block %llu failed %d\n" , |
2709 | (unsigned long long)blkno, status); |
2710 | goto bail; |
2711 | } |
2712 | |
2713 | inode_fe = (struct ocfs2_dinode *) inode_bh->b_data; |
2714 | if (!OCFS2_IS_VALID_DINODE(inode_fe)) { |
2715 | mlog(ML_ERROR, "invalid inode %llu requested\n" , |
2716 | (unsigned long long)blkno); |
2717 | status = -EINVAL; |
2718 | goto bail; |
2719 | } |
2720 | |
2721 | if (le16_to_cpu(inode_fe->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT && |
2722 | (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) { |
2723 | mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n" , |
2724 | (unsigned long long)blkno, |
2725 | (u32)le16_to_cpu(inode_fe->i_suballoc_slot)); |
2726 | status = -EINVAL; |
2727 | goto bail; |
2728 | } |
2729 | |
2730 | if (suballoc_slot) |
2731 | *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); |
2732 | if (suballoc_bit) |
2733 | *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); |
2734 | if (group_blkno) |
2735 | *group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc); |
2736 | |
2737 | bail: |
2738 | brelse(bh: inode_bh); |
2739 | |
2740 | if (status) |
2741 | mlog_errno(status); |
2742 | return status; |
2743 | } |
2744 | |
2745 | /* |
2746 | * test whether bit is SET in allocator bitmap or not. on success, 0 |
2747 | * is returned and *res is 1 for SET; 0 otherwise. when fails, errno |
2748 | * is returned and *res is meaningless. Call this after you have |
2749 | * cluster locked against suballoc, or you may get a result based on |
2750 | * non-up2date contents |
2751 | */ |
2752 | static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, |
2753 | struct inode *suballoc, |
2754 | struct buffer_head *alloc_bh, |
2755 | u64 group_blkno, u64 blkno, |
2756 | u16 bit, int *res) |
2757 | { |
2758 | struct ocfs2_dinode *alloc_di; |
2759 | struct ocfs2_group_desc *group; |
2760 | struct buffer_head *group_bh = NULL; |
2761 | u64 bg_blkno; |
2762 | int status; |
2763 | |
2764 | trace_ocfs2_test_suballoc_bit(val1: (unsigned long long)blkno, |
2765 | val2: (unsigned int)bit); |
2766 | |
2767 | alloc_di = (struct ocfs2_dinode *)alloc_bh->b_data; |
2768 | if ((bit + 1) > ocfs2_bits_per_group(cl: &alloc_di->id2.i_chain)) { |
2769 | mlog(ML_ERROR, "suballoc bit %u out of range of %u\n" , |
2770 | (unsigned int)bit, |
2771 | ocfs2_bits_per_group(&alloc_di->id2.i_chain)); |
2772 | status = -EINVAL; |
2773 | goto bail; |
2774 | } |
2775 | |
2776 | bg_blkno = group_blkno ? group_blkno : |
2777 | ocfs2_which_suballoc_group(block: blkno, bit); |
2778 | status = ocfs2_read_group_descriptor(inode: suballoc, di: alloc_di, gd_blkno: bg_blkno, |
2779 | bh: &group_bh); |
2780 | if (status < 0) { |
2781 | mlog(ML_ERROR, "read group %llu failed %d\n" , |
2782 | (unsigned long long)bg_blkno, status); |
2783 | goto bail; |
2784 | } |
2785 | |
2786 | group = (struct ocfs2_group_desc *) group_bh->b_data; |
2787 | *res = ocfs2_test_bit(nr: bit, addr: (unsigned long *)group->bg_bitmap); |
2788 | |
2789 | bail: |
2790 | brelse(bh: group_bh); |
2791 | |
2792 | if (status) |
2793 | mlog_errno(status); |
2794 | return status; |
2795 | } |
2796 | |
2797 | /* |
2798 | * Test if the bit representing this inode (blkno) is set in the |
2799 | * suballocator. |
2800 | * |
2801 | * On success, 0 is returned and *res is 1 for SET; 0 otherwise. |
2802 | * |
2803 | * In the event of failure, a negative value is returned and *res is |
2804 | * meaningless. |
2805 | * |
2806 | * Callers must make sure to hold nfs_sync_lock to prevent |
2807 | * ocfs2_delete_inode() on another node from accessing the same |
2808 | * suballocator concurrently. |
2809 | */ |
2810 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) |
2811 | { |
2812 | int status; |
2813 | u64 group_blkno = 0; |
2814 | u16 suballoc_bit = 0, suballoc_slot = 0; |
2815 | struct inode *inode_alloc_inode; |
2816 | struct buffer_head *alloc_bh = NULL; |
2817 | |
2818 | trace_ocfs2_test_inode_bit(num: (unsigned long long)blkno); |
2819 | |
2820 | status = ocfs2_get_suballoc_slot_bit(osb, blkno, suballoc_slot: &suballoc_slot, |
2821 | group_blkno: &group_blkno, suballoc_bit: &suballoc_bit); |
2822 | if (status < 0) { |
2823 | mlog(ML_ERROR, "get alloc slot and bit failed %d\n" , status); |
2824 | goto bail; |
2825 | } |
2826 | |
2827 | if (suballoc_slot == (u16)OCFS2_INVALID_SLOT) |
2828 | inode_alloc_inode = ocfs2_get_system_file_inode(osb, |
2829 | type: GLOBAL_INODE_ALLOC_SYSTEM_INODE, slot: suballoc_slot); |
2830 | else |
2831 | inode_alloc_inode = ocfs2_get_system_file_inode(osb, |
2832 | type: INODE_ALLOC_SYSTEM_INODE, slot: suballoc_slot); |
2833 | if (!inode_alloc_inode) { |
2834 | /* the error code could be inaccurate, but we are not able to |
2835 | * get the correct one. */ |
2836 | status = -EINVAL; |
2837 | mlog(ML_ERROR, "unable to get alloc inode in slot %u\n" , |
2838 | (u32)suballoc_slot); |
2839 | goto bail; |
2840 | } |
2841 | |
2842 | inode_lock(inode: inode_alloc_inode); |
2843 | status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0); |
2844 | if (status < 0) { |
2845 | inode_unlock(inode: inode_alloc_inode); |
2846 | iput(inode_alloc_inode); |
2847 | mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n" , |
2848 | (u32)suballoc_slot, status); |
2849 | goto bail; |
2850 | } |
2851 | |
2852 | status = ocfs2_test_suballoc_bit(osb, suballoc: inode_alloc_inode, alloc_bh, |
2853 | group_blkno, blkno, bit: suballoc_bit, res); |
2854 | if (status < 0) |
2855 | mlog(ML_ERROR, "test suballoc bit failed %d\n" , status); |
2856 | |
2857 | ocfs2_inode_unlock(inode: inode_alloc_inode, ex: 0); |
2858 | inode_unlock(inode: inode_alloc_inode); |
2859 | |
2860 | iput(inode_alloc_inode); |
2861 | brelse(bh: alloc_bh); |
2862 | bail: |
2863 | if (status) |
2864 | mlog_errno(status); |
2865 | return status; |
2866 | } |
2867 | |