1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
4 | * Copyright (c) 2018 Red Hat, Inc. |
5 | * All rights reserved. |
6 | */ |
7 | |
8 | #include "xfs.h" |
9 | #include "xfs_fs.h" |
10 | #include "xfs_shared.h" |
11 | #include "xfs_format.h" |
12 | #include "xfs_trans_resv.h" |
13 | #include "xfs_bit.h" |
14 | #include "xfs_sb.h" |
15 | #include "xfs_mount.h" |
16 | #include "xfs_btree.h" |
17 | #include "xfs_alloc_btree.h" |
18 | #include "xfs_rmap_btree.h" |
19 | #include "xfs_alloc.h" |
20 | #include "xfs_ialloc.h" |
21 | #include "xfs_rmap.h" |
22 | #include "xfs_ag.h" |
23 | #include "xfs_ag_resv.h" |
24 | #include "xfs_health.h" |
25 | #include "xfs_error.h" |
26 | #include "xfs_bmap.h" |
27 | #include "xfs_defer.h" |
28 | #include "xfs_log_format.h" |
29 | #include "xfs_trans.h" |
30 | #include "xfs_trace.h" |
31 | #include "xfs_inode.h" |
32 | #include "xfs_icache.h" |
33 | |
34 | |
35 | /* |
36 | * Passive reference counting access wrappers to the perag structures. If the |
37 | * per-ag structure is to be freed, the freeing code is responsible for cleaning |
38 | * up objects with passive references before freeing the structure. This is |
39 | * things like cached buffers. |
40 | */ |
41 | struct xfs_perag * |
42 | xfs_perag_get( |
43 | struct xfs_mount *mp, |
44 | xfs_agnumber_t agno) |
45 | { |
46 | struct xfs_perag *pag; |
47 | |
48 | rcu_read_lock(); |
49 | pag = radix_tree_lookup(&mp->m_perag_tree, agno); |
50 | if (pag) { |
51 | trace_xfs_perag_get(pag, _RET_IP_); |
52 | ASSERT(atomic_read(&pag->pag_ref) >= 0); |
53 | atomic_inc(&pag->pag_ref); |
54 | } |
55 | rcu_read_unlock(); |
56 | return pag; |
57 | } |
58 | |
59 | /* |
60 | * search from @first to find the next perag with the given tag set. |
61 | */ |
62 | struct xfs_perag * |
63 | xfs_perag_get_tag( |
64 | struct xfs_mount *mp, |
65 | xfs_agnumber_t first, |
66 | unsigned int tag) |
67 | { |
68 | struct xfs_perag *pag; |
69 | int found; |
70 | |
71 | rcu_read_lock(); |
72 | found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, |
73 | (void **)&pag, first, 1, tag); |
74 | if (found <= 0) { |
75 | rcu_read_unlock(); |
76 | return NULL; |
77 | } |
78 | trace_xfs_perag_get_tag(pag, _RET_IP_); |
79 | atomic_inc(&pag->pag_ref); |
80 | rcu_read_unlock(); |
81 | return pag; |
82 | } |
83 | |
84 | /* Get a passive reference to the given perag. */ |
85 | struct xfs_perag * |
86 | xfs_perag_hold( |
87 | struct xfs_perag *pag) |
88 | { |
89 | ASSERT(atomic_read(&pag->pag_ref) > 0 || |
90 | atomic_read(&pag->pag_active_ref) > 0); |
91 | |
92 | trace_xfs_perag_hold(pag, _RET_IP_); |
93 | atomic_inc(&pag->pag_ref); |
94 | return pag; |
95 | } |
96 | |
97 | void |
98 | xfs_perag_put( |
99 | struct xfs_perag *pag) |
100 | { |
101 | trace_xfs_perag_put(pag, _RET_IP_); |
102 | ASSERT(atomic_read(&pag->pag_ref) > 0); |
103 | atomic_dec(&pag->pag_ref); |
104 | } |
105 | |
106 | /* |
107 | * Active references for perag structures. This is for short term access to the |
108 | * per ag structures for walking trees or accessing state. If an AG is being |
109 | * shrunk or is offline, then this will fail to find that AG and return NULL |
110 | * instead. |
111 | */ |
112 | struct xfs_perag * |
113 | xfs_perag_grab( |
114 | struct xfs_mount *mp, |
115 | xfs_agnumber_t agno) |
116 | { |
117 | struct xfs_perag *pag; |
118 | |
119 | rcu_read_lock(); |
120 | pag = radix_tree_lookup(&mp->m_perag_tree, agno); |
121 | if (pag) { |
122 | trace_xfs_perag_grab(pag, _RET_IP_); |
123 | if (!atomic_inc_not_zero(&pag->pag_active_ref)) |
124 | pag = NULL; |
125 | } |
126 | rcu_read_unlock(); |
127 | return pag; |
128 | } |
129 | |
130 | /* |
131 | * search from @first to find the next perag with the given tag set. |
132 | */ |
133 | struct xfs_perag * |
134 | xfs_perag_grab_tag( |
135 | struct xfs_mount *mp, |
136 | xfs_agnumber_t first, |
137 | int tag) |
138 | { |
139 | struct xfs_perag *pag; |
140 | int found; |
141 | |
142 | rcu_read_lock(); |
143 | found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, |
144 | (void **)&pag, first, 1, tag); |
145 | if (found <= 0) { |
146 | rcu_read_unlock(); |
147 | return NULL; |
148 | } |
149 | trace_xfs_perag_grab_tag(pag, _RET_IP_); |
150 | if (!atomic_inc_not_zero(&pag->pag_active_ref)) |
151 | pag = NULL; |
152 | rcu_read_unlock(); |
153 | return pag; |
154 | } |
155 | |
156 | void |
157 | xfs_perag_rele( |
158 | struct xfs_perag *pag) |
159 | { |
160 | trace_xfs_perag_rele(pag, _RET_IP_); |
161 | if (atomic_dec_and_test(&pag->pag_active_ref)) |
162 | wake_up(&pag->pag_active_wq); |
163 | } |
164 | |
165 | /* |
166 | * xfs_initialize_perag_data |
167 | * |
168 | * Read in each per-ag structure so we can count up the number of |
169 | * allocated inodes, free inodes and used filesystem blocks as this |
170 | * information is no longer persistent in the superblock. Once we have |
171 | * this information, write it into the in-core superblock structure. |
172 | */ |
173 | int |
174 | xfs_initialize_perag_data( |
175 | struct xfs_mount *mp, |
176 | xfs_agnumber_t agcount) |
177 | { |
178 | xfs_agnumber_t index; |
179 | struct xfs_perag *pag; |
180 | struct xfs_sb *sbp = &mp->m_sb; |
181 | uint64_t ifree = 0; |
182 | uint64_t ialloc = 0; |
183 | uint64_t bfree = 0; |
184 | uint64_t bfreelst = 0; |
185 | uint64_t btree = 0; |
186 | uint64_t fdblocks; |
187 | int error = 0; |
188 | |
189 | for (index = 0; index < agcount; index++) { |
190 | /* |
191 | * Read the AGF and AGI buffers to populate the per-ag |
192 | * structures for us. |
193 | */ |
194 | pag = xfs_perag_get(mp, index); |
195 | error = xfs_alloc_read_agf(pag, NULL, 0, NULL); |
196 | if (!error) |
197 | error = xfs_ialloc_read_agi(pag, NULL, NULL); |
198 | if (error) { |
199 | xfs_perag_put(pag); |
200 | return error; |
201 | } |
202 | |
203 | ifree += pag->pagi_freecount; |
204 | ialloc += pag->pagi_count; |
205 | bfree += pag->pagf_freeblks; |
206 | bfreelst += pag->pagf_flcount; |
207 | btree += pag->pagf_btreeblks; |
208 | xfs_perag_put(pag); |
209 | } |
210 | fdblocks = bfree + bfreelst + btree; |
211 | |
212 | /* |
213 | * If the new summary counts are obviously incorrect, fail the |
214 | * mount operation because that implies the AGFs are also corrupt. |
215 | * Clear FS_COUNTERS so that we don't unmount with a dirty log, which |
216 | * will prevent xfs_repair from fixing anything. |
217 | */ |
218 | if (fdblocks > sbp->sb_dblocks || ifree > ialloc) { |
219 | xfs_alert(mp, "AGF corruption. Please run xfs_repair." ); |
220 | error = -EFSCORRUPTED; |
221 | goto out; |
222 | } |
223 | |
224 | /* Overwrite incore superblock counters with just-read data */ |
225 | spin_lock(&mp->m_sb_lock); |
226 | sbp->sb_ifree = ifree; |
227 | sbp->sb_icount = ialloc; |
228 | sbp->sb_fdblocks = fdblocks; |
229 | spin_unlock(&mp->m_sb_lock); |
230 | |
231 | xfs_reinit_percpu_counters(mp); |
232 | out: |
233 | xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS); |
234 | return error; |
235 | } |
236 | |
237 | STATIC void |
238 | __xfs_free_perag( |
239 | struct rcu_head *head) |
240 | { |
241 | struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); |
242 | |
243 | ASSERT(!delayed_work_pending(&pag->pag_blockgc_work)); |
244 | kmem_free(pag); |
245 | } |
246 | |
247 | /* |
248 | * Free up the per-ag resources associated with the mount structure. |
249 | */ |
250 | void |
251 | xfs_free_perag( |
252 | struct xfs_mount *mp) |
253 | { |
254 | struct xfs_perag *pag; |
255 | xfs_agnumber_t agno; |
256 | |
257 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { |
258 | spin_lock(&mp->m_perag_lock); |
259 | pag = radix_tree_delete(&mp->m_perag_tree, agno); |
260 | spin_unlock(&mp->m_perag_lock); |
261 | ASSERT(pag); |
262 | XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0); |
263 | xfs_defer_drain_free(&pag->pag_intents_drain); |
264 | |
265 | cancel_delayed_work_sync(&pag->pag_blockgc_work); |
266 | xfs_buf_hash_destroy(pag); |
267 | |
268 | /* drop the mount's active reference */ |
269 | xfs_perag_rele(pag); |
270 | XFS_IS_CORRUPT(pag->pag_mount, |
271 | atomic_read(&pag->pag_active_ref) != 0); |
272 | call_rcu(&pag->rcu_head, __xfs_free_perag); |
273 | } |
274 | } |
275 | |
276 | /* Find the size of the AG, in blocks. */ |
277 | static xfs_agblock_t |
278 | __xfs_ag_block_count( |
279 | struct xfs_mount *mp, |
280 | xfs_agnumber_t agno, |
281 | xfs_agnumber_t agcount, |
282 | xfs_rfsblock_t dblocks) |
283 | { |
284 | ASSERT(agno < agcount); |
285 | |
286 | if (agno < agcount - 1) |
287 | return mp->m_sb.sb_agblocks; |
288 | return dblocks - (agno * mp->m_sb.sb_agblocks); |
289 | } |
290 | |
291 | xfs_agblock_t |
292 | xfs_ag_block_count( |
293 | struct xfs_mount *mp, |
294 | xfs_agnumber_t agno) |
295 | { |
296 | return __xfs_ag_block_count(mp, agno, mp->m_sb.sb_agcount, |
297 | mp->m_sb.sb_dblocks); |
298 | } |
299 | |
300 | /* Calculate the first and last possible inode number in an AG. */ |
301 | static void |
302 | __xfs_agino_range( |
303 | struct xfs_mount *mp, |
304 | xfs_agblock_t eoag, |
305 | xfs_agino_t *first, |
306 | xfs_agino_t *last) |
307 | { |
308 | xfs_agblock_t bno; |
309 | |
310 | /* |
311 | * Calculate the first inode, which will be in the first |
312 | * cluster-aligned block after the AGFL. |
313 | */ |
314 | bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align); |
315 | *first = XFS_AGB_TO_AGINO(mp, bno); |
316 | |
317 | /* |
318 | * Calculate the last inode, which will be at the end of the |
319 | * last (aligned) cluster that can be allocated in the AG. |
320 | */ |
321 | bno = round_down(eoag, M_IGEO(mp)->cluster_align); |
322 | *last = XFS_AGB_TO_AGINO(mp, bno) - 1; |
323 | } |
324 | |
325 | void |
326 | xfs_agino_range( |
327 | struct xfs_mount *mp, |
328 | xfs_agnumber_t agno, |
329 | xfs_agino_t *first, |
330 | xfs_agino_t *last) |
331 | { |
332 | return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last); |
333 | } |
334 | |
335 | int |
336 | xfs_initialize_perag( |
337 | struct xfs_mount *mp, |
338 | xfs_agnumber_t agcount, |
339 | xfs_rfsblock_t dblocks, |
340 | xfs_agnumber_t *maxagi) |
341 | { |
342 | struct xfs_perag *pag; |
343 | xfs_agnumber_t index; |
344 | xfs_agnumber_t first_initialised = NULLAGNUMBER; |
345 | int error; |
346 | |
347 | /* |
348 | * Walk the current per-ag tree so we don't try to initialise AGs |
349 | * that already exist (growfs case). Allocate and insert all the |
350 | * AGs we don't find ready for initialisation. |
351 | */ |
352 | for (index = 0; index < agcount; index++) { |
353 | pag = xfs_perag_get(mp, index); |
354 | if (pag) { |
355 | xfs_perag_put(pag); |
356 | continue; |
357 | } |
358 | |
359 | pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); |
360 | if (!pag) { |
361 | error = -ENOMEM; |
362 | goto out_unwind_new_pags; |
363 | } |
364 | pag->pag_agno = index; |
365 | pag->pag_mount = mp; |
366 | |
367 | error = radix_tree_preload(GFP_NOFS); |
368 | if (error) |
369 | goto out_free_pag; |
370 | |
371 | spin_lock(&mp->m_perag_lock); |
372 | if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { |
373 | WARN_ON_ONCE(1); |
374 | spin_unlock(&mp->m_perag_lock); |
375 | radix_tree_preload_end(); |
376 | error = -EEXIST; |
377 | goto out_free_pag; |
378 | } |
379 | spin_unlock(&mp->m_perag_lock); |
380 | radix_tree_preload_end(); |
381 | |
382 | #ifdef __KERNEL__ |
383 | /* Place kernel structure only init below this point. */ |
384 | spin_lock_init(&pag->pag_ici_lock); |
385 | spin_lock_init(&pag->pagb_lock); |
386 | spin_lock_init(&pag->pag_state_lock); |
387 | INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker); |
388 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); |
389 | xfs_defer_drain_init(&pag->pag_intents_drain); |
390 | init_waitqueue_head(&pag->pagb_wait); |
391 | init_waitqueue_head(&pag->pag_active_wq); |
392 | pag->pagb_count = 0; |
393 | pag->pagb_tree = RB_ROOT; |
394 | #endif /* __KERNEL__ */ |
395 | |
396 | error = xfs_buf_hash_init(pag); |
397 | if (error) |
398 | goto out_remove_pag; |
399 | |
400 | /* Active ref owned by mount indicates AG is online. */ |
401 | atomic_set(&pag->pag_active_ref, 1); |
402 | |
403 | /* first new pag is fully initialized */ |
404 | if (first_initialised == NULLAGNUMBER) |
405 | first_initialised = index; |
406 | |
407 | /* |
408 | * Pre-calculated geometry |
409 | */ |
410 | pag->block_count = __xfs_ag_block_count(mp, index, agcount, |
411 | dblocks); |
412 | pag->min_block = XFS_AGFL_BLOCK(mp); |
413 | __xfs_agino_range(mp, pag->block_count, &pag->agino_min, |
414 | &pag->agino_max); |
415 | } |
416 | |
417 | index = xfs_set_inode_alloc(mp, agcount); |
418 | |
419 | if (maxagi) |
420 | *maxagi = index; |
421 | |
422 | mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); |
423 | return 0; |
424 | |
425 | out_remove_pag: |
426 | xfs_defer_drain_free(&pag->pag_intents_drain); |
427 | radix_tree_delete(&mp->m_perag_tree, index); |
428 | out_free_pag: |
429 | kmem_free(pag); |
430 | out_unwind_new_pags: |
431 | /* unwind any prior newly initialized pags */ |
432 | for (index = first_initialised; index < agcount; index++) { |
433 | pag = radix_tree_delete(&mp->m_perag_tree, index); |
434 | if (!pag) |
435 | break; |
436 | xfs_buf_hash_destroy(pag); |
437 | xfs_defer_drain_free(&pag->pag_intents_drain); |
438 | kmem_free(pag); |
439 | } |
440 | return error; |
441 | } |
442 | |
443 | static int |
444 | xfs_get_aghdr_buf( |
445 | struct xfs_mount *mp, |
446 | xfs_daddr_t blkno, |
447 | size_t numblks, |
448 | struct xfs_buf **bpp, |
449 | const struct xfs_buf_ops *ops) |
450 | { |
451 | struct xfs_buf *bp; |
452 | int error; |
453 | |
454 | error = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0, &bp); |
455 | if (error) |
456 | return error; |
457 | |
458 | bp->b_maps[0].bm_bn = blkno; |
459 | bp->b_ops = ops; |
460 | |
461 | *bpp = bp; |
462 | return 0; |
463 | } |
464 | |
465 | /* |
466 | * Generic btree root block init function |
467 | */ |
468 | static void |
469 | xfs_btroot_init( |
470 | struct xfs_mount *mp, |
471 | struct xfs_buf *bp, |
472 | struct aghdr_init_data *id) |
473 | { |
474 | xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno); |
475 | } |
476 | |
477 | /* Finish initializing a free space btree. */ |
478 | static void |
479 | xfs_freesp_init_recs( |
480 | struct xfs_mount *mp, |
481 | struct xfs_buf *bp, |
482 | struct aghdr_init_data *id) |
483 | { |
484 | struct xfs_alloc_rec *arec; |
485 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
486 | |
487 | arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); |
488 | arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); |
489 | |
490 | if (xfs_ag_contains_log(mp, id->agno)) { |
491 | struct xfs_alloc_rec *nrec; |
492 | xfs_agblock_t start = XFS_FSB_TO_AGBNO(mp, |
493 | mp->m_sb.sb_logstart); |
494 | |
495 | ASSERT(start >= mp->m_ag_prealloc_blocks); |
496 | if (start != mp->m_ag_prealloc_blocks) { |
497 | /* |
498 | * Modify first record to pad stripe align of log and |
499 | * bump the record count. |
500 | */ |
501 | arec->ar_blockcount = cpu_to_be32(start - |
502 | mp->m_ag_prealloc_blocks); |
503 | be16_add_cpu(&block->bb_numrecs, 1); |
504 | nrec = arec + 1; |
505 | |
506 | /* |
507 | * Insert second record at start of internal log |
508 | * which then gets trimmed. |
509 | */ |
510 | nrec->ar_startblock = cpu_to_be32( |
511 | be32_to_cpu(arec->ar_startblock) + |
512 | be32_to_cpu(arec->ar_blockcount)); |
513 | arec = nrec; |
514 | } |
515 | /* |
516 | * Change record start to after the internal log |
517 | */ |
518 | be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks); |
519 | } |
520 | |
521 | /* |
522 | * Calculate the block count of this record; if it is nonzero, |
523 | * increment the record count. |
524 | */ |
525 | arec->ar_blockcount = cpu_to_be32(id->agsize - |
526 | be32_to_cpu(arec->ar_startblock)); |
527 | if (arec->ar_blockcount) |
528 | be16_add_cpu(&block->bb_numrecs, 1); |
529 | } |
530 | |
531 | /* |
532 | * Alloc btree root block init functions |
533 | */ |
534 | static void |
535 | xfs_bnoroot_init( |
536 | struct xfs_mount *mp, |
537 | struct xfs_buf *bp, |
538 | struct aghdr_init_data *id) |
539 | { |
540 | xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno); |
541 | xfs_freesp_init_recs(mp, bp, id); |
542 | } |
543 | |
544 | static void |
545 | xfs_cntroot_init( |
546 | struct xfs_mount *mp, |
547 | struct xfs_buf *bp, |
548 | struct aghdr_init_data *id) |
549 | { |
550 | xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno); |
551 | xfs_freesp_init_recs(mp, bp, id); |
552 | } |
553 | |
554 | /* |
555 | * Reverse map root block init |
556 | */ |
557 | static void |
558 | xfs_rmaproot_init( |
559 | struct xfs_mount *mp, |
560 | struct xfs_buf *bp, |
561 | struct aghdr_init_data *id) |
562 | { |
563 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
564 | struct xfs_rmap_rec *rrec; |
565 | |
566 | xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno); |
567 | |
568 | /* |
569 | * mark the AG header regions as static metadata The BNO |
570 | * btree block is the first block after the headers, so |
571 | * it's location defines the size of region the static |
572 | * metadata consumes. |
573 | * |
574 | * Note: unlike mkfs, we never have to account for log |
575 | * space when growing the data regions |
576 | */ |
577 | rrec = XFS_RMAP_REC_ADDR(block, 1); |
578 | rrec->rm_startblock = 0; |
579 | rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp)); |
580 | rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS); |
581 | rrec->rm_offset = 0; |
582 | |
583 | /* account freespace btree root blocks */ |
584 | rrec = XFS_RMAP_REC_ADDR(block, 2); |
585 | rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp)); |
586 | rrec->rm_blockcount = cpu_to_be32(2); |
587 | rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); |
588 | rrec->rm_offset = 0; |
589 | |
590 | /* account inode btree root blocks */ |
591 | rrec = XFS_RMAP_REC_ADDR(block, 3); |
592 | rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp)); |
593 | rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) - |
594 | XFS_IBT_BLOCK(mp)); |
595 | rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT); |
596 | rrec->rm_offset = 0; |
597 | |
598 | /* account for rmap btree root */ |
599 | rrec = XFS_RMAP_REC_ADDR(block, 4); |
600 | rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp)); |
601 | rrec->rm_blockcount = cpu_to_be32(1); |
602 | rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); |
603 | rrec->rm_offset = 0; |
604 | |
605 | /* account for refc btree root */ |
606 | if (xfs_has_reflink(mp)) { |
607 | rrec = XFS_RMAP_REC_ADDR(block, 5); |
608 | rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp)); |
609 | rrec->rm_blockcount = cpu_to_be32(1); |
610 | rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC); |
611 | rrec->rm_offset = 0; |
612 | be16_add_cpu(&block->bb_numrecs, 1); |
613 | } |
614 | |
615 | /* account for the log space */ |
616 | if (xfs_ag_contains_log(mp, id->agno)) { |
617 | rrec = XFS_RMAP_REC_ADDR(block, |
618 | be16_to_cpu(block->bb_numrecs) + 1); |
619 | rrec->rm_startblock = cpu_to_be32( |
620 | XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart)); |
621 | rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks); |
622 | rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG); |
623 | rrec->rm_offset = 0; |
624 | be16_add_cpu(&block->bb_numrecs, 1); |
625 | } |
626 | } |
627 | |
628 | /* |
629 | * Initialise new secondary superblocks with the pre-grow geometry, but mark |
630 | * them as "in progress" so we know they haven't yet been activated. This will |
631 | * get cleared when the update with the new geometry information is done after |
632 | * changes to the primary are committed. This isn't strictly necessary, but we |
633 | * get it for free with the delayed buffer write lists and it means we can tell |
634 | * if a grow operation didn't complete properly after the fact. |
635 | */ |
636 | static void |
637 | xfs_sbblock_init( |
638 | struct xfs_mount *mp, |
639 | struct xfs_buf *bp, |
640 | struct aghdr_init_data *id) |
641 | { |
642 | struct xfs_dsb *dsb = bp->b_addr; |
643 | |
644 | xfs_sb_to_disk(to: dsb, from: &mp->m_sb); |
645 | dsb->sb_inprogress = 1; |
646 | } |
647 | |
648 | static void |
649 | xfs_agfblock_init( |
650 | struct xfs_mount *mp, |
651 | struct xfs_buf *bp, |
652 | struct aghdr_init_data *id) |
653 | { |
654 | struct xfs_agf *agf = bp->b_addr; |
655 | xfs_extlen_t tmpsize; |
656 | |
657 | agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); |
658 | agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); |
659 | agf->agf_seqno = cpu_to_be32(id->agno); |
660 | agf->agf_length = cpu_to_be32(id->agsize); |
661 | agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp)); |
662 | agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp)); |
663 | agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1); |
664 | agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1); |
665 | if (xfs_has_rmapbt(mp)) { |
666 | agf->agf_roots[XFS_BTNUM_RMAPi] = |
667 | cpu_to_be32(XFS_RMAP_BLOCK(mp)); |
668 | agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1); |
669 | agf->agf_rmap_blocks = cpu_to_be32(1); |
670 | } |
671 | |
672 | agf->agf_flfirst = cpu_to_be32(1); |
673 | agf->agf_fllast = 0; |
674 | agf->agf_flcount = 0; |
675 | tmpsize = id->agsize - mp->m_ag_prealloc_blocks; |
676 | agf->agf_freeblks = cpu_to_be32(tmpsize); |
677 | agf->agf_longest = cpu_to_be32(tmpsize); |
678 | if (xfs_has_crc(mp)) |
679 | uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); |
680 | if (xfs_has_reflink(mp)) { |
681 | agf->agf_refcount_root = cpu_to_be32( |
682 | xfs_refc_block(mp)); |
683 | agf->agf_refcount_level = cpu_to_be32(1); |
684 | agf->agf_refcount_blocks = cpu_to_be32(1); |
685 | } |
686 | |
687 | if (xfs_ag_contains_log(mp, id->agno)) { |
688 | int64_t logblocks = mp->m_sb.sb_logblocks; |
689 | |
690 | be32_add_cpu(&agf->agf_freeblks, -logblocks); |
691 | agf->agf_longest = cpu_to_be32(id->agsize - |
692 | XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks); |
693 | } |
694 | } |
695 | |
696 | static void |
697 | xfs_agflblock_init( |
698 | struct xfs_mount *mp, |
699 | struct xfs_buf *bp, |
700 | struct aghdr_init_data *id) |
701 | { |
702 | struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); |
703 | __be32 *agfl_bno; |
704 | int bucket; |
705 | |
706 | if (xfs_has_crc(mp)) { |
707 | agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC); |
708 | agfl->agfl_seqno = cpu_to_be32(id->agno); |
709 | uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); |
710 | } |
711 | |
712 | agfl_bno = xfs_buf_to_agfl_bno(bp); |
713 | for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++) |
714 | agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); |
715 | } |
716 | |
717 | static void |
718 | xfs_agiblock_init( |
719 | struct xfs_mount *mp, |
720 | struct xfs_buf *bp, |
721 | struct aghdr_init_data *id) |
722 | { |
723 | struct xfs_agi *agi = bp->b_addr; |
724 | int bucket; |
725 | |
726 | agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); |
727 | agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); |
728 | agi->agi_seqno = cpu_to_be32(id->agno); |
729 | agi->agi_length = cpu_to_be32(id->agsize); |
730 | agi->agi_count = 0; |
731 | agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp)); |
732 | agi->agi_level = cpu_to_be32(1); |
733 | agi->agi_freecount = 0; |
734 | agi->agi_newino = cpu_to_be32(NULLAGINO); |
735 | agi->agi_dirino = cpu_to_be32(NULLAGINO); |
736 | if (xfs_has_crc(mp)) |
737 | uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid); |
738 | if (xfs_has_finobt(mp)) { |
739 | agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp)); |
740 | agi->agi_free_level = cpu_to_be32(1); |
741 | } |
742 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) |
743 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); |
744 | if (xfs_has_inobtcounts(mp)) { |
745 | agi->agi_iblocks = cpu_to_be32(1); |
746 | if (xfs_has_finobt(mp)) |
747 | agi->agi_fblocks = cpu_to_be32(1); |
748 | } |
749 | } |
750 | |
751 | typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp, |
752 | struct aghdr_init_data *id); |
753 | static int |
754 | xfs_ag_init_hdr( |
755 | struct xfs_mount *mp, |
756 | struct aghdr_init_data *id, |
757 | aghdr_init_work_f work, |
758 | const struct xfs_buf_ops *ops) |
759 | { |
760 | struct xfs_buf *bp; |
761 | int error; |
762 | |
763 | error = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, &bp, ops); |
764 | if (error) |
765 | return error; |
766 | |
767 | (*work)(mp, bp, id); |
768 | |
769 | xfs_buf_delwri_queue(bp, &id->buffer_list); |
770 | xfs_buf_relse(bp); |
771 | return 0; |
772 | } |
773 | |
774 | struct xfs_aghdr_grow_data { |
775 | xfs_daddr_t daddr; |
776 | size_t numblks; |
777 | const struct xfs_buf_ops *ops; |
778 | aghdr_init_work_f work; |
779 | xfs_btnum_t type; |
780 | bool need_init; |
781 | }; |
782 | |
783 | /* |
784 | * Prepare new AG headers to be written to disk. We use uncached buffers here, |
785 | * as it is assumed these new AG headers are currently beyond the currently |
786 | * valid filesystem address space. Using cached buffers would trip over EOFS |
 * corruption detection algorithms in the buffer cache lookup routines.
788 | * |
789 | * This is a non-transactional function, but the prepared buffers are added to a |
790 | * delayed write buffer list supplied by the caller so they can submit them to |
791 | * disk and wait on them as required. |
792 | */ |
793 | int |
794 | ( |
795 | struct xfs_mount *mp, |
796 | struct aghdr_init_data *id) |
797 | |
798 | { |
799 | struct xfs_aghdr_grow_data aghdr_data[] = { |
800 | { /* SB */ |
801 | .daddr = XFS_AG_DADDR(mp, id->agno, XFS_SB_DADDR), |
802 | .numblks = XFS_FSS_TO_BB(mp, 1), |
803 | .ops = &xfs_sb_buf_ops, |
804 | .work = &xfs_sbblock_init, |
805 | .need_init = true |
806 | }, |
807 | { /* AGF */ |
808 | .daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGF_DADDR(mp)), |
809 | .numblks = XFS_FSS_TO_BB(mp, 1), |
810 | .ops = &xfs_agf_buf_ops, |
811 | .work = &xfs_agfblock_init, |
812 | .need_init = true |
813 | }, |
814 | { /* AGFL */ |
815 | .daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGFL_DADDR(mp)), |
816 | .numblks = XFS_FSS_TO_BB(mp, 1), |
817 | .ops = &xfs_agfl_buf_ops, |
818 | .work = &xfs_agflblock_init, |
819 | .need_init = true |
820 | }, |
821 | { /* AGI */ |
822 | .daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGI_DADDR(mp)), |
823 | .numblks = XFS_FSS_TO_BB(mp, 1), |
824 | .ops = &xfs_agi_buf_ops, |
825 | .work = &xfs_agiblock_init, |
826 | .need_init = true |
827 | }, |
828 | { /* BNO root block */ |
829 | .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_BNO_BLOCK(mp)), |
830 | .numblks = BTOBB(mp->m_sb.sb_blocksize), |
831 | .ops = &xfs_bnobt_buf_ops, |
832 | .work = &xfs_bnoroot_init, |
833 | .need_init = true |
834 | }, |
835 | { /* CNT root block */ |
836 | .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)), |
837 | .numblks = BTOBB(mp->m_sb.sb_blocksize), |
838 | .ops = &xfs_cntbt_buf_ops, |
839 | .work = &xfs_cntroot_init, |
840 | .need_init = true |
841 | }, |
842 | { /* INO root block */ |
843 | .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_IBT_BLOCK(mp)), |
844 | .numblks = BTOBB(mp->m_sb.sb_blocksize), |
845 | .ops = &xfs_inobt_buf_ops, |
846 | .work = &xfs_btroot_init, |
847 | .type = XFS_BTNUM_INO, |
848 | .need_init = true |
849 | }, |
850 | { /* FINO root block */ |
851 | .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_FIBT_BLOCK(mp)), |
852 | .numblks = BTOBB(mp->m_sb.sb_blocksize), |
853 | .ops = &xfs_finobt_buf_ops, |
854 | .work = &xfs_btroot_init, |
855 | .type = XFS_BTNUM_FINO, |
856 | .need_init = xfs_has_finobt(mp) |
857 | }, |
858 | { /* RMAP root block */ |
859 | .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)), |
860 | .numblks = BTOBB(mp->m_sb.sb_blocksize), |
861 | .ops = &xfs_rmapbt_buf_ops, |
862 | .work = &xfs_rmaproot_init, |
863 | .need_init = xfs_has_rmapbt(mp) |
864 | }, |
865 | { /* REFC root block */ |
866 | .daddr = XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)), |
867 | .numblks = BTOBB(mp->m_sb.sb_blocksize), |
868 | .ops = &xfs_refcountbt_buf_ops, |
869 | .work = &xfs_btroot_init, |
870 | .type = XFS_BTNUM_REFC, |
871 | .need_init = xfs_has_reflink(mp) |
872 | }, |
873 | { /* NULL terminating block */ |
874 | .daddr = XFS_BUF_DADDR_NULL, |
875 | } |
876 | }; |
877 | struct xfs_aghdr_grow_data *dp; |
878 | int error = 0; |
879 | |
880 | /* Account for AG free space in new AG */ |
881 | id->nfree += id->agsize - mp->m_ag_prealloc_blocks; |
882 | for (dp = &aghdr_data[0]; dp->daddr != XFS_BUF_DADDR_NULL; dp++) { |
883 | if (!dp->need_init) |
884 | continue; |
885 | |
886 | id->daddr = dp->daddr; |
887 | id->numblks = dp->numblks; |
888 | id->type = dp->type; |
889 | error = xfs_ag_init_hdr(mp, id, dp->work, dp->ops); |
890 | if (error) |
891 | break; |
892 | } |
893 | return error; |
894 | } |
895 | |
896 | int |
897 | xfs_ag_shrink_space( |
898 | struct xfs_perag *pag, |
899 | struct xfs_trans **tpp, |
900 | xfs_extlen_t delta) |
901 | { |
902 | struct xfs_mount *mp = pag->pag_mount; |
903 | struct xfs_alloc_arg args = { |
904 | .tp = *tpp, |
905 | .mp = mp, |
906 | .pag = pag, |
907 | .minlen = delta, |
908 | .maxlen = delta, |
909 | .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE, |
910 | .resv = XFS_AG_RESV_NONE, |
911 | .prod = 1 |
912 | }; |
913 | struct xfs_buf *agibp, *agfbp; |
914 | struct xfs_agi *agi; |
915 | struct xfs_agf *agf; |
916 | xfs_agblock_t aglen; |
917 | int error, err2; |
918 | |
919 | ASSERT(pag->pag_agno == mp->m_sb.sb_agcount - 1); |
920 | error = xfs_ialloc_read_agi(pag, tp: *tpp, agibpp: &agibp); |
921 | if (error) |
922 | return error; |
923 | |
924 | agi = agibp->b_addr; |
925 | |
926 | error = xfs_alloc_read_agf(pag, tp: *tpp, flags: 0, agfbpp: &agfbp); |
927 | if (error) |
928 | return error; |
929 | |
930 | agf = agfbp->b_addr; |
931 | aglen = be32_to_cpu(agi->agi_length); |
932 | /* some extra paranoid checks before we shrink the ag */ |
933 | if (XFS_IS_CORRUPT(mp, agf->agf_length != agi->agi_length)) |
934 | return -EFSCORRUPTED; |
935 | if (delta >= aglen) |
936 | return -EINVAL; |
937 | |
938 | /* |
939 | * Make sure that the last inode cluster cannot overlap with the new |
940 | * end of the AG, even if it's sparse. |
941 | */ |
942 | error = xfs_ialloc_check_shrink(pag, *tpp, agibp, aglen - delta); |
943 | if (error) |
944 | return error; |
945 | |
946 | /* |
947 | * Disable perag reservations so it doesn't cause the allocation request |
948 | * to fail. We'll reestablish reservation before we return. |
949 | */ |
950 | error = xfs_ag_resv_free(pag); |
951 | if (error) |
952 | return error; |
953 | |
954 | /* internal log shouldn't also show up in the free space btrees */ |
955 | error = xfs_alloc_vextent_exact_bno(&args, |
956 | XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta)); |
957 | if (!error && args.agbno == NULLAGBLOCK) |
958 | error = -ENOSPC; |
959 | |
960 | if (error) { |
961 | /* |
962 | * if extent allocation fails, need to roll the transaction to |
963 | * ensure that the AGFL fixup has been committed anyway. |
964 | */ |
965 | xfs_trans_bhold(*tpp, agfbp); |
966 | err2 = xfs_trans_roll(tpp); |
967 | if (err2) |
968 | return err2; |
969 | xfs_trans_bjoin(*tpp, agfbp); |
970 | goto resv_init_out; |
971 | } |
972 | |
973 | /* |
974 | * if successfully deleted from freespace btrees, need to confirm |
975 | * per-AG reservation works as expected. |
976 | */ |
977 | be32_add_cpu(&agi->agi_length, -delta); |
978 | be32_add_cpu(&agf->agf_length, -delta); |
979 | |
980 | err2 = xfs_ag_resv_init(pag, tp: *tpp); |
981 | if (err2) { |
982 | be32_add_cpu(&agi->agi_length, delta); |
983 | be32_add_cpu(&agf->agf_length, delta); |
984 | if (err2 != -ENOSPC) |
985 | goto resv_err; |
986 | |
987 | err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, |
988 | XFS_AG_RESV_NONE, true); |
989 | if (err2) |
990 | goto resv_err; |
991 | |
992 | /* |
993 | * Roll the transaction before trying to re-init the per-ag |
994 | * reservation. The new transaction is clean so it will cancel |
995 | * without any side effects. |
996 | */ |
997 | error = xfs_defer_finish(tp: tpp); |
998 | if (error) |
999 | return error; |
1000 | |
1001 | error = -ENOSPC; |
1002 | goto resv_init_out; |
1003 | } |
1004 | |
1005 | /* Update perag geometry */ |
1006 | pag->block_count -= delta; |
1007 | __xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min, |
1008 | &pag->agino_max); |
1009 | |
1010 | xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH); |
1011 | xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH); |
1012 | return 0; |
1013 | |
1014 | resv_init_out: |
1015 | err2 = xfs_ag_resv_init(pag, tp: *tpp); |
1016 | if (!err2) |
1017 | return error; |
1018 | resv_err: |
1019 | xfs_warn(mp, "Error %d reserving per-AG metadata reserve pool." , err2); |
1020 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
1021 | return err2; |
1022 | } |
1023 | |
1024 | /* |
1025 | * Extent the AG indicated by the @id by the length passed in |
1026 | */ |
1027 | int |
1028 | xfs_ag_extend_space( |
1029 | struct xfs_perag *pag, |
1030 | struct xfs_trans *tp, |
1031 | xfs_extlen_t len) |
1032 | { |
1033 | struct xfs_buf *bp; |
1034 | struct xfs_agi *agi; |
1035 | struct xfs_agf *agf; |
1036 | int error; |
1037 | |
1038 | ASSERT(pag->pag_agno == pag->pag_mount->m_sb.sb_agcount - 1); |
1039 | |
1040 | error = xfs_ialloc_read_agi(pag, tp, agibpp: &bp); |
1041 | if (error) |
1042 | return error; |
1043 | |
1044 | agi = bp->b_addr; |
1045 | be32_add_cpu(&agi->agi_length, len); |
1046 | xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH); |
1047 | |
1048 | /* |
1049 | * Change agf length. |
1050 | */ |
1051 | error = xfs_alloc_read_agf(pag, tp, flags: 0, agfbpp: &bp); |
1052 | if (error) |
1053 | return error; |
1054 | |
1055 | agf = bp->b_addr; |
1056 | be32_add_cpu(&agf->agf_length, len); |
1057 | ASSERT(agf->agf_length == agi->agi_length); |
1058 | xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); |
1059 | |
1060 | /* |
1061 | * Free the new space. |
1062 | * |
1063 | * XFS_RMAP_OINFO_SKIP_UPDATE is used here to tell the rmap btree that |
1064 | * this doesn't actually exist in the rmap btree. |
1065 | */ |
1066 | error = xfs_rmap_free(tp, bp, pag, be32_to_cpu(agf->agf_length) - len, |
1067 | len, &XFS_RMAP_OINFO_SKIP_UPDATE); |
1068 | if (error) |
1069 | return error; |
1070 | |
1071 | error = xfs_free_extent(tp, pag, be32_to_cpu(agf->agf_length) - len, |
1072 | len, &XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_NONE); |
1073 | if (error) |
1074 | return error; |
1075 | |
1076 | /* Update perag geometry */ |
1077 | pag->block_count = be32_to_cpu(agf->agf_length); |
1078 | __xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min, |
1079 | &pag->agino_max); |
1080 | return 0; |
1081 | } |
1082 | |
1083 | /* Retrieve AG geometry. */ |
1084 | int |
1085 | xfs_ag_get_geometry( |
1086 | struct xfs_perag *pag, |
1087 | struct xfs_ag_geometry *ageo) |
1088 | { |
1089 | struct xfs_buf *agi_bp; |
1090 | struct xfs_buf *agf_bp; |
1091 | struct xfs_agi *agi; |
1092 | struct xfs_agf *agf; |
1093 | unsigned int freeblks; |
1094 | int error; |
1095 | |
1096 | /* Lock the AG headers. */ |
1097 | error = xfs_ialloc_read_agi(pag, NULL, &agi_bp); |
1098 | if (error) |
1099 | return error; |
1100 | error = xfs_alloc_read_agf(pag, NULL, 0, &agf_bp); |
1101 | if (error) |
1102 | goto out_agi; |
1103 | |
1104 | /* Fill out form. */ |
1105 | memset(ageo, 0, sizeof(*ageo)); |
1106 | ageo->ag_number = pag->pag_agno; |
1107 | |
1108 | agi = agi_bp->b_addr; |
1109 | ageo->ag_icount = be32_to_cpu(agi->agi_count); |
1110 | ageo->ag_ifree = be32_to_cpu(agi->agi_freecount); |
1111 | |
1112 | agf = agf_bp->b_addr; |
1113 | ageo->ag_length = be32_to_cpu(agf->agf_length); |
1114 | freeblks = pag->pagf_freeblks + |
1115 | pag->pagf_flcount + |
1116 | pag->pagf_btreeblks - |
1117 | xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE); |
1118 | ageo->ag_freeblks = freeblks; |
1119 | xfs_ag_geom_health(pag, ageo); |
1120 | |
1121 | /* Release resources. */ |
1122 | xfs_buf_relse(agf_bp); |
1123 | out_agi: |
1124 | xfs_buf_relse(agi_bp); |
1125 | return error; |
1126 | } |
1127 | |