1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. |
4 | * All Rights Reserved. |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_log_format.h" |
11 | #include "xfs_trans_resv.h" |
12 | #include "xfs_mount.h" |
13 | #include "xfs_inode.h" |
14 | #include "xfs_btree.h" |
15 | #include "xfs_ialloc.h" |
16 | #include "xfs_ialloc_btree.h" |
17 | #include "xfs_iwalk.h" |
18 | #include "xfs_itable.h" |
19 | #include "xfs_error.h" |
20 | #include "xfs_icache.h" |
21 | #include "xfs_health.h" |
22 | #include "xfs_trans.h" |
23 | |
24 | /* |
25 | * Bulk Stat |
26 | * ========= |
27 | * |
28 | * Use the inode walking functions to fill out struct xfs_bulkstat for every |
29 | * allocated inode, then pass the stat information to some externally provided |
30 | * iteration function. |
31 | */ |
32 | |
33 | struct xfs_bstat_chunk { |
34 | bulkstat_one_fmt_pf formatter; |
35 | struct xfs_ibulk *breq; |
36 | struct xfs_bulkstat *buf; |
37 | }; |
38 | |
39 | /* |
40 | * Fill out the bulkstat info for a single inode and report it somewhere. |
41 | * |
42 | * bc->breq->lastino is effectively the inode cursor as we walk through the |
43 | * filesystem. Therefore, we update it any time we need to move the cursor |
44 | * forward, regardless of whether or not we're sending any bstat information |
45 | * back to userspace. If the inode is internal metadata or, has been freed |
46 | * out from under us, we just simply keep going. |
47 | * |
48 | * However, if any other type of error happens we want to stop right where we |
49 | * are so that userspace will call back with exact number of the bad inode and |
50 | * we can send back an error code. |
51 | * |
52 | * Note that if the formatter tells us there's no space left in the buffer we |
53 | * move the cursor forward and abort the walk. |
54 | */ |
55 | STATIC int |
56 | xfs_bulkstat_one_int( |
57 | struct xfs_mount *mp, |
58 | struct mnt_idmap *idmap, |
59 | struct xfs_trans *tp, |
60 | xfs_ino_t ino, |
61 | struct xfs_bstat_chunk *bc) |
62 | { |
63 | struct user_namespace *sb_userns = mp->m_super->s_user_ns; |
64 | struct xfs_inode *ip; /* incore inode pointer */ |
65 | struct inode *inode; |
66 | struct xfs_bulkstat *buf = bc->buf; |
67 | xfs_extnum_t nextents; |
68 | int error = -EINVAL; |
69 | vfsuid_t vfsuid; |
70 | vfsgid_t vfsgid; |
71 | |
72 | if (xfs_internal_inum(mp, ino)) |
73 | goto out_advance; |
74 | |
75 | error = xfs_iget(mp, tp, ino, |
76 | flags: (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), |
77 | XFS_ILOCK_SHARED, ipp: &ip); |
78 | if (error == -ENOENT || error == -EINVAL) |
79 | goto out_advance; |
80 | if (error) |
81 | goto out; |
82 | |
83 | /* Reload the incore unlinked list to avoid failure in inodegc. */ |
84 | if (xfs_inode_unlinked_incomplete(ip)) { |
85 | error = xfs_inode_reload_unlinked_bucket(tp, ip); |
86 | if (error) { |
87 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
88 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
89 | xfs_irele(ip); |
90 | return error; |
91 | } |
92 | } |
93 | |
94 | ASSERT(ip != NULL); |
95 | ASSERT(ip->i_imap.im_blkno != 0); |
96 | inode = VFS_I(ip); |
97 | vfsuid = i_uid_into_vfsuid(idmap, inode); |
98 | vfsgid = i_gid_into_vfsgid(idmap, inode); |
99 | |
100 | /* xfs_iget returns the following without needing |
101 | * further change. |
102 | */ |
103 | buf->bs_projectid = ip->i_projid; |
104 | buf->bs_ino = ino; |
105 | buf->bs_uid = from_kuid(to: sb_userns, uid: vfsuid_into_kuid(vfsuid)); |
106 | buf->bs_gid = from_kgid(to: sb_userns, gid: vfsgid_into_kgid(vfsgid)); |
107 | buf->bs_size = ip->i_disk_size; |
108 | |
109 | buf->bs_nlink = inode->i_nlink; |
110 | buf->bs_atime = inode_get_atime_sec(inode); |
111 | buf->bs_atime_nsec = inode_get_atime_nsec(inode); |
112 | buf->bs_mtime = inode_get_mtime_sec(inode); |
113 | buf->bs_mtime_nsec = inode_get_mtime_nsec(inode); |
114 | buf->bs_ctime = inode_get_ctime_sec(inode); |
115 | buf->bs_ctime_nsec = inode_get_ctime_nsec(inode); |
116 | buf->bs_gen = inode->i_generation; |
117 | buf->bs_mode = inode->i_mode; |
118 | |
119 | buf->bs_xflags = xfs_ip2xflags(ip); |
120 | buf->bs_extsize_blks = ip->i_extsize; |
121 | |
122 | nextents = xfs_ifork_nextents(&ip->i_df); |
123 | if (!(bc->breq->flags & XFS_IBULK_NREXT64)) |
124 | buf->bs_extents = min(nextents, XFS_MAX_EXTCNT_DATA_FORK_SMALL); |
125 | else |
126 | buf->bs_extents64 = nextents; |
127 | |
128 | xfs_bulkstat_health(ip, buf); |
129 | buf->bs_aextents = xfs_ifork_nextents(&ip->i_af); |
130 | buf->bs_forkoff = xfs_inode_fork_boff(ip); |
131 | buf->bs_version = XFS_BULKSTAT_VERSION_V5; |
132 | |
133 | if (xfs_has_v3inodes(mp)) { |
134 | buf->bs_btime = ip->i_crtime.tv_sec; |
135 | buf->bs_btime_nsec = ip->i_crtime.tv_nsec; |
136 | if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) |
137 | buf->bs_cowextsize_blks = ip->i_cowextsize; |
138 | } |
139 | |
140 | switch (ip->i_df.if_format) { |
141 | case XFS_DINODE_FMT_DEV: |
142 | buf->bs_rdev = sysv_encode_dev(dev: inode->i_rdev); |
143 | buf->bs_blksize = BLKDEV_IOSIZE; |
144 | buf->bs_blocks = 0; |
145 | break; |
146 | case XFS_DINODE_FMT_LOCAL: |
147 | buf->bs_rdev = 0; |
148 | buf->bs_blksize = mp->m_sb.sb_blocksize; |
149 | buf->bs_blocks = 0; |
150 | break; |
151 | case XFS_DINODE_FMT_EXTENTS: |
152 | case XFS_DINODE_FMT_BTREE: |
153 | buf->bs_rdev = 0; |
154 | buf->bs_blksize = mp->m_sb.sb_blocksize; |
155 | buf->bs_blocks = ip->i_nblocks + ip->i_delayed_blks; |
156 | break; |
157 | } |
158 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
159 | xfs_irele(ip); |
160 | |
161 | error = bc->formatter(bc->breq, buf); |
162 | if (error == -ECANCELED) |
163 | goto out_advance; |
164 | if (error) |
165 | goto out; |
166 | |
167 | out_advance: |
168 | /* |
169 | * Advance the cursor to the inode that comes after the one we just |
170 | * looked at. We want the caller to move along if the bulkstat |
171 | * information was copied successfully; if we tried to grab the inode |
172 | * but it's no longer allocated; or if it's internal metadata. |
173 | */ |
174 | bc->breq->startino = ino + 1; |
175 | out: |
176 | return error; |
177 | } |
178 | |
179 | /* Bulkstat a single inode. */ |
180 | int |
181 | xfs_bulkstat_one( |
182 | struct xfs_ibulk *breq, |
183 | bulkstat_one_fmt_pf formatter) |
184 | { |
185 | struct xfs_bstat_chunk bc = { |
186 | .formatter = formatter, |
187 | .breq = breq, |
188 | }; |
189 | struct xfs_trans *tp; |
190 | int error; |
191 | |
192 | if (breq->idmap != &nop_mnt_idmap) { |
193 | xfs_warn_ratelimited(breq->mp, |
194 | "bulkstat not supported inside of idmapped mounts." ); |
195 | return -EINVAL; |
196 | } |
197 | |
198 | ASSERT(breq->icount == 1); |
199 | |
200 | bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), |
201 | KM_MAYFAIL); |
202 | if (!bc.buf) |
203 | return -ENOMEM; |
204 | |
205 | /* |
206 | * Grab an empty transaction so that we can use its recursive buffer |
207 | * locking abilities to detect cycles in the inobt without deadlocking. |
208 | */ |
209 | error = xfs_trans_alloc_empty(mp: breq->mp, tpp: &tp); |
210 | if (error) |
211 | goto out; |
212 | |
213 | error = xfs_bulkstat_one_int(mp: breq->mp, idmap: breq->idmap, tp, |
214 | ino: breq->startino, bc: &bc); |
215 | xfs_trans_cancel(tp); |
216 | out: |
217 | kmem_free(ptr: bc.buf); |
218 | |
219 | /* |
220 | * If we reported one inode to userspace then we abort because we hit |
221 | * the end of the buffer. Don't leak that back to userspace. |
222 | */ |
223 | if (error == -ECANCELED) |
224 | error = 0; |
225 | |
226 | return error; |
227 | } |
228 | |
229 | static int |
230 | xfs_bulkstat_iwalk( |
231 | struct xfs_mount *mp, |
232 | struct xfs_trans *tp, |
233 | xfs_ino_t ino, |
234 | void *data) |
235 | { |
236 | struct xfs_bstat_chunk *bc = data; |
237 | int error; |
238 | |
239 | error = xfs_bulkstat_one_int(mp, idmap: bc->breq->idmap, tp, ino, bc: data); |
240 | /* bulkstat just skips over missing inodes */ |
241 | if (error == -ENOENT || error == -EINVAL) |
242 | return 0; |
243 | return error; |
244 | } |
245 | |
246 | /* |
247 | * Check the incoming lastino parameter. |
248 | * |
249 | * We allow any inode value that could map to physical space inside the |
250 | * filesystem because if there are no inodes there, bulkstat moves on to the |
251 | * next chunk. In other words, the magic agino value of zero takes us to the |
252 | * first chunk in the AG, and an agino value past the end of the AG takes us to |
253 | * the first chunk in the next AG. |
254 | * |
255 | * Therefore we can end early if the requested inode is beyond the end of the |
256 | * filesystem or doesn't map properly. |
257 | */ |
258 | static inline bool |
259 | xfs_bulkstat_already_done( |
260 | struct xfs_mount *mp, |
261 | xfs_ino_t startino) |
262 | { |
263 | xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); |
264 | xfs_agino_t agino = XFS_INO_TO_AGINO(mp, startino); |
265 | |
266 | return agno >= mp->m_sb.sb_agcount || |
267 | startino != XFS_AGINO_TO_INO(mp, agno, agino); |
268 | } |
269 | |
270 | /* Return stat information in bulk (by-inode) for the filesystem. */ |
271 | int |
272 | xfs_bulkstat( |
273 | struct xfs_ibulk *breq, |
274 | bulkstat_one_fmt_pf formatter) |
275 | { |
276 | struct xfs_bstat_chunk bc = { |
277 | .formatter = formatter, |
278 | .breq = breq, |
279 | }; |
280 | struct xfs_trans *tp; |
281 | unsigned int iwalk_flags = 0; |
282 | int error; |
283 | |
284 | if (breq->idmap != &nop_mnt_idmap) { |
285 | xfs_warn_ratelimited(breq->mp, |
286 | "bulkstat not supported inside of idmapped mounts." ); |
287 | return -EINVAL; |
288 | } |
289 | if (xfs_bulkstat_already_done(mp: breq->mp, startino: breq->startino)) |
290 | return 0; |
291 | |
292 | bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), |
293 | KM_MAYFAIL); |
294 | if (!bc.buf) |
295 | return -ENOMEM; |
296 | |
297 | /* |
298 | * Grab an empty transaction so that we can use its recursive buffer |
299 | * locking abilities to detect cycles in the inobt without deadlocking. |
300 | */ |
301 | error = xfs_trans_alloc_empty(mp: breq->mp, tpp: &tp); |
302 | if (error) |
303 | goto out; |
304 | |
305 | if (breq->flags & XFS_IBULK_SAME_AG) |
306 | iwalk_flags |= XFS_IWALK_SAME_AG; |
307 | |
308 | error = xfs_iwalk(mp: breq->mp, tp, startino: breq->startino, flags: iwalk_flags, |
309 | iwalk_fn: xfs_bulkstat_iwalk, inode_records: breq->icount, data: &bc); |
310 | xfs_trans_cancel(tp); |
311 | out: |
312 | kmem_free(ptr: bc.buf); |
313 | |
314 | /* |
315 | * We found some inodes, so clear the error status and return them. |
316 | * The lastino pointer will point directly at the inode that triggered |
317 | * any error that occurred, so on the next call the error will be |
318 | * triggered again and propagated to userspace as there will be no |
319 | * formatted inodes in the buffer. |
320 | */ |
321 | if (breq->ocount > 0) |
322 | error = 0; |
323 | |
324 | return error; |
325 | } |
326 | |
327 | /* Convert bulkstat (v5) to bstat (v1). */ |
328 | void |
329 | xfs_bulkstat_to_bstat( |
330 | struct xfs_mount *mp, |
331 | struct xfs_bstat *bs1, |
332 | const struct xfs_bulkstat *bstat) |
333 | { |
334 | /* memset is needed here because of padding holes in the structure. */ |
335 | memset(bs1, 0, sizeof(struct xfs_bstat)); |
336 | bs1->bs_ino = bstat->bs_ino; |
337 | bs1->bs_mode = bstat->bs_mode; |
338 | bs1->bs_nlink = bstat->bs_nlink; |
339 | bs1->bs_uid = bstat->bs_uid; |
340 | bs1->bs_gid = bstat->bs_gid; |
341 | bs1->bs_rdev = bstat->bs_rdev; |
342 | bs1->bs_blksize = bstat->bs_blksize; |
343 | bs1->bs_size = bstat->bs_size; |
344 | bs1->bs_atime.tv_sec = bstat->bs_atime; |
345 | bs1->bs_mtime.tv_sec = bstat->bs_mtime; |
346 | bs1->bs_ctime.tv_sec = bstat->bs_ctime; |
347 | bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec; |
348 | bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec; |
349 | bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec; |
350 | bs1->bs_blocks = bstat->bs_blocks; |
351 | bs1->bs_xflags = bstat->bs_xflags; |
352 | bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks); |
353 | bs1->bs_extents = bstat->bs_extents; |
354 | bs1->bs_gen = bstat->bs_gen; |
355 | bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF; |
356 | bs1->bs_forkoff = bstat->bs_forkoff; |
357 | bs1->bs_projid_hi = bstat->bs_projectid >> 16; |
358 | bs1->bs_sick = bstat->bs_sick; |
359 | bs1->bs_checked = bstat->bs_checked; |
360 | bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks); |
361 | bs1->bs_dmevmask = 0; |
362 | bs1->bs_dmstate = 0; |
363 | bs1->bs_aextents = bstat->bs_aextents; |
364 | } |
365 | |
366 | struct xfs_inumbers_chunk { |
367 | inumbers_fmt_pf formatter; |
368 | struct xfs_ibulk *breq; |
369 | }; |
370 | |
371 | /* |
372 | * INUMBERS |
373 | * ======== |
374 | * This is how we export inode btree records to userspace, so that XFS tools |
375 | * can figure out where inodes are allocated. |
376 | */ |
377 | |
378 | /* |
379 | * Format the inode group structure and report it somewhere. |
380 | * |
381 | * Similar to xfs_bulkstat_one_int, lastino is the inode cursor as we walk |
382 | * through the filesystem so we move it forward unless there was a runtime |
383 | * error. If the formatter tells us the buffer is now full we also move the |
384 | * cursor forward and abort the walk. |
385 | */ |
386 | STATIC int |
387 | xfs_inumbers_walk( |
388 | struct xfs_mount *mp, |
389 | struct xfs_trans *tp, |
390 | xfs_agnumber_t agno, |
391 | const struct xfs_inobt_rec_incore *irec, |
392 | void *data) |
393 | { |
394 | struct xfs_inumbers inogrp = { |
395 | .xi_startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino), |
396 | .xi_alloccount = irec->ir_count - irec->ir_freecount, |
397 | .xi_allocmask = ~irec->ir_free, |
398 | .xi_version = XFS_INUMBERS_VERSION_V5, |
399 | }; |
400 | struct xfs_inumbers_chunk *ic = data; |
401 | int error; |
402 | |
403 | error = ic->formatter(ic->breq, &inogrp); |
404 | if (error && error != -ECANCELED) |
405 | return error; |
406 | |
407 | ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) + |
408 | XFS_INODES_PER_CHUNK; |
409 | return error; |
410 | } |
411 | |
412 | /* |
413 | * Return inode number table for the filesystem. |
414 | */ |
415 | int |
416 | xfs_inumbers( |
417 | struct xfs_ibulk *breq, |
418 | inumbers_fmt_pf formatter) |
419 | { |
420 | struct xfs_inumbers_chunk ic = { |
421 | .formatter = formatter, |
422 | .breq = breq, |
423 | }; |
424 | struct xfs_trans *tp; |
425 | int error = 0; |
426 | |
427 | if (xfs_bulkstat_already_done(mp: breq->mp, startino: breq->startino)) |
428 | return 0; |
429 | |
430 | /* |
431 | * Grab an empty transaction so that we can use its recursive buffer |
432 | * locking abilities to detect cycles in the inobt without deadlocking. |
433 | */ |
434 | error = xfs_trans_alloc_empty(mp: breq->mp, tpp: &tp); |
435 | if (error) |
436 | goto out; |
437 | |
438 | error = xfs_inobt_walk(mp: breq->mp, tp, startino: breq->startino, flags: breq->flags, |
439 | inobt_walk_fn: xfs_inumbers_walk, inobt_records: breq->icount, data: &ic); |
440 | xfs_trans_cancel(tp); |
441 | out: |
442 | |
443 | /* |
444 | * We found some inode groups, so clear the error status and return |
445 | * them. The lastino pointer will point directly at the inode that |
446 | * triggered any error that occurred, so on the next call the error |
447 | * will be triggered again and propagated to userspace as there will be |
448 | * no formatted inode groups in the buffer. |
449 | */ |
450 | if (breq->ocount > 0) |
451 | error = 0; |
452 | |
453 | return error; |
454 | } |
455 | |
456 | /* Convert an inumbers (v5) struct to a inogrp (v1) struct. */ |
457 | void |
458 | xfs_inumbers_to_inogrp( |
459 | struct xfs_inogrp *ig1, |
460 | const struct xfs_inumbers *ig) |
461 | { |
462 | /* memset is needed here because of padding holes in the structure. */ |
463 | memset(ig1, 0, sizeof(struct xfs_inogrp)); |
464 | ig1->xi_startino = ig->xi_startino; |
465 | ig1->xi_alloccount = ig->xi_alloccount; |
466 | ig1->xi_allocmask = ig->xi_allocmask; |
467 | } |
468 | |