1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (c) 2006-2007 Silicon Graphics, Inc. |
4 | * Copyright (c) 2014 Christoph Hellwig. |
5 | * All Rights Reserved. |
6 | */ |
7 | #include "xfs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_log_format.h" |
11 | #include "xfs_trans_resv.h" |
12 | #include "xfs_mount.h" |
13 | #include "xfs_inode.h" |
14 | #include "xfs_bmap.h" |
15 | #include "xfs_bmap_util.h" |
16 | #include "xfs_alloc.h" |
17 | #include "xfs_mru_cache.h" |
18 | #include "xfs_trace.h" |
19 | #include "xfs_ag.h" |
20 | #include "xfs_ag_resv.h" |
21 | #include "xfs_trans.h" |
22 | #include "xfs_filestream.h" |
23 | |
24 | struct xfs_fstrm_item { |
25 | struct xfs_mru_cache_elem mru; |
26 | struct xfs_perag *pag; /* AG in use for this directory */ |
27 | }; |
28 | |
/* Bit flags steering AG selection in xfs_filestream_pick_ag(). */
enum xfs_fstrm_alloc {
	/* Set when the allocation carries XFS_ALLOC_USERDATA. */
	XFS_PICK_USERDATA	= (1 << 0),
	/* Set in low-space mode; lifts the "prefers metadata" exclusion. */
	XFS_PICK_LOWSPACE	= (1 << 1),
};
33 | |
34 | static void |
35 | xfs_fstrm_free_func( |
36 | void *data, |
37 | struct xfs_mru_cache_elem *mru) |
38 | { |
39 | struct xfs_fstrm_item *item = |
40 | container_of(mru, struct xfs_fstrm_item, mru); |
41 | struct xfs_perag *pag = item->pag; |
42 | |
43 | trace_xfs_filestream_free(pag, ino: mru->key); |
44 | atomic_dec(v: &pag->pagf_fstrms); |
45 | xfs_perag_rele(pag); |
46 | |
47 | kmem_free(ptr: item); |
48 | } |
49 | |
50 | /* |
51 | * Scan the AGs starting at start_agno looking for an AG that isn't in use and |
52 | * has at least minlen blocks free. If no AG is found to match the allocation |
53 | * requirements, pick the AG with the most free space in it. |
54 | */ |
55 | static int |
56 | xfs_filestream_pick_ag( |
57 | struct xfs_alloc_arg *args, |
58 | xfs_ino_t pino, |
59 | xfs_agnumber_t start_agno, |
60 | int flags, |
61 | xfs_extlen_t *longest) |
62 | { |
63 | struct xfs_mount *mp = args->mp; |
64 | struct xfs_perag *pag; |
65 | struct xfs_perag *max_pag = NULL; |
66 | xfs_extlen_t minlen = *longest; |
67 | xfs_extlen_t free = 0, minfree, maxfree = 0; |
68 | xfs_agnumber_t agno; |
69 | bool first_pass = true; |
70 | int err; |
71 | |
72 | /* 2% of an AG's blocks must be free for it to be chosen. */ |
73 | minfree = mp->m_sb.sb_agblocks / 50; |
74 | |
75 | restart: |
76 | for_each_perag_wrap(mp, start_agno, agno, pag) { |
77 | trace_xfs_filestream_scan(pag, pino); |
78 | *longest = 0; |
79 | err = xfs_bmap_longest_free_extent(pag, NULL, longest); |
80 | if (err) { |
81 | if (err != -EAGAIN) |
82 | break; |
83 | /* Couldn't lock the AGF, skip this AG. */ |
84 | err = 0; |
85 | continue; |
86 | } |
87 | |
88 | /* Keep track of the AG with the most free blocks. */ |
89 | if (pag->pagf_freeblks > maxfree) { |
90 | maxfree = pag->pagf_freeblks; |
91 | if (max_pag) |
92 | xfs_perag_rele(max_pag); |
93 | atomic_inc(&pag->pag_active_ref); |
94 | max_pag = pag; |
95 | } |
96 | |
97 | /* |
98 | * The AG reference count does two things: it enforces mutual |
99 | * exclusion when examining the suitability of an AG in this |
100 | * loop, and it guards against two filestreams being established |
101 | * in the same AG as each other. |
102 | */ |
103 | if (atomic_inc_return(&pag->pagf_fstrms) <= 1) { |
104 | if (((minlen && *longest >= minlen) || |
105 | (!minlen && pag->pagf_freeblks >= minfree)) && |
106 | (!xfs_perag_prefers_metadata(pag) || |
107 | !(flags & XFS_PICK_USERDATA) || |
108 | (flags & XFS_PICK_LOWSPACE))) { |
109 | /* Break out, retaining the reference on the AG. */ |
110 | free = pag->pagf_freeblks; |
111 | break; |
112 | } |
113 | } |
114 | |
115 | /* Drop the reference on this AG, it's not usable. */ |
116 | atomic_dec(&pag->pagf_fstrms); |
117 | } |
118 | |
119 | if (err) { |
120 | xfs_perag_rele(pag); |
121 | if (max_pag) |
122 | xfs_perag_rele(max_pag); |
123 | return err; |
124 | } |
125 | |
126 | if (!pag) { |
127 | /* |
128 | * Allow a second pass to give xfs_bmap_longest_free_extent() |
129 | * another attempt at locking AGFs that it might have skipped |
130 | * over before we fail. |
131 | */ |
132 | if (first_pass) { |
133 | first_pass = false; |
134 | goto restart; |
135 | } |
136 | |
137 | /* |
138 | * We must be low on data space, so run a final lowspace |
139 | * optimised selection pass if we haven't already. |
140 | */ |
141 | if (!(flags & XFS_PICK_LOWSPACE)) { |
142 | flags |= XFS_PICK_LOWSPACE; |
143 | goto restart; |
144 | } |
145 | |
146 | /* |
147 | * No unassociated AGs are available, so select the AG with the |
148 | * most free space, regardless of whether it's already in use by |
149 | * another filestream. It none suit, just use whatever AG we can |
150 | * grab. |
151 | */ |
152 | if (!max_pag) { |
153 | for_each_perag_wrap(args->mp, 0, start_agno, args->pag) |
154 | break; |
155 | atomic_inc(&args->pag->pagf_fstrms); |
156 | *longest = 0; |
157 | } else { |
158 | pag = max_pag; |
159 | free = maxfree; |
160 | atomic_inc(&pag->pagf_fstrms); |
161 | } |
162 | } else if (max_pag) { |
163 | xfs_perag_rele(max_pag); |
164 | } |
165 | |
166 | trace_xfs_filestream_pick(pag, pino, free); |
167 | args->pag = pag; |
168 | return 0; |
169 | |
170 | } |
171 | |
172 | static struct xfs_inode * |
173 | xfs_filestream_get_parent( |
174 | struct xfs_inode *ip) |
175 | { |
176 | struct inode *inode = VFS_I(ip), *dir = NULL; |
177 | struct dentry *dentry, *parent; |
178 | |
179 | dentry = d_find_alias(inode); |
180 | if (!dentry) |
181 | goto out; |
182 | |
183 | parent = dget_parent(dentry); |
184 | if (!parent) |
185 | goto out_dput; |
186 | |
187 | dir = igrab(d_inode(dentry: parent)); |
188 | dput(parent); |
189 | |
190 | out_dput: |
191 | dput(dentry); |
192 | out: |
193 | return dir ? XFS_I(inode: dir) : NULL; |
194 | } |
195 | |
196 | /* |
197 | * Lookup the mru cache for an existing association. If one exists and we can |
198 | * use it, return with an active perag reference indicating that the allocation |
199 | * will proceed with that association. |
200 | * |
201 | * If we have no association, or we cannot use the current one and have to |
202 | * destroy it, return with longest = 0 to tell the caller to create a new |
203 | * association. |
204 | */ |
205 | static int |
206 | xfs_filestream_lookup_association( |
207 | struct xfs_bmalloca *ap, |
208 | struct xfs_alloc_arg *args, |
209 | xfs_ino_t pino, |
210 | xfs_extlen_t *longest) |
211 | { |
212 | struct xfs_mount *mp = args->mp; |
213 | struct xfs_perag *pag; |
214 | struct xfs_mru_cache_elem *mru; |
215 | int error = 0; |
216 | |
217 | *longest = 0; |
218 | mru = xfs_mru_cache_lookup(mru: mp->m_filestream, key: pino); |
219 | if (!mru) |
220 | return 0; |
221 | /* |
222 | * Grab the pag and take an extra active reference for the caller whilst |
223 | * the mru item cannot go away. This means we'll pin the perag with |
224 | * the reference we get here even if the filestreams association is torn |
225 | * down immediately after we mark the lookup as done. |
226 | */ |
227 | pag = container_of(mru, struct xfs_fstrm_item, mru)->pag; |
228 | atomic_inc(v: &pag->pag_active_ref); |
229 | xfs_mru_cache_done(mru: mp->m_filestream); |
230 | |
231 | trace_xfs_filestream_lookup(pag, ino: ap->ip->i_ino); |
232 | |
233 | ap->blkno = XFS_AGB_TO_FSB(args->mp, pag->pag_agno, 0); |
234 | xfs_bmap_adjacent(ap); |
235 | |
236 | /* |
237 | * If there is very little free space before we start a filestreams |
238 | * allocation, we're almost guaranteed to fail to find a large enough |
239 | * free space available so just use the cached AG. |
240 | */ |
241 | if (ap->tp->t_flags & XFS_TRANS_LOWMODE) { |
242 | *longest = 1; |
243 | goto out_done; |
244 | } |
245 | |
246 | error = xfs_bmap_longest_free_extent(pag, args->tp, longest); |
247 | if (error == -EAGAIN) |
248 | error = 0; |
249 | if (error || *longest < args->maxlen) { |
250 | /* We aren't going to use this perag */ |
251 | *longest = 0; |
252 | xfs_perag_rele(pag); |
253 | return error; |
254 | } |
255 | |
256 | out_done: |
257 | args->pag = pag; |
258 | return 0; |
259 | } |
260 | |
261 | static int |
262 | xfs_filestream_create_association( |
263 | struct xfs_bmalloca *ap, |
264 | struct xfs_alloc_arg *args, |
265 | xfs_ino_t pino, |
266 | xfs_extlen_t *longest) |
267 | { |
268 | struct xfs_mount *mp = args->mp; |
269 | struct xfs_mru_cache_elem *mru; |
270 | struct xfs_fstrm_item *item; |
271 | xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, pino); |
272 | int flags = 0; |
273 | int error; |
274 | |
275 | /* Changing parent AG association now, so remove the existing one. */ |
276 | mru = xfs_mru_cache_remove(mru: mp->m_filestream, key: pino); |
277 | if (mru) { |
278 | struct xfs_fstrm_item *item = |
279 | container_of(mru, struct xfs_fstrm_item, mru); |
280 | |
281 | agno = (item->pag->pag_agno + 1) % mp->m_sb.sb_agcount; |
282 | xfs_fstrm_free_func(data: mp, mru); |
283 | } else if (xfs_is_inode32(mp)) { |
284 | xfs_agnumber_t rotorstep = xfs_rotorstep; |
285 | |
286 | agno = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; |
287 | mp->m_agfrotor = (mp->m_agfrotor + 1) % |
288 | (mp->m_sb.sb_agcount * rotorstep); |
289 | } |
290 | |
291 | ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0); |
292 | xfs_bmap_adjacent(ap); |
293 | |
294 | if (ap->datatype & XFS_ALLOC_USERDATA) |
295 | flags |= XFS_PICK_USERDATA; |
296 | if (ap->tp->t_flags & XFS_TRANS_LOWMODE) |
297 | flags |= XFS_PICK_LOWSPACE; |
298 | |
299 | *longest = ap->length; |
300 | error = xfs_filestream_pick_ag(args, pino, agno, flags, longest); |
301 | if (error) |
302 | return error; |
303 | |
304 | /* |
305 | * We are going to use this perag now, so create an assoication for it. |
306 | * xfs_filestream_pick_ag() has already bumped the perag fstrms counter |
307 | * for us, so all we need to do here is take another active reference to |
308 | * the perag for the cached association. |
309 | * |
310 | * If we fail to store the association, we need to drop the fstrms |
311 | * counter as well as drop the perag reference we take here for the |
312 | * item. We do not need to return an error for this failure - as long as |
313 | * we return a referenced AG, the allocation can still go ahead just |
314 | * fine. |
315 | */ |
316 | item = kmem_alloc(sizeof(*item), KM_MAYFAIL); |
317 | if (!item) |
318 | goto out_put_fstrms; |
319 | |
320 | atomic_inc(v: &args->pag->pag_active_ref); |
321 | item->pag = args->pag; |
322 | error = xfs_mru_cache_insert(mru: mp->m_filestream, key: pino, elem: &item->mru); |
323 | if (error) |
324 | goto out_free_item; |
325 | return 0; |
326 | |
327 | out_free_item: |
328 | xfs_perag_rele(item->pag); |
329 | kmem_free(ptr: item); |
330 | out_put_fstrms: |
331 | atomic_dec(v: &args->pag->pagf_fstrms); |
332 | return 0; |
333 | } |
334 | |
335 | /* |
336 | * Search for an allocation group with a single extent large enough for |
337 | * the request. First we look for an existing association and use that if it |
338 | * is found. Otherwise, we create a new association by selecting an AG that fits |
339 | * the allocation criteria. |
340 | * |
341 | * We return with a referenced perag in args->pag to indicate which AG we are |
342 | * allocating into or an error with no references held. |
343 | */ |
344 | int |
345 | xfs_filestream_select_ag( |
346 | struct xfs_bmalloca *ap, |
347 | struct xfs_alloc_arg *args, |
348 | xfs_extlen_t *longest) |
349 | { |
350 | struct xfs_mount *mp = args->mp; |
351 | struct xfs_inode *pip; |
352 | xfs_ino_t ino = 0; |
353 | int error = 0; |
354 | |
355 | *longest = 0; |
356 | args->total = ap->total; |
357 | pip = xfs_filestream_get_parent(ip: ap->ip); |
358 | if (pip) { |
359 | ino = pip->i_ino; |
360 | error = xfs_filestream_lookup_association(ap, args, ino, |
361 | longest); |
362 | xfs_irele(ip: pip); |
363 | if (error) |
364 | return error; |
365 | if (*longest >= args->maxlen) |
366 | goto out_select; |
367 | if (ap->tp->t_flags & XFS_TRANS_LOWMODE) |
368 | goto out_select; |
369 | } |
370 | |
371 | error = xfs_filestream_create_association(ap, args, ino, longest); |
372 | if (error) |
373 | return error; |
374 | |
375 | out_select: |
376 | ap->blkno = XFS_AGB_TO_FSB(mp, args->pag->pag_agno, 0); |
377 | return 0; |
378 | } |
379 | |
380 | void |
381 | xfs_filestream_deassociate( |
382 | struct xfs_inode *ip) |
383 | { |
384 | xfs_mru_cache_delete(mru: ip->i_mount->m_filestream, key: ip->i_ino); |
385 | } |
386 | |
387 | int |
388 | xfs_filestream_mount( |
389 | xfs_mount_t *mp) |
390 | { |
391 | /* |
392 | * The filestream timer tunable is currently fixed within the range of |
393 | * one second to four minutes, with five seconds being the default. The |
394 | * group count is somewhat arbitrary, but it'd be nice to adhere to the |
395 | * timer tunable to within about 10 percent. This requires at least 10 |
396 | * groups. |
397 | */ |
398 | return xfs_mru_cache_create(mrup: &mp->m_filestream, data: mp, |
399 | xfs_fstrm_centisecs * 10, grp_count: 10, free_func: xfs_fstrm_free_func); |
400 | } |
401 | |
402 | void |
403 | xfs_filestream_unmount( |
404 | xfs_mount_t *mp) |
405 | { |
406 | xfs_mru_cache_destroy(mru: mp->m_filestream); |
407 | } |
408 | |