1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* Volume-level cache cookie handling. |
3 | * |
4 | * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. |
5 | * Written by David Howells (dhowells@redhat.com) |
6 | */ |
7 | |
8 | #define FSCACHE_DEBUG_LEVEL COOKIE |
9 | #include <linux/export.h> |
10 | #include <linux/slab.h> |
11 | #include "internal.h" |
12 | |
13 | #define fscache_volume_hash_shift 10 |
14 | static struct hlist_bl_head fscache_volume_hash[1 << fscache_volume_hash_shift]; |
15 | static atomic_t fscache_volume_debug_id; |
16 | static LIST_HEAD(fscache_volumes); |
17 | |
18 | static void fscache_create_volume_work(struct work_struct *work); |
19 | |
20 | struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, |
21 | enum fscache_volume_trace where) |
22 | { |
23 | int ref; |
24 | |
25 | __refcount_inc(r: &volume->ref, oldp: &ref); |
26 | trace_fscache_volume(volume_debug_id: volume->debug_id, usage: ref + 1, where); |
27 | return volume; |
28 | } |
29 | |
30 | static void fscache_see_volume(struct fscache_volume *volume, |
31 | enum fscache_volume_trace where) |
32 | { |
33 | int ref = refcount_read(r: &volume->ref); |
34 | |
35 | trace_fscache_volume(volume_debug_id: volume->debug_id, usage: ref, where); |
36 | } |
37 | |
38 | /* |
39 | * Pin the cache behind a volume so that we can access it. |
40 | */ |
41 | static void __fscache_begin_volume_access(struct fscache_volume *volume, |
42 | struct fscache_cookie *cookie, |
43 | enum fscache_access_trace why) |
44 | { |
45 | int n_accesses; |
46 | |
47 | n_accesses = atomic_inc_return(v: &volume->n_accesses); |
48 | smp_mb__after_atomic(); |
49 | trace_fscache_access_volume(volume_debug_id: volume->debug_id, cookie_debug_id: cookie ? cookie->debug_id : 0, |
50 | ref: refcount_read(r: &volume->ref), |
51 | n_accesses, why); |
52 | } |
53 | |
54 | /** |
55 | * fscache_begin_volume_access - Pin a cache so a volume can be accessed |
56 | * @volume: The volume cookie |
57 | * @cookie: A datafile cookie for a tracing reference (or NULL) |
58 | * @why: An indication of the circumstances of the access for tracing |
59 | * |
60 | * Attempt to pin the cache to prevent it from going away whilst we're |
61 | * accessing a volume and returns true if successful. This works as follows: |
62 | * |
63 | * (1) If the cache tests as not live (state is not FSCACHE_CACHE_IS_ACTIVE), |
64 | * then we return false to indicate access was not permitted. |
65 | * |
66 | * (2) If the cache tests as live, then we increment the volume's n_accesses |
67 | * count and then recheck the cache liveness, ending the access if it |
68 | * ceased to be live. |
69 | * |
70 | * (3) When we end the access, we decrement the volume's n_accesses and wake |
71 | * up the any waiters if it reaches 0. |
72 | * |
73 | * (4) Whilst the cache is caching, the volume's n_accesses is kept |
74 | * artificially incremented to prevent wakeups from happening. |
75 | * |
76 | * (5) When the cache is taken offline, the state is changed to prevent new |
77 | * accesses, the volume's n_accesses is decremented and we wait for it to |
78 | * become 0. |
79 | * |
80 | * The datafile @cookie and the @why indicator are merely provided for tracing |
81 | * purposes. |
82 | */ |
83 | bool fscache_begin_volume_access(struct fscache_volume *volume, |
84 | struct fscache_cookie *cookie, |
85 | enum fscache_access_trace why) |
86 | { |
87 | if (!fscache_cache_is_live(cache: volume->cache)) |
88 | return false; |
89 | __fscache_begin_volume_access(volume, cookie, why); |
90 | if (!fscache_cache_is_live(cache: volume->cache)) { |
91 | fscache_end_volume_access(volume, cookie, why: fscache_access_unlive); |
92 | return false; |
93 | } |
94 | return true; |
95 | } |
96 | |
97 | /** |
98 | * fscache_end_volume_access - Unpin a cache at the end of an access. |
99 | * @volume: The volume cookie |
100 | * @cookie: A datafile cookie for a tracing reference (or NULL) |
101 | * @why: An indication of the circumstances of the access for tracing |
102 | * |
103 | * Unpin a cache volume after we've accessed it. The datafile @cookie and the |
104 | * @why indicator are merely provided for tracing purposes. |
105 | */ |
106 | void fscache_end_volume_access(struct fscache_volume *volume, |
107 | struct fscache_cookie *cookie, |
108 | enum fscache_access_trace why) |
109 | { |
110 | int n_accesses; |
111 | |
112 | smp_mb__before_atomic(); |
113 | n_accesses = atomic_dec_return(v: &volume->n_accesses); |
114 | trace_fscache_access_volume(volume_debug_id: volume->debug_id, cookie_debug_id: cookie ? cookie->debug_id : 0, |
115 | ref: refcount_read(r: &volume->ref), |
116 | n_accesses, why); |
117 | if (n_accesses == 0) |
118 | wake_up_var(var: &volume->n_accesses); |
119 | } |
120 | EXPORT_SYMBOL(fscache_end_volume_access); |
121 | |
122 | static bool fscache_volume_same(const struct fscache_volume *a, |
123 | const struct fscache_volume *b) |
124 | { |
125 | size_t klen; |
126 | |
127 | if (a->key_hash != b->key_hash || |
128 | a->cache != b->cache || |
129 | a->key[0] != b->key[0]) |
130 | return false; |
131 | |
132 | klen = round_up(a->key[0] + 1, sizeof(__le32)); |
133 | return memcmp(p: a->key, q: b->key, size: klen) == 0; |
134 | } |
135 | |
136 | static bool fscache_is_acquire_pending(struct fscache_volume *volume) |
137 | { |
138 | return test_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &volume->flags); |
139 | } |
140 | |
141 | static void fscache_wait_on_volume_collision(struct fscache_volume *candidate, |
142 | unsigned int collidee_debug_id) |
143 | { |
144 | wait_on_bit_timeout(word: &candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING, |
145 | TASK_UNINTERRUPTIBLE, timeout: 20 * HZ); |
146 | if (fscache_is_acquire_pending(volume: candidate)) { |
147 | pr_notice("Potential volume collision new=%08x old=%08x" , |
148 | candidate->debug_id, collidee_debug_id); |
149 | fscache_stat(stat: &fscache_n_volumes_collision); |
150 | wait_on_bit(word: &candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING, |
151 | TASK_UNINTERRUPTIBLE); |
152 | } |
153 | } |
154 | |
155 | /* |
156 | * Attempt to insert the new volume into the hash. If there's a collision, we |
157 | * wait for the old volume to complete if it's being relinquished and an error |
158 | * otherwise. |
159 | */ |
160 | static bool fscache_hash_volume(struct fscache_volume *candidate) |
161 | { |
162 | struct fscache_volume *cursor; |
163 | struct hlist_bl_head *h; |
164 | struct hlist_bl_node *p; |
165 | unsigned int bucket, collidee_debug_id = 0; |
166 | |
167 | bucket = candidate->key_hash & (ARRAY_SIZE(fscache_volume_hash) - 1); |
168 | h = &fscache_volume_hash[bucket]; |
169 | |
170 | hlist_bl_lock(b: h); |
171 | hlist_bl_for_each_entry(cursor, p, h, hash_link) { |
172 | if (fscache_volume_same(a: candidate, b: cursor)) { |
173 | if (!test_bit(FSCACHE_VOLUME_RELINQUISHED, &cursor->flags)) |
174 | goto collision; |
175 | fscache_see_volume(volume: cursor, where: fscache_volume_get_hash_collision); |
176 | set_bit(FSCACHE_VOLUME_COLLIDED_WITH, addr: &cursor->flags); |
177 | set_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, addr: &candidate->flags); |
178 | collidee_debug_id = cursor->debug_id; |
179 | break; |
180 | } |
181 | } |
182 | |
183 | hlist_bl_add_head(n: &candidate->hash_link, h); |
184 | hlist_bl_unlock(b: h); |
185 | |
186 | if (fscache_is_acquire_pending(volume: candidate)) |
187 | fscache_wait_on_volume_collision(candidate, collidee_debug_id); |
188 | return true; |
189 | |
190 | collision: |
191 | fscache_see_volume(volume: cursor, where: fscache_volume_collision); |
192 | hlist_bl_unlock(b: h); |
193 | return false; |
194 | } |
195 | |
196 | /* |
197 | * Allocate and initialise a volume representation cookie. |
198 | */ |
199 | static struct fscache_volume *fscache_alloc_volume(const char *volume_key, |
200 | const char *cache_name, |
201 | const void *coherency_data, |
202 | size_t coherency_len) |
203 | { |
204 | struct fscache_volume *volume; |
205 | struct fscache_cache *cache; |
206 | size_t klen, hlen; |
207 | u8 *key; |
208 | |
209 | klen = strlen(volume_key); |
210 | if (klen > NAME_MAX) |
211 | return NULL; |
212 | |
213 | if (!coherency_data) |
214 | coherency_len = 0; |
215 | |
216 | cache = fscache_lookup_cache(name: cache_name, is_cache: false); |
217 | if (IS_ERR(ptr: cache)) |
218 | return NULL; |
219 | |
220 | volume = kzalloc(struct_size(volume, coherency, coherency_len), |
221 | GFP_KERNEL); |
222 | if (!volume) |
223 | goto err_cache; |
224 | |
225 | volume->cache = cache; |
226 | volume->coherency_len = coherency_len; |
227 | if (coherency_data) |
228 | memcpy(volume->coherency, coherency_data, coherency_len); |
229 | INIT_LIST_HEAD(list: &volume->proc_link); |
230 | INIT_WORK(&volume->work, fscache_create_volume_work); |
231 | refcount_set(r: &volume->ref, n: 1); |
232 | spin_lock_init(&volume->lock); |
233 | |
234 | /* Stick the length on the front of the key and pad it out to make |
235 | * hashing easier. |
236 | */ |
237 | hlen = round_up(1 + klen + 1, sizeof(__le32)); |
238 | key = kzalloc(size: hlen, GFP_KERNEL); |
239 | if (!key) |
240 | goto err_vol; |
241 | key[0] = klen; |
242 | memcpy(key + 1, volume_key, klen); |
243 | |
244 | volume->key = key; |
245 | volume->key_hash = fscache_hash(salt: 0, data: key, len: hlen); |
246 | |
247 | volume->debug_id = atomic_inc_return(v: &fscache_volume_debug_id); |
248 | down_write(sem: &fscache_addremove_sem); |
249 | atomic_inc(v: &cache->n_volumes); |
250 | list_add_tail(new: &volume->proc_link, head: &fscache_volumes); |
251 | fscache_see_volume(volume, where: fscache_volume_new_acquire); |
252 | fscache_stat(stat: &fscache_n_volumes); |
253 | up_write(sem: &fscache_addremove_sem); |
254 | _leave(" = v=%x" , volume->debug_id); |
255 | return volume; |
256 | |
257 | err_vol: |
258 | kfree(objp: volume); |
259 | err_cache: |
260 | fscache_put_cache(cache, where: fscache_cache_put_alloc_volume); |
261 | fscache_stat(stat: &fscache_n_volumes_nomem); |
262 | return NULL; |
263 | } |
264 | |
265 | /* |
266 | * Create a volume's representation on disk. Have a volume ref and a cache |
267 | * access we have to release. |
268 | */ |
269 | static void fscache_create_volume_work(struct work_struct *work) |
270 | { |
271 | const struct fscache_cache_ops *ops; |
272 | struct fscache_volume *volume = |
273 | container_of(work, struct fscache_volume, work); |
274 | |
275 | fscache_see_volume(volume, where: fscache_volume_see_create_work); |
276 | |
277 | ops = volume->cache->ops; |
278 | if (ops->acquire_volume) |
279 | ops->acquire_volume(volume); |
280 | fscache_end_cache_access(cache: volume->cache, |
281 | why: fscache_access_acquire_volume_end); |
282 | |
283 | clear_and_wake_up_bit(FSCACHE_VOLUME_CREATING, word: &volume->flags); |
284 | fscache_put_volume(volume, where: fscache_volume_put_create_work); |
285 | } |
286 | |
287 | /* |
288 | * Dispatch a worker thread to create a volume's representation on disk. |
289 | */ |
290 | void fscache_create_volume(struct fscache_volume *volume, bool wait) |
291 | { |
292 | if (test_and_set_bit(FSCACHE_VOLUME_CREATING, addr: &volume->flags)) |
293 | goto maybe_wait; |
294 | if (volume->cache_priv) |
295 | goto no_wait; /* We raced */ |
296 | if (!fscache_begin_cache_access(cache: volume->cache, |
297 | why: fscache_access_acquire_volume)) |
298 | goto no_wait; |
299 | |
300 | fscache_get_volume(volume, where: fscache_volume_get_create_work); |
301 | if (!schedule_work(work: &volume->work)) |
302 | fscache_put_volume(volume, where: fscache_volume_put_create_work); |
303 | |
304 | maybe_wait: |
305 | if (wait) { |
306 | fscache_see_volume(volume, where: fscache_volume_wait_create_work); |
307 | wait_on_bit(word: &volume->flags, FSCACHE_VOLUME_CREATING, |
308 | TASK_UNINTERRUPTIBLE); |
309 | } |
310 | return; |
311 | no_wait: |
312 | clear_bit_unlock(FSCACHE_VOLUME_CREATING, addr: &volume->flags); |
313 | wake_up_bit(word: &volume->flags, FSCACHE_VOLUME_CREATING); |
314 | } |
315 | |
316 | /* |
317 | * Acquire a volume representation cookie and link it to a (proposed) cache. |
318 | */ |
319 | struct fscache_volume *__fscache_acquire_volume(const char *volume_key, |
320 | const char *cache_name, |
321 | const void *coherency_data, |
322 | size_t coherency_len) |
323 | { |
324 | struct fscache_volume *volume; |
325 | |
326 | volume = fscache_alloc_volume(volume_key, cache_name, |
327 | coherency_data, coherency_len); |
328 | if (!volume) |
329 | return ERR_PTR(error: -ENOMEM); |
330 | |
331 | if (!fscache_hash_volume(candidate: volume)) { |
332 | fscache_put_volume(volume, where: fscache_volume_put_hash_collision); |
333 | return ERR_PTR(error: -EBUSY); |
334 | } |
335 | |
336 | fscache_create_volume(volume, wait: false); |
337 | return volume; |
338 | } |
339 | EXPORT_SYMBOL(__fscache_acquire_volume); |
340 | |
341 | static void fscache_wake_pending_volume(struct fscache_volume *volume, |
342 | struct hlist_bl_head *h) |
343 | { |
344 | struct fscache_volume *cursor; |
345 | struct hlist_bl_node *p; |
346 | |
347 | hlist_bl_for_each_entry(cursor, p, h, hash_link) { |
348 | if (fscache_volume_same(a: cursor, b: volume)) { |
349 | fscache_see_volume(volume: cursor, where: fscache_volume_see_hash_wake); |
350 | clear_and_wake_up_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, |
351 | word: &cursor->flags); |
352 | return; |
353 | } |
354 | } |
355 | } |
356 | |
357 | /* |
358 | * Remove a volume cookie from the hash table. |
359 | */ |
360 | static void fscache_unhash_volume(struct fscache_volume *volume) |
361 | { |
362 | struct hlist_bl_head *h; |
363 | unsigned int bucket; |
364 | |
365 | bucket = volume->key_hash & (ARRAY_SIZE(fscache_volume_hash) - 1); |
366 | h = &fscache_volume_hash[bucket]; |
367 | |
368 | hlist_bl_lock(b: h); |
369 | hlist_bl_del(n: &volume->hash_link); |
370 | if (test_bit(FSCACHE_VOLUME_COLLIDED_WITH, &volume->flags)) |
371 | fscache_wake_pending_volume(volume, h); |
372 | hlist_bl_unlock(b: h); |
373 | } |
374 | |
375 | /* |
376 | * Drop a cache's volume attachments. |
377 | */ |
378 | static void fscache_free_volume(struct fscache_volume *volume) |
379 | { |
380 | struct fscache_cache *cache = volume->cache; |
381 | |
382 | if (volume->cache_priv) { |
383 | __fscache_begin_volume_access(volume, NULL, |
384 | why: fscache_access_relinquish_volume); |
385 | if (volume->cache_priv) |
386 | cache->ops->free_volume(volume); |
387 | fscache_end_volume_access(volume, NULL, |
388 | fscache_access_relinquish_volume_end); |
389 | } |
390 | |
391 | down_write(sem: &fscache_addremove_sem); |
392 | list_del_init(entry: &volume->proc_link); |
393 | atomic_dec(v: &volume->cache->n_volumes); |
394 | up_write(sem: &fscache_addremove_sem); |
395 | |
396 | if (!hlist_bl_unhashed(h: &volume->hash_link)) |
397 | fscache_unhash_volume(volume); |
398 | |
399 | trace_fscache_volume(volume_debug_id: volume->debug_id, usage: 0, where: fscache_volume_free); |
400 | kfree(objp: volume->key); |
401 | kfree(objp: volume); |
402 | fscache_stat_d(stat: &fscache_n_volumes); |
403 | fscache_put_cache(cache, where: fscache_cache_put_volume); |
404 | } |
405 | |
406 | /* |
407 | * Drop a reference to a volume cookie. |
408 | */ |
409 | void fscache_put_volume(struct fscache_volume *volume, |
410 | enum fscache_volume_trace where) |
411 | { |
412 | if (volume) { |
413 | unsigned int debug_id = volume->debug_id; |
414 | bool zero; |
415 | int ref; |
416 | |
417 | zero = __refcount_dec_and_test(r: &volume->ref, oldp: &ref); |
418 | trace_fscache_volume(volume_debug_id: debug_id, usage: ref - 1, where); |
419 | if (zero) |
420 | fscache_free_volume(volume); |
421 | } |
422 | } |
423 | |
424 | /* |
425 | * Relinquish a volume representation cookie. |
426 | */ |
427 | void __fscache_relinquish_volume(struct fscache_volume *volume, |
428 | const void *coherency_data, |
429 | bool invalidate) |
430 | { |
431 | if (WARN_ON(test_and_set_bit(FSCACHE_VOLUME_RELINQUISHED, &volume->flags))) |
432 | return; |
433 | |
434 | if (invalidate) { |
435 | set_bit(FSCACHE_VOLUME_INVALIDATE, addr: &volume->flags); |
436 | } else if (coherency_data) { |
437 | memcpy(volume->coherency, coherency_data, volume->coherency_len); |
438 | } |
439 | |
440 | fscache_put_volume(volume, where: fscache_volume_put_relinquish); |
441 | } |
442 | EXPORT_SYMBOL(__fscache_relinquish_volume); |
443 | |
444 | /** |
445 | * fscache_withdraw_volume - Withdraw a volume from being cached |
446 | * @volume: Volume cookie |
447 | * |
448 | * Withdraw a cache volume from service, waiting for all accesses to complete |
449 | * before returning. |
450 | */ |
451 | void fscache_withdraw_volume(struct fscache_volume *volume) |
452 | { |
453 | int n_accesses; |
454 | |
455 | _debug("withdraw V=%x" , volume->debug_id); |
456 | |
457 | /* Allow wakeups on dec-to-0 */ |
458 | n_accesses = atomic_dec_return(v: &volume->n_accesses); |
459 | trace_fscache_access_volume(volume_debug_id: volume->debug_id, cookie_debug_id: 0, |
460 | ref: refcount_read(r: &volume->ref), |
461 | n_accesses, why: fscache_access_cache_unpin); |
462 | |
463 | wait_var_event(&volume->n_accesses, |
464 | atomic_read(&volume->n_accesses) == 0); |
465 | } |
466 | EXPORT_SYMBOL(fscache_withdraw_volume); |
467 | |
468 | #ifdef CONFIG_PROC_FS |
469 | /* |
470 | * Generate a list of volumes in /proc/fs/fscache/volumes |
471 | */ |
472 | static int fscache_volumes_seq_show(struct seq_file *m, void *v) |
473 | { |
474 | struct fscache_volume *volume; |
475 | |
476 | if (v == &fscache_volumes) { |
477 | seq_puts(m, |
478 | s: "VOLUME REF nCOOK ACC FL CACHE KEY\n" |
479 | "======== ===== ===== === == =============== ================\n" ); |
480 | return 0; |
481 | } |
482 | |
483 | volume = list_entry(v, struct fscache_volume, proc_link); |
484 | seq_printf(m, |
485 | fmt: "%08x %5d %5d %3d %02lx %-15.15s %s\n" , |
486 | volume->debug_id, |
487 | refcount_read(r: &volume->ref), |
488 | atomic_read(v: &volume->n_cookies), |
489 | atomic_read(v: &volume->n_accesses), |
490 | volume->flags, |
491 | volume->cache->name ?: "-" , |
492 | volume->key + 1); |
493 | return 0; |
494 | } |
495 | |
496 | static void *fscache_volumes_seq_start(struct seq_file *m, loff_t *_pos) |
497 | __acquires(&fscache_addremove_sem) |
498 | { |
499 | down_read(sem: &fscache_addremove_sem); |
500 | return seq_list_start_head(head: &fscache_volumes, pos: *_pos); |
501 | } |
502 | |
503 | static void *fscache_volumes_seq_next(struct seq_file *m, void *v, loff_t *_pos) |
504 | { |
505 | return seq_list_next(v, head: &fscache_volumes, ppos: _pos); |
506 | } |
507 | |
508 | static void fscache_volumes_seq_stop(struct seq_file *m, void *v) |
509 | __releases(&fscache_addremove_sem) |
510 | { |
511 | up_read(sem: &fscache_addremove_sem); |
512 | } |
513 | |
514 | const struct seq_operations fscache_volumes_seq_ops = { |
515 | .start = fscache_volumes_seq_start, |
516 | .next = fscache_volumes_seq_next, |
517 | .stop = fscache_volumes_seq_stop, |
518 | .show = fscache_volumes_seq_show, |
519 | }; |
520 | #endif /* CONFIG_PROC_FS */ |
521 | |