// SPDX-License-Identifier: GPL-2.0
#include <linux/memcontrol.h>
#include <linux/rwsem.h>
#include <linux/shrinker.h>
#include <linux/rculist.h>
#include <trace/events/vmscan.h>

#include "internal.h"

LIST_HEAD(shrinker_list);
DEFINE_MUTEX(shrinker_mutex);

#ifdef CONFIG_MEMCG
static int shrinker_nr_max;

static inline int shrinker_unit_size(int nr_items)
{
	return (DIV_ROUND_UP(nr_items, SHRINKER_UNIT_BITS) * sizeof(struct shrinker_info_unit *));
}
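/*
 * Worked example (illustrative, not from the original source): assuming
 * SHRINKER_UNIT_BITS equals BITS_PER_LONG (64 on a 64-bit build), a request
 * for nr_items = 100 needs DIV_ROUND_UP(100, 64) = 2 unit pointers, i.e.
 * 2 * sizeof(struct shrinker_info_unit *) = 16 bytes for the pointer array.
 */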

static inline void shrinker_unit_free(struct shrinker_info *info, int start)
{
	struct shrinker_info_unit **unit;
	int nr, i;

	if (!info)
		return;

	unit = info->unit;
	nr = DIV_ROUND_UP(info->map_nr_max, SHRINKER_UNIT_BITS);

	for (i = start; i < nr; i++) {
		if (!unit[i])
			break;

		kfree(unit[i]);
		unit[i] = NULL;
	}
}

static inline int shrinker_unit_alloc(struct shrinker_info *new,
				      struct shrinker_info *old, int nid)
{
	struct shrinker_info_unit *unit;
	int nr = DIV_ROUND_UP(new->map_nr_max, SHRINKER_UNIT_BITS);
	int start = old ? DIV_ROUND_UP(old->map_nr_max, SHRINKER_UNIT_BITS) : 0;
	int i;

	for (i = start; i < nr; i++) {
		unit = kzalloc_node(sizeof(*unit), GFP_KERNEL, nid);
		if (!unit) {
			shrinker_unit_free(new, start);
			return -ENOMEM;
		}

		new->unit[i] = unit;
	}

	return 0;
}

void free_shrinker_info(struct mem_cgroup *memcg)
{
	struct mem_cgroup_per_node *pn;
	struct shrinker_info *info;
	int nid;

	for_each_node(nid) {
		pn = memcg->nodeinfo[nid];
		info = rcu_dereference_protected(pn->shrinker_info, true);
		shrinker_unit_free(info, 0);
		kvfree(info);
		rcu_assign_pointer(pn->shrinker_info, NULL);
	}
}

int alloc_shrinker_info(struct mem_cgroup *memcg)
{
	struct shrinker_info *info;
	int nid, ret = 0;
	int array_size = 0;

	mutex_lock(&shrinker_mutex);
	array_size = shrinker_unit_size(shrinker_nr_max);
	for_each_node(nid) {
		info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid);
		if (!info)
			goto err;
		info->map_nr_max = shrinker_nr_max;
		if (shrinker_unit_alloc(info, NULL, nid))
			goto err;
		rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
	}
	mutex_unlock(&shrinker_mutex);

	return ret;

err:
	mutex_unlock(&shrinker_mutex);
	free_shrinker_info(memcg);
	return -ENOMEM;
}

static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
						     int nid)
{
	return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
					 lockdep_is_held(&shrinker_mutex));
}

static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size,
				    int old_size, int new_nr_max)
{
	struct shrinker_info *new, *old;
	struct mem_cgroup_per_node *pn;
	int nid;

	for_each_node(nid) {
		pn = memcg->nodeinfo[nid];
		old = shrinker_info_protected(memcg, nid);
		/* Not yet online memcg */
		if (!old)
			return 0;

		/* Already expanded this shrinker_info */
		if (new_nr_max <= old->map_nr_max)
			continue;

		new = kvmalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid);
		if (!new)
			return -ENOMEM;

		new->map_nr_max = new_nr_max;

		memcpy(new->unit, old->unit, old_size);
		if (shrinker_unit_alloc(new, old, nid)) {
			kvfree(new);
			return -ENOMEM;
		}

		rcu_assign_pointer(pn->shrinker_info, new);
		kvfree_rcu(old, rcu);
	}

	return 0;
}

static int expand_shrinker_info(int new_id)
{
	int ret = 0;
	int new_nr_max = round_up(new_id + 1, SHRINKER_UNIT_BITS);
	int new_size, old_size = 0;
	struct mem_cgroup *memcg;

	if (!root_mem_cgroup)
		goto out;

	lockdep_assert_held(&shrinker_mutex);

	new_size = shrinker_unit_size(new_nr_max);
	old_size = shrinker_unit_size(shrinker_nr_max);

	memcg = mem_cgroup_iter(NULL, NULL, NULL);
	do {
		ret = expand_one_shrinker_info(memcg, new_size, old_size,
					       new_nr_max);
		if (ret) {
			mem_cgroup_iter_break(NULL, memcg);
			goto out;
		}
	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
out:
	if (!ret)
		shrinker_nr_max = new_nr_max;

	return ret;
}

static inline int shrinker_id_to_index(int shrinker_id)
{
	return shrinker_id / SHRINKER_UNIT_BITS;
}

static inline int shrinker_id_to_offset(int shrinker_id)
{
	return shrinker_id % SHRINKER_UNIT_BITS;
}

static inline int calc_shrinker_id(int index, int offset)
{
	return index * SHRINKER_UNIT_BITS + offset;
}
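/*
 * Worked example (illustrative, not from the original source): with
 * SHRINKER_UNIT_BITS == 64, shrinker_id 70 lives at index 70 / 64 = 1 and
 * offset 70 % 64 = 6, and calc_shrinker_id(1, 6) maps back to 70.
 */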

void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
{
	if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
		struct shrinker_info *info;
		struct shrinker_info_unit *unit;

		rcu_read_lock();
		info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
		unit = info->unit[shrinker_id_to_index(shrinker_id)];
		if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
			/* Pairs with smp mb in shrink_slab() */
			smp_mb__before_atomic();
			set_bit(shrinker_id_to_offset(shrinker_id), unit->map);
		}
		rcu_read_unlock();
	}
}

static DEFINE_IDR(shrinker_idr);

static int shrinker_memcg_alloc(struct shrinker *shrinker)
{
	int id, ret = -ENOMEM;

	if (mem_cgroup_disabled())
		return -ENOSYS;

	mutex_lock(&shrinker_mutex);
	id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
	if (id < 0)
		goto unlock;

	if (id >= shrinker_nr_max) {
		if (expand_shrinker_info(id)) {
			idr_remove(&shrinker_idr, id);
			goto unlock;
		}
	}
	shrinker->id = id;
	ret = 0;
unlock:
	mutex_unlock(&shrinker_mutex);
	return ret;
}

static void shrinker_memcg_remove(struct shrinker *shrinker)
{
	int id = shrinker->id;

	BUG_ON(id < 0);

	lockdep_assert_held(&shrinker_mutex);

	idr_remove(&shrinker_idr, id);
}

static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
				   struct mem_cgroup *memcg)
{
	struct shrinker_info *info;
	struct shrinker_info_unit *unit;
	long nr_deferred;

	rcu_read_lock();
	info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
	unit = info->unit[shrinker_id_to_index(shrinker->id)];
	nr_deferred = atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0);
	rcu_read_unlock();

	return nr_deferred;
}

static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
				  struct mem_cgroup *memcg)
{
	struct shrinker_info *info;
	struct shrinker_info_unit *unit;
	long nr_deferred;

	rcu_read_lock();
	info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
	unit = info->unit[shrinker_id_to_index(shrinker->id)];
	nr_deferred =
		atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]);
	rcu_read_unlock();

	return nr_deferred;
}

void reparent_shrinker_deferred(struct mem_cgroup *memcg)
{
	int nid, index, offset;
	long nr;
	struct mem_cgroup *parent;
	struct shrinker_info *child_info, *parent_info;
	struct shrinker_info_unit *child_unit, *parent_unit;

	parent = parent_mem_cgroup(memcg);
	if (!parent)
		parent = root_mem_cgroup;

	/* Prevent concurrent shrinker_info expansion */
	mutex_lock(&shrinker_mutex);
	for_each_node(nid) {
		child_info = shrinker_info_protected(memcg, nid);
		parent_info = shrinker_info_protected(parent, nid);
		for (index = 0; index < shrinker_id_to_index(child_info->map_nr_max); index++) {
			child_unit = child_info->unit[index];
			parent_unit = parent_info->unit[index];
			for (offset = 0; offset < SHRINKER_UNIT_BITS; offset++) {
				nr = atomic_long_read(&child_unit->nr_deferred[offset]);
				atomic_long_add(nr, &parent_unit->nr_deferred[offset]);
			}
		}
	}
	mutex_unlock(&shrinker_mutex);
}
#else
static int shrinker_memcg_alloc(struct shrinker *shrinker)
{
	return -ENOSYS;
}

static void shrinker_memcg_remove(struct shrinker *shrinker)
{
}

static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
				   struct mem_cgroup *memcg)
{
	return 0;
}

static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
				  struct mem_cgroup *memcg)
{
	return 0;
}
#endif /* CONFIG_MEMCG */

static long xchg_nr_deferred(struct shrinker *shrinker,
			     struct shrink_control *sc)
{
	int nid = sc->nid;

	if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
		nid = 0;

	if (sc->memcg &&
	    (shrinker->flags & SHRINKER_MEMCG_AWARE))
		return xchg_nr_deferred_memcg(nid, shrinker,
					      sc->memcg);

	return atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
}


static long add_nr_deferred(long nr, struct shrinker *shrinker,
			    struct shrink_control *sc)
{
	int nid = sc->nid;

	if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
		nid = 0;

	if (sc->memcg &&
	    (shrinker->flags & SHRINKER_MEMCG_AWARE))
		return add_nr_deferred_memcg(nr, nid, shrinker,
					     sc->memcg);

	return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]);
}

#define SHRINK_BATCH 128

static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
				    struct shrinker *shrinker, int priority)
{
	unsigned long freed = 0;
	unsigned long long delta;
	long total_scan;
	long freeable;
	long nr;
	long new_nr;
	long batch_size = shrinker->batch ? shrinker->batch
					  : SHRINK_BATCH;
	long scanned = 0, next_deferred;

	freeable = shrinker->count_objects(shrinker, shrinkctl);
	if (freeable == 0 || freeable == SHRINK_EMPTY)
		return freeable;

	/*
	 * copy the current shrinker scan count into a local variable
	 * and zero it so that other concurrent shrinker invocations
	 * don't also do this scanning work.
	 */
	nr = xchg_nr_deferred(shrinker, shrinkctl);

	if (shrinker->seeks) {
		delta = freeable >> priority;
		delta *= 4;
		do_div(delta, shrinker->seeks);
	} else {
		/*
		 * These objects don't require any IO to create. Trim
		 * them aggressively under memory pressure to keep
		 * them from causing refetches in the IO caches.
		 */
		delta = freeable / 2;
	}
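	/*
	 * Worked example (illustrative numbers, not from the original source):
	 * assuming DEFAULT_SEEKS == 2, with freeable = 10000 and priority = 12,
	 * delta = (10000 >> 12) * 4 / 2 = 4, i.e. roughly
	 * freeable * 4 / (2^priority * seeks). A lower priority value (a
	 * smaller shift) therefore asks for a proportionally larger scan.
	 */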

	total_scan = nr >> priority;
	total_scan += delta;
	total_scan = min(total_scan, (2 * freeable));

	trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
				   freeable, delta, total_scan, priority);

	/*
	 * Normally, we should not scan less than batch_size objects in one
	 * pass to avoid too frequent shrinker calls, but if the slab has less
	 * than batch_size objects in total and we are really tight on memory,
	 * we will try to reclaim all available objects, otherwise we can end
	 * up failing allocations although there are plenty of reclaimable
	 * objects spread over several slabs with usage less than the
	 * batch_size.
	 *
	 * We detect the "tight on memory" situations by looking at the total
	 * number of objects we want to scan (total_scan). If it is greater
	 * than the total number of objects on slab (freeable), we must be
	 * scanning at high prio and therefore should try to reclaim as much as
	 * possible.
	 */
	while (total_scan >= batch_size ||
	       total_scan >= freeable) {
		unsigned long ret;
		unsigned long nr_to_scan = min(batch_size, total_scan);

		shrinkctl->nr_to_scan = nr_to_scan;
		shrinkctl->nr_scanned = nr_to_scan;
		ret = shrinker->scan_objects(shrinker, shrinkctl);
		if (ret == SHRINK_STOP)
			break;
		freed += ret;

		count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned);
		total_scan -= shrinkctl->nr_scanned;
		scanned += shrinkctl->nr_scanned;

		cond_resched();
	}

	/*
	 * The deferred work is increased by any new work (delta) that wasn't
	 * done, decreased by old deferred work that was done now.
	 *
	 * And it is capped at two times the number of freeable items.
	 */
	next_deferred = max_t(long, (nr + delta - scanned), 0);
	next_deferred = min(next_deferred, (2 * freeable));
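	/*
	 * Worked example (illustrative numbers, not from the original source):
	 * if nr = 100 objects were deferred by earlier runs, this run added
	 * delta = 50 and scanned = 120 of them, then
	 * next_deferred = max(100 + 50 - 120, 0) = 30, clamped to at most
	 * 2 * freeable; those 30 are handed to later callers via
	 * add_nr_deferred() below.
	 */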

	/*
	 * move the unused scan count back into the shrinker in a
	 * manner that handles concurrent updates.
	 */
	new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl);

	trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan);
	return freed;
}

#ifdef CONFIG_MEMCG
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
				       struct mem_cgroup *memcg, int priority)
{
	struct shrinker_info *info;
	unsigned long ret, freed = 0;
	int offset, index = 0;

	if (!mem_cgroup_online(memcg))
		return 0;

	/*
	 * Lockless algorithm of memcg shrink.
	 *
	 * The shrinker_info may be freed asynchronously via RCU in
	 * expand_one_shrinker_info(), so rcu_read_lock() needs to be used
	 * to ensure the existence of the shrinker_info.
	 *
	 * The shrinker_info_unit is never freed unless its corresponding memcg
	 * is destroyed. Here we already hold a refcount on the memcg, so the
	 * memcg will not be destroyed, and of course the shrinker_info_unit
	 * will not be freed.
	 *
	 * So in the memcg shrink:
	 * step 1: use rcu_read_lock() to guarantee existence of the
	 *         shrinker_info.
	 * step 2: after getting the shrinker_info_unit we can safely release
	 *         the RCU lock.
	 * step 3: traverse the bitmap and calculate the shrinker_id.
	 * step 4: use rcu_read_lock() to guarantee existence of the shrinker.
	 * step 5: use the shrinker_id to find the shrinker, then use
	 *         shrinker_try_get() to guarantee existence of the shrinker,
	 *         then we can release the RCU lock to call do_shrink_slab(),
	 *         which may sleep.
	 * step 6: do shrinker_put() paired with step 5 to put the refcount;
	 *         if the refcount reaches 0, then wake up the waiter in
	 *         shrinker_free() by calling complete().
	 *         Note: unlike the global shrink, we don't need to acquire
	 *         the RCU lock here to guarantee existence of the shrinker,
	 *         because we don't need this shrinker to traverse the next
	 *         shrinker in the bitmap.
	 * step 7: we have already exited the RCU read-side critical section
	 *         before calling do_shrink_slab(), so the shrinker_info may be
	 *         released in expand_one_shrinker_info(); go back to step 1
	 *         to reacquire the shrinker_info.
	 */
again:
	rcu_read_lock();
	info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
	if (unlikely(!info))
		goto unlock;

	if (index < shrinker_id_to_index(info->map_nr_max)) {
		struct shrinker_info_unit *unit;

		unit = info->unit[index];

		rcu_read_unlock();

		for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) {
			struct shrink_control sc = {
				.gfp_mask = gfp_mask,
				.nid = nid,
				.memcg = memcg,
			};
			struct shrinker *shrinker;
			int shrinker_id = calc_shrinker_id(index, offset);

			rcu_read_lock();
			shrinker = idr_find(&shrinker_idr, shrinker_id);
			if (unlikely(!shrinker || !shrinker_try_get(shrinker))) {
				clear_bit(offset, unit->map);
				rcu_read_unlock();
				continue;
			}
			rcu_read_unlock();

			/* Call non-slab shrinkers even though kmem is disabled */
			if (!memcg_kmem_online() &&
			    !(shrinker->flags & SHRINKER_NONSLAB))
				continue;

			ret = do_shrink_slab(&sc, shrinker, priority);
			if (ret == SHRINK_EMPTY) {
				clear_bit(offset, unit->map);
				/*
				 * After the shrinker reported that it had no objects to
				 * free, but before we cleared the corresponding bit in
				 * the memcg shrinker map, a new object might have been
				 * added. To make sure we have the bit set in this case,
				 * we invoke the shrinker one more time and reset the bit
				 * if it reports that it is not empty anymore.
				 * The memory barrier here pairs with the barrier in
				 * set_shrinker_bit():
				 *
				 * list_lru_add()     shrink_slab_memcg()
				 *   list_add_tail()    clear_bit()
				 *   <MB>               <MB>
				 *   set_bit()          do_shrink_slab()
				 */
				smp_mb__after_atomic();
				ret = do_shrink_slab(&sc, shrinker, priority);
				if (ret == SHRINK_EMPTY)
					ret = 0;
				else
					set_shrinker_bit(memcg, nid, shrinker_id);
			}
			freed += ret;
			shrinker_put(shrinker);
		}

		index++;
		goto again;
	}
unlock:
	rcu_read_unlock();
	return freed;
}
#else /* !CONFIG_MEMCG */
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
				       struct mem_cgroup *memcg, int priority)
{
	return 0;
}
#endif /* CONFIG_MEMCG */

/**
 * shrink_slab - shrink slab caches
 * @gfp_mask: allocation context
 * @nid: node whose slab caches to target
 * @memcg: memory cgroup whose slab caches to target
 * @priority: the reclaim priority
 *
 * Call the shrink functions to age shrinkable caches.
 *
 * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set;
 * unaware shrinkers will receive a node id of 0 instead.
 *
 * @memcg specifies the memory cgroup to target. Unaware shrinkers
 * are called only if it is the root cgroup.
 *
 * @priority is sc->priority; the number of freeable objects is shifted right
 * by @priority to obtain the scan target.
 *
 * Returns the number of reclaimed slab objects.
 */
unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
			  int priority)
{
	unsigned long ret, freed = 0;
	struct shrinker *shrinker;

	/*
	 * The root memcg might be allocated even though memcg is disabled
	 * via the "cgroup_disable=memory" boot parameter. This could make
	 * mem_cgroup_is_root() return false, and then we would only run the
	 * memcg slab shrink but skip the global shrink. This may result in
	 * premature oom.
	 */
	if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
		return shrink_slab_memcg(gfp_mask, nid, memcg, priority);

	/*
	 * Lockless algorithm of global shrink.
	 *
	 * In the unregistration step, the shrinker will be freed asynchronously
	 * via RCU after its refcount reaches 0. So both rcu_read_lock() and
	 * shrinker_try_get() can be used to ensure the existence of the shrinker.
	 *
	 * So in the global shrink:
	 * step 1: use rcu_read_lock() to guarantee existence of the shrinker
	 *         and the validity of the shrinker_list walk.
	 * step 2: use shrinker_try_get() to try to get the refcount; if
	 *         successful, the existence of the shrinker is also guaranteed,
	 *         so we can release the RCU lock to call do_shrink_slab(),
	 *         which may sleep.
	 * step 3: we *MUST* reacquire the RCU lock before calling shrinker_put(),
	 *         which ensures that neither this shrinker nor the next shrinker
	 *         will be freed in the next traversal operation.
	 * step 4: do shrinker_put() paired with step 2 to put the refcount;
	 *         if the refcount reaches 0, then wake up the waiter in
	 *         shrinker_free() by calling complete().
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(shrinker, &shrinker_list, list) {
		struct shrink_control sc = {
			.gfp_mask = gfp_mask,
			.nid = nid,
			.memcg = memcg,
		};

		if (!shrinker_try_get(shrinker))
			continue;

		rcu_read_unlock();

		ret = do_shrink_slab(&sc, shrinker, priority);
		if (ret == SHRINK_EMPTY)
			ret = 0;
		freed += ret;

		rcu_read_lock();
		shrinker_put(shrinker);
	}

	rcu_read_unlock();
	cond_resched();
	return freed;
}

struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...)
{
	struct shrinker *shrinker;
	unsigned int size;
	va_list ap;
	int err;

	shrinker = kzalloc(sizeof(struct shrinker), GFP_KERNEL);
	if (!shrinker)
		return NULL;

	va_start(ap, fmt);
	err = shrinker_debugfs_name_alloc(shrinker, fmt, ap);
	va_end(ap);
	if (err)
		goto err_name;

	shrinker->flags = flags | SHRINKER_ALLOCATED;
	shrinker->seeks = DEFAULT_SEEKS;

	if (flags & SHRINKER_MEMCG_AWARE) {
		err = shrinker_memcg_alloc(shrinker);
		if (err == -ENOSYS) {
			/* Memcg is not supported, fallback to non-memcg-aware shrinker. */
			shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
			goto non_memcg;
		}

		if (err)
			goto err_flags;

		return shrinker;
	}

non_memcg:
	/*
	 * The nr_deferred is available on per memcg level for memcg aware
	 * shrinkers, so only allocate nr_deferred in the following cases:
	 *  - non-memcg-aware shrinkers
	 *  - !CONFIG_MEMCG
	 *  - memcg is disabled by kernel command line
	 */
	size = sizeof(*shrinker->nr_deferred);
	if (flags & SHRINKER_NUMA_AWARE)
		size *= nr_node_ids;

	shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
	if (!shrinker->nr_deferred)
		goto err_flags;

	return shrinker;

err_flags:
	shrinker_debugfs_name_free(shrinker);
err_name:
	kfree(shrinker);
	return NULL;
}
EXPORT_SYMBOL_GPL(shrinker_alloc);

void shrinker_register(struct shrinker *shrinker)
{
	if (unlikely(!(shrinker->flags & SHRINKER_ALLOCATED))) {
		pr_warn("Must use shrinker_alloc() to dynamically allocate the shrinker");
		return;
	}

	mutex_lock(&shrinker_mutex);
	list_add_tail_rcu(&shrinker->list, &shrinker_list);
	shrinker->flags |= SHRINKER_REGISTERED;
	shrinker_debugfs_add(shrinker);
	mutex_unlock(&shrinker_mutex);

	init_completion(&shrinker->done);
	/*
	 * Now the shrinker is fully set up, take the first reference to it to
	 * indicate that lookup operations are now allowed to use it via
	 * shrinker_try_get().
	 */
	refcount_set(&shrinker->refcount, 1);
}
EXPORT_SYMBOL_GPL(shrinker_register);
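
/*
 * Minimal usage sketch (illustrative only, not part of this file): a caller
 * allocates a shrinker, fills in its callbacks and registers it. The names
 * my_count(), my_scan() and my_cache_*() below are hypothetical.
 *
 *	static unsigned long my_count(struct shrinker *s, struct shrink_control *sc)
 *	{
 *		return my_cache_count(sc->nid);	// may also return SHRINK_EMPTY
 *	}
 *
 *	static unsigned long my_scan(struct shrinker *s, struct shrink_control *sc)
 *	{
 *		return my_cache_reclaim(sc->nid, sc->nr_to_scan);
 *	}
 *
 *	shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "my-cache");
 *	if (!shrinker)
 *		return -ENOMEM;
 *	shrinker->count_objects = my_count;
 *	shrinker->scan_objects = my_scan;
 *	shrinker_register(shrinker);
 *	...
 *	shrinker_free(shrinker);	// on teardown
 */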

static void shrinker_free_rcu_cb(struct rcu_head *head)
{
	struct shrinker *shrinker = container_of(head, struct shrinker, rcu);

	kfree(shrinker->nr_deferred);
	kfree(shrinker);
}

void shrinker_free(struct shrinker *shrinker)
{
	struct dentry *debugfs_entry = NULL;
	int debugfs_id;

	if (!shrinker)
		return;

	if (shrinker->flags & SHRINKER_REGISTERED) {
		/* drop the initial refcount */
		shrinker_put(shrinker);
		/*
		 * Wait for all lookups of the shrinker to complete. After that,
		 * no shrinker is running or will run again, so we can safely
		 * free it asynchronously via RCU and safely free the structure
		 * where the shrinker is located, such as the super_block.
		 */
		wait_for_completion(&shrinker->done);
	}

	mutex_lock(&shrinker_mutex);
	if (shrinker->flags & SHRINKER_REGISTERED) {
		/*
		 * Now we can safely remove it from the shrinker_list and then
		 * free it.
		 */
		list_del_rcu(&shrinker->list);
		debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
		shrinker->flags &= ~SHRINKER_REGISTERED;
	}

	shrinker_debugfs_name_free(shrinker);

	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
		shrinker_memcg_remove(shrinker);
	mutex_unlock(&shrinker_mutex);

	if (debugfs_entry)
		shrinker_debugfs_remove(debugfs_entry, debugfs_id);

	call_rcu(&shrinker->rcu, shrinker_free_rcu_cb);
}
EXPORT_SYMBOL_GPL(shrinker_free);

/* Source: linux/mm/shrinker.c */