slub.c source code [linux/mm/slub.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* SLUB: A slab allocator that limits cache line use instead of queuing
4	* objects in per cpu and per node lists.
5	*
6	* The allocator synchronizes using per slab locks or atomic operations
7	* and only uses a centralized lock to manage a pool of partial slabs.
8	*
9	* (C) 2007 SGI, Christoph Lameter
10	* (C) 2011 Linux Foundation, Christoph Lameter
11	*/
12
13	#include <linux/mm.h>
14	#include <linux/swap.h> /* mm_account_reclaimed_pages() */
15	#include <linux/module.h>
16	#include <linux/bit_spinlock.h>
17	#include <linux/interrupt.h>
18	#include <linux/swab.h>
19	#include <linux/bitops.h>
20	#include <linux/slab.h>
21	#include "slab.h"
22	#include <linux/proc_fs.h>
23	#include <linux/seq_file.h>
24	#include <linux/kasan.h>
25	#include <linux/kmsan.h>
26	#include <linux/cpu.h>
27	#include <linux/cpuset.h>
28	#include <linux/mempolicy.h>
29	#include <linux/ctype.h>
30	#include <linux/stackdepot.h>
31	#include <linux/debugobjects.h>
32	#include <linux/kallsyms.h>
33	#include <linux/kfence.h>
34	#include <linux/memory.h>
35	#include <linux/math64.h>
36	#include <linux/fault-inject.h>
37	#include <linux/stacktrace.h>
38	#include <linux/prefetch.h>
39	#include <linux/memcontrol.h>
40	#include <linux/random.h>
41	#include <kunit/test.h>
42	#include <kunit/test-bug.h>
43	#include <linux/sort.h>
44
45	#include <linux/debugfs.h>
46	#include <trace/events/kmem.h>
47
48	#include "internal.h"
49
50	/*
51	* Lock order:
52	* 1. slab_mutex (Global Mutex)
53	* 2. node->list_lock (Spinlock)
54	* 3. kmem_cache->cpu_slab->lock (Local lock)
55	* 4. slab_lock(slab) (Only on some arches)
56	* 5. object_map_lock (Only for debugging)
57	*
58	* slab_mutex
59	*
60	* The role of the slab_mutex is to protect the list of all the slabs
61	* and to synchronize major metadata changes to slab cache structures.
62	* Also synchronizes memory hotplug callbacks.
63	*
64	* slab_lock
65	*
66	* The slab_lock is a wrapper around the page lock, thus it is a bit
67	* spinlock.
68	*
69	* The slab_lock is only used on arches that do not have the ability
70	* to do a cmpxchg_double. It only protects:
71	*
72	* A. slab->freelist -> List of free objects in a slab
73	* B. slab->inuse -> Number of objects in use
74	* C. slab->objects -> Number of objects in slab
75	* D. slab->frozen -> frozen state
76	*
77	* Frozen slabs
78	*
79	* If a slab is frozen then it is exempt from list management. It is not
80	* on any list except per cpu partial list. The processor that froze the
81	* slab is the one who can perform list operations on the slab. Other
82	* processors may put objects onto the freelist but the processor that
83	* froze the slab is the only one that can retrieve the objects from the
84	* slab's freelist.
85	*
86	* list_lock
87	*
88	* The list_lock protects the partial and full list on each node and
89	* the partial slab counter. If taken then no new slabs may be added or
90	* removed from the lists nor make the number of partial slabs be modified.
91	* (Note that the total number of slabs is an atomic value that may be
92	* modified without taking the list lock).
93	*
94	* The list_lock is a centralized lock and thus we avoid taking it as
95	* much as possible. As long as SLUB does not have to handle partial
96	* slabs, operations can continue without any centralized lock. F.e.
97	* allocating a long series of objects that fill up slabs does not require
98	* the list lock.
99	*
100	* For debug caches, all allocations are forced to go through a list_lock
101	* protected region to serialize against concurrent validation.
102	*
103	* cpu_slab->lock local lock
104	*
105	* This locks protect slowpath manipulation of all kmem_cache_cpu fields
106	* except the stat counters. This is a percpu structure manipulated only by
107	* the local cpu, so the lock protects against being preempted or interrupted
108	* by an irq. Fast path operations rely on lockless operations instead.
109	*
110	* On PREEMPT_RT, the local lock neither disables interrupts nor preemption
111	* which means the lockless fastpath cannot be used as it might interfere with
112	* an in-progress slow path operations. In this case the local lock is always
113	* taken but it still utilizes the freelist for the common operations.
114	*
115	* lockless fastpaths
116	*
117	* The fast path allocation (slab_alloc_node()) and freeing (do_slab_free())
118	* are fully lockless when satisfied from the percpu slab (and when
119	* cmpxchg_double is possible to use, otherwise slab_lock is taken).
120	* They also don't disable preemption or migration or irqs. They rely on
121	* the transaction id (tid) field to detect being preempted or moved to
122	* another cpu.
123	*
124	* irq, preemption, migration considerations
125	*
126	* Interrupts are disabled as part of list_lock or local_lock operations, or
127	* around the slab_lock operation, in order to make the slab allocator safe
128	* to use in the context of an irq.
129	*
130	* In addition, preemption (or migration on PREEMPT_RT) is disabled in the
131	* allocation slowpath, bulk allocation, and put_cpu_partial(), so that the
132	* local cpu doesn't change in the process and e.g. the kmem_cache_cpu pointer
133	* doesn't have to be revalidated in each section protected by the local lock.
134	*
135	* SLUB assigns one slab for allocation to each processor.
136	* Allocations only occur from these slabs called cpu slabs.
137	*
138	* Slabs with free elements are kept on a partial list and during regular
139	* operations no list for full slabs is used. If an object in a full slab is
140	* freed then the slab will show up again on the partial lists.
141	* We track full slabs for debugging purposes though because otherwise we
142	* cannot scan all objects.
143	*
144	* Slabs are freed when they become empty. Teardown and setup is
145	* minimal so we rely on the page allocators per cpu caches for
146	* fast frees and allocs.
147	*
148	* slab->frozen The slab is frozen and exempt from list processing.
149	* This means that the slab is dedicated to a purpose
150	* such as satisfying allocations for a specific
151	* processor. Objects may be freed in the slab while
152	* it is frozen but slab_free will then skip the usual
153	* list operations. It is up to the processor holding
154	* the slab to integrate the slab into the slab lists
155	* when the slab is no longer needed.
156	*
157	* One use of this flag is to mark slabs that are
158	* used for allocations. Then such a slab becomes a cpu
159	* slab. The cpu slab may be equipped with an additional
160	* freelist that allows lockless access to
161	* free objects in addition to the regular freelist
162	* that requires the slab lock.
163	*
164	* SLAB_DEBUG_FLAGS Slab requires special handling due to debug
165	* options set. This moves slab handling out of
166	* the fast path and disables lockless freelists.
167	*/
168
169	/*
170	* We could simply use migrate_disable()/enable() but as long as it's a
171	* function call even on !PREEMPT_RT, use inline preempt_disable() there.
172	*/
173	#ifndef CONFIG_PREEMPT_RT
174	#define slub_get_cpu_ptr(var) get_cpu_ptr(var)
175	#define slub_put_cpu_ptr(var) put_cpu_ptr(var)
176	#define USE_LOCKLESS_FAST_PATH() (true)
177	#else
178	#define slub_get_cpu_ptr(var) \
179	({ \
180	migrate_disable(); \
181	this_cpu_ptr(var); \
182	})
183	#define slub_put_cpu_ptr(var) \
184	do { \
185	(void)(var); \
186	migrate_enable(); \
187	} while (0)
188	#define USE_LOCKLESS_FAST_PATH() (false)
189	#endif
190
191	#ifndef CONFIG_SLUB_TINY
192	#define __fastpath_inline __always_inline
193	#else
194	#define __fastpath_inline
195	#endif
196
197	#ifdef CONFIG_SLUB_DEBUG
198	#ifdef CONFIG_SLUB_DEBUG_ON
199	DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
200	#else
201	DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
202	#endif
203	#endif /* CONFIG_SLUB_DEBUG */
204
205	/ Structure holding parameters for get_partial() call chain /
206	struct partial_context {
207	struct slab **slab;
208	gfp_t flags;
209	unsigned int orig_size;
210	};
211
212	static inline bool kmem_cache_debug(struct kmem_cache *s)
213	{
214	return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
215	}
216
217	static inline bool slub_debug_orig_size(struct kmem_cache *s)
218	{
219	return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
220	(s->flags & SLAB_KMALLOC));
221	}
222
223	void fixup_red_left(struct* kmem_cache s, void* *p)
224	{
225	if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
226	p += s->red_left_pad;
227
228	return p;
229	}
230
231	static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
232	{
233	#ifdef CONFIG_SLUB_CPU_PARTIAL
234	return !kmem_cache_debug(s);
235	#else
236	return false;
237	#endif
238	}
239
240	/*
241	* Issues still to be resolved:
242	*
243	* - Support PAGE_ALLOC_DEBUG. Should be easy to do.
244	*
245	* - Variable sizing of the per node arrays
246	*/
247
248	/ Enable to log cmpxchg failures /
249	#undef SLUB_DEBUG_CMPXCHG
250
251	#ifndef CONFIG_SLUB_TINY
252	/*
253	* Minimum number of partial slabs. These will be left on the partial
254	* lists even if they are empty. kmem_cache_shrink may reclaim them.
255	*/
256	#define MIN_PARTIAL 5
257
258	/*
259	* Maximum number of desirable partial slabs.
260	* The existence of more partial slabs makes kmem_cache_shrink
261	* sort the partial list by the number of objects in use.
262	*/
263	#define MAX_PARTIAL 10
264	#else
265	#define MIN_PARTIAL 0
266	#define MAX_PARTIAL 0
267	#endif
268
269	#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS \| SLAB_RED_ZONE \| \
270	SLAB_POISON \| SLAB_STORE_USER)
271
272	/*
273	* These debug flags cannot use CMPXCHG because there might be consistency
274	* issues when checking or reading debug information
275	*/
276	#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS \| SLAB_STORE_USER \| \
277	SLAB_TRACE)
278
279
280	/*
281	* Debugging flags that require metadata to be stored in the slab. These get
282	* disabled when slub_debug=O is used and a cache's min order increases with
283	* metadata.
284	*/
285	#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE \| SLAB_POISON \| SLAB_STORE_USER)
286
287	#define OO_SHIFT 16
288	#define OO_MASK ((1 << OO_SHIFT) - 1)
289	#define MAX_OBJS_PER_PAGE 32767 /* since slab.objects is u15 */
290
291	/ Internal SLUB flags /
292	/ Poison object /
293	#define __OBJECT_POISON ((slab_flags_t __force)0x80000000U)
294	/ Use cmpxchg_double /
295
296	#ifdef system_has_freelist_aba
297	#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)
298	#else
299	#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0U)
300	#endif
301
302	/*
303	* Tracking user of a slab.
304	*/
305	#define TRACK_ADDRS_COUNT 16
306	struct track {
307	unsigned long addr; / Called from address /
308	#ifdef CONFIG_STACKDEPOT
309	depot_stack_handle_t handle;
310	#endif
311	int cpu; / Was running on cpu /
312	int pid; / Pid context /
313	unsigned long when; / When did the operation occur /
314	};
315
316	enum track_item { TRACK_ALLOC, TRACK_FREE };
317
318	#ifdef SLAB_SUPPORTS_SYSFS
319	static int sysfs_slab_add(struct kmem_cache *);
320	static int sysfs_slab_alias(struct kmem_cache , const* char *);
321	#else
322	static inline int sysfs_slab_add(struct kmem_cache s) { return* `0`; }
323	static inline int sysfs_slab_alias(struct kmem_cache s, const* char *p)
324	{ return `0`; }
325	#endif
326
327	#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
328	static void debugfs_slab_add(struct kmem_cache *);
329	#else
330	static inline void debugfs_slab_add(struct kmem_cache *s) { }
331	#endif
332
333	static inline void stat(const struct kmem_cache s, enum* stat_item si)
334	{
335	#ifdef CONFIG_SLUB_STATS
336	/*
337	* The rmw is racy on a preemptible kernel but this is acceptable, so
338	* avoid this_cpu_add()'s irq-disable overhead.
339	*/
340	raw_cpu_inc(s->cpu_slab->stat[si]);
341	#endif
342	}
343
344	/*
345	* Tracks for which NUMA nodes we have kmem_cache_nodes allocated.
346	* Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily
347	* differ during memory hotplug/hotremove operations.
348	* Protected by slab_mutex.
349	*/
350	static nodemask_t slab_nodes;
351
352	#ifndef CONFIG_SLUB_TINY
353	/*
354	* Workqueue used for flush_cpu_slab().
355	*/
356	static struct workqueue_struct *flushwq;
357	#endif
358
359	/********************************************************************
360	* Core slab cache functions
361	*******************************************************************/
362
363	/*
364	* freeptr_t represents a SLUB freelist pointer, which might be encoded
365	* and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled.
366	*/
367	typedef struct { unsigned long v; } freeptr_t;
368
369	/*
370	* Returns freelist pointer (ptr). With hardening, this is obfuscated
371	* with an XOR of the address where the pointer is held and a per-cache
372	* random number.
373	*/
374	static inline freeptr_t freelist_ptr_encode(const struct kmem_cache *s,
375	void ptr, unsigned* long ptr_addr)
376	{
377	unsigned long encoded;
378
379	#ifdef CONFIG_SLAB_FREELIST_HARDENED
380	encoded = (unsigned long)ptr ^ s->random ^ swab(ptr_addr);
381	#else
382	encoded = (unsigned long)ptr;
383	#endif
384	return (freeptr_t){.v = encoded};
385	}
386
387	static inline void freelist_ptr_decode(const* struct kmem_cache *s,
388	freeptr_t ptr, unsigned long ptr_addr)
389	{
390	void *decoded;
391
392	#ifdef CONFIG_SLAB_FREELIST_HARDENED
393	decoded = (void *)(ptr.v ^ s->random ^ swab(ptr_addr));
394	#else
395	decoded = (void *)ptr.v;
396	#endif
397	return decoded;
398	}
399
400	static inline void get_freepointer(struct* kmem_cache s, void* *object)
401	{
402	unsigned long ptr_addr;
403	freeptr_t p;
404
405	object = kasan_reset_tag(addr: object);
406	ptr_addr = (unsigned long)object + s->offset;
407	p = (freeptr_t )(ptr_addr);
408	return freelist_ptr_decode(s, ptr: p, ptr_addr);
409	}
410
411	#ifndef CONFIG_SLUB_TINY
412	static void prefetch_freepointer(const struct kmem_cache s, void* *object)
413	{
414	prefetchw(object + s->offset);
415	}
416	#endif
417
418	/*
419	* When running under KMSAN, get_freepointer_safe() may return an uninitialized
420	* pointer value in the case the current thread loses the race for the next
421	* memory chunk in the freelist. In that case this_cpu_cmpxchg_double() in
422	* slab_alloc_node() will fail, so the uninitialized value won't be used, but
423	* KMSAN will still check all arguments of cmpxchg because of imperfect
424	* handling of inline assembly.
425	* To work around this problem, we apply __no_kmsan_checks to ensure that
426	* get_freepointer_safe() returns initialized memory.
427	*/
428	__no_kmsan_checks
429	static inline void get_freepointer_safe(struct* kmem_cache s, void* *object)
430	{
431	unsigned long freepointer_addr;
432	freeptr_t p;
433
434	if (!debug_pagealloc_enabled_static())
435	return get_freepointer(s, object);
436
437	object = kasan_reset_tag(addr: object);
438	freepointer_addr = (unsigned long)object + s->offset;
439	copy_from_kernel_nofault(dst: &p, src: (freeptr_t )freepointer_addr, size: sizeof*(p));
440	return freelist_ptr_decode(s, ptr: p, ptr_addr: freepointer_addr);
441	}
442
443	static inline void set_freepointer(struct kmem_cache s, void* object, void* *fp)
444	{
445	unsigned long freeptr_addr = (unsigned long)object + s->offset;
446
447	#ifdef CONFIG_SLAB_FREELIST_HARDENED
448	BUG_ON(object == fp); / naive detection of double free or corruption /
449	#endif
450
451	freeptr_addr = (unsigned long)kasan_reset_tag(addr: (void *)freeptr_addr);
452	(freeptr_t )freeptr_addr = freelist_ptr_encode(s, ptr: fp, ptr_addr: freeptr_addr);
453	}
454
455	/ Loop over all objects in a slab /
456	#define for_each_object(__p, __s, __addr, __objects) \
457	for (__p = fixup_red_left(__s, __addr); \
458	__p < (__addr) + (__objects) * (__s)->size; \
459	__p += (__s)->size)
460
461	static inline unsigned int order_objects(unsigned int order, unsigned int size)
462	{
463	return ((unsigned int)PAGE_SIZE << order) / size;
464	}
465
466	static inline struct kmem_cache_order_objects oo_make(unsigned int order,
467	unsigned int size)
468	{
469	struct kmem_cache_order_objects x = {
470	(order << OO_SHIFT) + order_objects(order, size)
471	};
472
473	return x;
474	}
475
476	static inline unsigned int oo_order(struct kmem_cache_order_objects x)
477	{
478	return x.x >> OO_SHIFT;
479	}
480
481	static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
482	{
483	return x.x & OO_MASK;
484	}
485
486	#ifdef CONFIG_SLUB_CPU_PARTIAL
487	static void slub_set_cpu_partial(struct kmem_cache s, unsigned* int nr_objects)
488	{
489	unsigned int nr_slabs;
490
491	s->cpu_partial = nr_objects;
492
493	/*
494	* We take the number of objects but actually limit the number of
495	* slabs on the per cpu partial list, in order to limit excessive
496	* growth of the list. For simplicity we assume that the slabs will
497	* be half-full.
498	*/
499	nr_slabs = DIV_ROUND_UP(nr_objects * `2`, oo_objects(s->oo));
500	s->cpu_partial_slabs = nr_slabs;
501	}
502	#else
503	static inline void
504	slub_set_cpu_partial(struct kmem_cache s, unsigned* int nr_objects)
505	{
506	}
507	#endif /* CONFIG_SLUB_CPU_PARTIAL */
508
509	/*
510	* Per slab locking using the pagelock
511	*/
512	static __always_inline void slab_lock(struct slab *slab)
513	{
514	struct page *page = slab_page(slab);
515
516	VM_BUG_ON_PAGE(PageTail(page), page);
517	bit_spin_lock(bitnum: PG_locked, addr: &page->flags);
518	}
519
520	static __always_inline void slab_unlock(struct slab *slab)
521	{
522	struct page *page = slab_page(slab);
523
524	VM_BUG_ON_PAGE(PageTail(page), page);
525	__bit_spin_unlock(bitnum: PG_locked, addr: &page->flags);
526	}
527
528	static inline bool
529	__update_freelist_fast(struct slab *slab,
530	void freelist_old, unsigned* long counters_old,
531	void freelist_new, unsigned* long counters_new)
532	{
533	#ifdef system_has_freelist_aba
534	freelist_aba_t old = { .freelist = freelist_old, .counter = counters_old };
535	freelist_aba_t new = { .freelist = freelist_new, .counter = counters_new };
536
537	return try_cmpxchg_freelist(&slab->freelist_counter.full, &old.full, new.full);
538	#else
539	return false;
540	#endif
541	}
542
543	static inline bool
544	__update_freelist_slow(struct slab *slab,
545	void freelist_old, unsigned* long counters_old,
546	void freelist_new, unsigned* long counters_new)
547	{
548	bool ret = false;
549
550	slab_lock(slab);
551	if (slab->freelist == freelist_old &&
552	slab->counters == counters_old) {
553	slab->freelist = freelist_new;
554	slab->counters = counters_new;
555	ret = true;
556	}
557	slab_unlock(slab);
558
559	return ret;
560	}
561
562	/*
563	* Interrupts must be disabled (for the fallback code to work right), typically
564	* by an _irqsave() lock variant. On PREEMPT_RT the preempt_disable(), which is
565	* part of bit_spin_lock(), is sufficient because the policy is not to allow any
566	* allocation/ free operation in hardirq context. Therefore nothing can
567	* interrupt the operation.
568	*/
569	static inline bool __slab_update_freelist(struct kmem_cache s, struct* slab *slab,
570	void freelist_old, unsigned* long counters_old,
571	void freelist_new, unsigned* long counters_new,
572	const char *n)
573	{
574	bool ret;
575
576	if (USE_LOCKLESS_FAST_PATH())
577	lockdep_assert_irqs_disabled();
578
579	if (s->flags & __CMPXCHG_DOUBLE) {
580	ret = __update_freelist_fast(slab, freelist_old, counters_old,
581	freelist_new, counters_new);
582	} else {
583	ret = __update_freelist_slow(slab, freelist_old, counters_old,
584	freelist_new, counters_new);
585	}
586	if (likely(ret))
587	return true;
588
589	cpu_relax();
590	stat(s, si: CMPXCHG_DOUBLE_FAIL);
591
592	#ifdef SLUB_DEBUG_CMPXCHG
593	pr_info("%s %s: cmpxchg double redo ", n, s->name);
594	#endif
595
596	return false;
597	}
598
599	static inline bool slab_update_freelist(struct kmem_cache s, struct* slab *slab,
600	void freelist_old, unsigned* long counters_old,
601	void freelist_new, unsigned* long counters_new,
602	const char *n)
603	{
604	bool ret;
605
606	if (s->flags & __CMPXCHG_DOUBLE) {
607	ret = __update_freelist_fast(slab, freelist_old, counters_old,
608	freelist_new, counters_new);
609	} else {
610	unsigned long flags;
611
612	local_irq_save(flags);
613	ret = __update_freelist_slow(slab, freelist_old, counters_old,
614	freelist_new, counters_new);
615	local_irq_restore(flags);
616	}
617	if (likely(ret))
618	return true;
619
620	cpu_relax();
621	stat(s, si: CMPXCHG_DOUBLE_FAIL);
622
623	#ifdef SLUB_DEBUG_CMPXCHG
624	pr_info("%s %s: cmpxchg double redo ", n, s->name);
625	#endif
626
627	return false;
628	}
629
630	#ifdef CONFIG_SLUB_DEBUG
631	static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
632	static DEFINE_SPINLOCK(object_map_lock);
633
634	static void __fill_map(unsigned long obj_map, struct* kmem_cache *s,
635	struct slab *slab)
636	{
637	void *addr = slab_address(slab);
638	void *p;
639
640	bitmap_zero(obj_map, slab->objects);
641
642	for (p = slab->freelist; p; p = get_freepointer(s, p))
643	set_bit(__obj_to_index(s, addr, p), obj_map);
644	}
645
646	#if IS_ENABLED(CONFIG_KUNIT)
647	static bool slab_add_kunit_errors(void)
648	{
649	struct kunit_resource *resource;
650
651	if (!kunit_get_current_test())
652	return false;
653
654	resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
655	if (!resource)
656	return false;
657
658	((int* *)resource->data)++;
659	kunit_put_resource(resource);
660	return true;
661	}
662	#else
663	static inline bool slab_add_kunit_errors(void) { return false; }
664	#endif
665
666	static inline unsigned int size_from_object(struct kmem_cache *s)
667	{
668	if (s->flags & SLAB_RED_ZONE)
669	return s->size - s->red_left_pad;
670
671	return s->size;
672	}
673
674	static inline void restore_red_left(struct* kmem_cache s, void* *p)
675	{
676	if (s->flags & SLAB_RED_ZONE)
677	p -= s->red_left_pad;
678
679	return p;
680	}
681
682	/*
683	* Debug settings:
684	*/
685	#if defined(CONFIG_SLUB_DEBUG_ON)
686	static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
687	#else
688	static slab_flags_t slub_debug;
689	#endif
690
691	static char *slub_debug_string;
692	static int disable_higher_order_debug;
693
694	/*
695	* slub is about to manipulate internal object metadata. This memory lies
696	* outside the range of the allocated object, so accessing it would normally
697	* be reported by kasan as a bounds error. metadata_access_enable() is used
698	* to tell kasan that these accesses are OK.
699	*/
700	static inline void metadata_access_enable(void)
701	{
702	kasan_disable_current();
703	}
704
705	static inline void metadata_access_disable(void)
706	{
707	kasan_enable_current();
708	}
709
710	/*
711	* Object debugging
712	*/
713
714	/ Verify that a pointer has an address that is valid within a slab page /
715	static inline int check_valid_pointer(struct kmem_cache *s,
716	struct slab slab, void* *object)
717	{
718	void *base;
719
720	if (!object)
721	return `1`;
722
723	base = slab_address(slab);
724	object = kasan_reset_tag(object);
725	object = restore_red_left(s, object);
726	if (object < base \|\| object >= base + slab->objects * s->size \|\|
727	(object - base) % s->size) {
728	return `0`;
729	}
730
731	return `1`;
732	}
733
734	static void print_section(char level, char* text, u8 addr,
735	unsigned int length)
736	{
737	metadata_access_enable();
738	print_hex_dump(level, text, DUMP_PREFIX_ADDRESS,
739	`16`, `1`, kasan_reset_tag((void *)addr), length, `1`);
740	metadata_access_disable();
741	}
742
743	/*
744	* See comment in calculate_sizes().
745	*/
746	static inline bool freeptr_outside_object(struct kmem_cache *s)
747	{
748	return s->offset >= s->inuse;
749	}
750
751	/*
752	* Return offset of the end of info block which is inuse + free pointer if
753	* not overlapping with object.
754	*/
755	static inline unsigned int get_info_end(struct kmem_cache *s)
756	{
757	if (freeptr_outside_object(s))
758	return s->inuse + sizeof(void *);
759	else
760	return s->inuse;
761	}
762
763	static struct track get_track(struct* kmem_cache s, void* *object,
764	enum track_item alloc)
765	{
766	struct track *p;
767
768	p = object + get_info_end(s);
769
770	return kasan_reset_tag(p + alloc);
771	}
772
773	#ifdef CONFIG_STACKDEPOT
774	static noinline depot_stack_handle_t set_track_prepare(void)
775	{
776	depot_stack_handle_t handle;
777	unsigned long entries[TRACK_ADDRS_COUNT];
778	unsigned int nr_entries;
779
780	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), `3`);
781	handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
782
783	return handle;
784	}
785	#else
786	static inline depot_stack_handle_t set_track_prepare(void)
787	{
788	return `0`;
789	}
790	#endif
791
792	static void set_track_update(struct kmem_cache s, void* *object,
793	enum track_item alloc, unsigned long addr,
794	depot_stack_handle_t handle)
795	{
796	struct track *p = get_track(s, object, alloc);
797
798	#ifdef CONFIG_STACKDEPOT
799	p->handle = handle;
800	#endif
801	p->addr = addr;
802	p->cpu = smp_processor_id();
803	p->pid = current->pid;
804	p->when = jiffies;
805	}
806
807	static __always_inline void set_track(struct kmem_cache s, void* *object,
808	enum track_item alloc, unsigned long addr)
809	{
810	depot_stack_handle_t handle = set_track_prepare();
811
812	set_track_update(s, object, alloc, addr, handle);
813	}
814
815	static void init_tracking(struct kmem_cache s, void* *object)
816	{
817	struct track *p;
818
819	if (!(s->flags & SLAB_STORE_USER))
820	return;
821
822	p = get_track(s, object, TRACK_ALLOC);
823	memset(p, `0`, `2`*sizeof(struct track));
824	}
825
826	static void print_track(const char s, struct* track t, unsigned* long pr_time)
827	{
828	depot_stack_handle_t handle __maybe_unused;
829
830	if (!t->addr)
831	return;
832
833	pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
834	s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
835	#ifdef CONFIG_STACKDEPOT
836	handle = READ_ONCE(t->handle);
837	if (handle)
838	stack_depot_print(handle);
839	else
840	pr_err("object allocation/free stack trace missing\n");
841	#endif
842	}
843
844	void print_tracking(struct kmem_cache s, void* *object)
845	{
846	unsigned long pr_time = jiffies;
847	if (!(s->flags & SLAB_STORE_USER))
848	return;
849
850	print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
851	print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
852	}
853
854	static void print_slab_info(const struct slab *slab)
855	{
856	struct folio folio = (struct* folio *)slab_folio(slab);
857
858	pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
859	slab, slab->objects, slab->inuse, slab->freelist,
860	folio_flags(folio, `0`));
861	}
862
863	/*
864	* kmalloc caches has fixed sizes (mostly power of 2), and kmalloc() API
865	* family will round up the real request size to these fixed ones, so
866	* there could be an extra area than what is requested. Save the original
867	* request size in the meta data area, for better debug and sanity check.
868	*/
869	static inline void set_orig_size(struct kmem_cache *s,
870	void object, unsigned* int orig_size)
871	{
872	void *p = kasan_reset_tag(object);
873
874	if (!slub_debug_orig_size(s))
875	return;
876
877	#ifdef CONFIG_KASAN_GENERIC
878	/*
879	* KASAN could save its free meta data in object's data area at
880	* offset 0, if the size is larger than 'orig_size', it will
881	* overlap the data redzone in [orig_size+1, object_size], and
882	* the check should be skipped.
883	*/
884	if (kasan_metadata_size(s, true) > orig_size)
885	orig_size = s->object_size;
886	#endif
887
888	p += get_info_end(s);
889	p += sizeof(struct track) * `2`;
890
891	(unsigned* int *)p = orig_size;
892	}
893
894	static inline unsigned int get_orig_size(struct kmem_cache s, void* *object)
895	{
896	void *p = kasan_reset_tag(object);
897
898	if (!slub_debug_orig_size(s))
899	return s->object_size;
900
901	p += get_info_end(s);
902	p += sizeof(struct track) * `2`;
903
904	return (unsigned* int *)p;
905	}
906
907	void skip_orig_size_check(struct kmem_cache s, const* void *object)
908	{
909	set_orig_size(s, (void *)object, s->object_size);
910	}
911
912	static void slab_bug(struct kmem_cache s, char* *fmt, ...)
913	{
914	struct va_format vaf;
915	va_list args;
916
917	va_start(args, fmt);
918	vaf.fmt = fmt;
919	vaf.va = &args;
920	pr_err("=============================================================================\n");
921	pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
922	pr_err("-----------------------------------------------------------------------------\n\n");
923	va_end(args);
924	}
925
926	__printf(`2`, `3`)
927	static void slab_fix(struct kmem_cache s, char* *fmt, ...)
928	{
929	struct va_format vaf;
930	va_list args;
931
932	if (slab_add_kunit_errors())
933	return;
934
935	va_start(args, fmt);
936	vaf.fmt = fmt;
937	vaf.va = &args;
938	pr_err("FIX %s: %pV\n", s->name, &vaf);
939	va_end(args);
940	}
941
942	static void print_trailer(struct kmem_cache s, struct* slab slab, u8 p)
943	{
944	unsigned int off; / Offset of last byte /
945	u8 *addr = slab_address(slab);
946
947	print_tracking(s, p);
948
949	print_slab_info(slab);
950
951	pr_err("Object 0x%p @offset=%tu fp=0x%p\n\n",
952	p, p - addr, get_freepointer(s, p));
953
954	if (s->flags & SLAB_RED_ZONE)
955	print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
956	s->red_left_pad);
957	else if (p > addr + `16`)
958	print_section(KERN_ERR, "Bytes b4 ", p - `16`, `16`);
959
960	print_section(KERN_ERR, "Object ", p,
961	min_t(unsigned int, s->object_size, PAGE_SIZE));
962	if (s->flags & SLAB_RED_ZONE)
963	print_section(KERN_ERR, "Redzone ", p + s->object_size,
964	s->inuse - s->object_size);
965
966	off = get_info_end(s);
967
968	if (s->flags & SLAB_STORE_USER)
969	off += `2` * sizeof(struct track);
970
971	if (slub_debug_orig_size(s))
972	off += sizeof(unsigned int);
973
974	off += kasan_metadata_size(s, false);
975
976	if (off != size_from_object(s))
977	/ Beginning of the filler is the free pointer /
978	print_section(KERN_ERR, "Padding ", p + off,
979	size_from_object(s) - off);
980
981	dump_stack();
982	}
983
984	static void object_err(struct kmem_cache s, struct* slab *slab,
985	u8 object, char* *reason)
986	{
987	if (slab_add_kunit_errors())
988	return;
989
990	slab_bug(s, "%s", reason);
991	print_trailer(s, slab, object);
992	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
993	}
994
995	static bool freelist_corrupted(struct kmem_cache s, struct* slab *slab,
996	void *freelist, void* *nextfree)
997	{
998	if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
999	!check_valid_pointer(s, slab, nextfree) && freelist) {
1000	object_err(s, slab, *freelist, "Freechain corrupt");
1001	*freelist = NULL;
1002	slab_fix(s, "Isolate corrupted freechain");
1003	return true;
1004	}
1005
1006	return false;
1007	}
1008
1009	static __printf(`3`, `4`) void slab_err(struct kmem_cache s, struct* slab *slab,
1010	const char *fmt, ...)
1011	{
1012	va_list args;
1013	char buf[`100`];
1014
1015	if (slab_add_kunit_errors())
1016	return;
1017
1018	va_start(args, fmt);
1019	vsnprintf(buf, sizeof(buf), fmt, args);
1020	va_end(args);
1021	slab_bug(s, "%s", buf);
1022	print_slab_info(slab);
1023	dump_stack();
1024	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
1025	}
1026
1027	static void init_object(struct kmem_cache s, void* *object, u8 val)
1028	{
1029	u8 *p = kasan_reset_tag(object);
1030	unsigned int poison_size = s->object_size;
1031
1032	if (s->flags & SLAB_RED_ZONE) {
1033	memset(p - s->red_left_pad, val, s->red_left_pad);
1034
1035	if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
1036	/*
1037	* Redzone the extra allocated space by kmalloc than
1038	* requested, and the poison size will be limited to
1039	* the original request size accordingly.
1040	*/
1041	poison_size = get_orig_size(s, object);
1042	}
1043	}
1044
1045	if (s->flags & __OBJECT_POISON) {
1046	memset(p, POISON_FREE, poison_size - `1`);
1047	p[poison_size - `1`] = POISON_END;
1048	}
1049
1050	if (s->flags & SLAB_RED_ZONE)
1051	memset(p + poison_size, val, s->inuse - poison_size);
1052	}
1053
1054	static void restore_bytes(struct kmem_cache s, char* *message, u8 data,
1055	void from, void* *to)
1056	{
1057	slab_fix(s, "Restoring %s 0x%p-0x%p=0x%x", message, from, to - `1`, data);
1058	memset(from, data, to - from);
1059	}
1060
1061	static int check_bytes_and_report(struct kmem_cache s, struct* slab *slab,
1062	u8 object, char* *what,
1063	u8 start, unsigned* int value, unsigned int bytes)
1064	{
1065	u8 *fault;
1066	u8 *end;
1067	u8 *addr = slab_address(slab);
1068
1069	metadata_access_enable();
1070	fault = memchr_inv(kasan_reset_tag(start), value, bytes);
1071	metadata_access_disable();
1072	if (!fault)
1073	return `1`;
1074
1075	end = start + bytes;
1076	while (end > fault && end[-`1`] == value)
1077	end--;
1078
1079	if (slab_add_kunit_errors())
1080	goto skip_bug_print;
1081
1082	slab_bug(s, "%s overwritten", what);
1083	pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
1084	fault, end - `1`, fault - addr,
1085	fault[`0`], value);
1086	print_trailer(s, slab, object);
1087	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
1088
1089	skip_bug_print:
1090	restore_bytes(s, what, value, fault, end);
1091	return `0`;
1092	}
1093
1094	/*
1095	* Object layout:
1096	*
1097	* object address
1098	* Bytes of the object to be managed.
1099	* If the freepointer may overlay the object then the free
1100	* pointer is at the middle of the object.
1101	*
1102	* Poisoning uses 0x6b (POISON_FREE) and the last byte is
1103	* 0xa5 (POISON_END)
1104	*
1105	* object + s->object_size
1106	* Padding to reach word boundary. This is also used for Redzoning.
1107	* Padding is extended by another word if Redzoning is enabled and
1108	* object_size == inuse.
1109	*
1110	* We fill with 0xbb (RED_INACTIVE) for inactive objects and with
1111	* 0xcc (RED_ACTIVE) for objects in use.
1112	*
1113	* object + s->inuse
1114	* Meta data starts here.
1115	*
1116	* A. Free pointer (if we cannot overwrite object on free)
1117	* B. Tracking data for SLAB_STORE_USER
1118	* C. Original request size for kmalloc object (SLAB_STORE_USER enabled)
1119	* D. Padding to reach required alignment boundary or at minimum
1120	* one word if debugging is on to be able to detect writes
1121	* before the word boundary.
1122	*
1123	* Padding is done using 0x5a (POISON_INUSE)
1124	*
1125	* object + s->size
1126	* Nothing is used beyond s->size.
1127	*
1128	* If slabcaches are merged then the object_size and inuse boundaries are mostly
1129	* ignored. And therefore no slab options that rely on these boundaries
1130	* may be used with merged slabcaches.
1131	*/
1132
1133	static int check_pad_bytes(struct kmem_cache s, struct* slab slab, u8 p)
1134	{
1135	unsigned long off = get_info_end(s); / The end of info /
1136
1137	if (s->flags & SLAB_STORE_USER) {
1138	/ We also have user information there /
1139	off += `2` * sizeof(struct track);
1140
1141	if (s->flags & SLAB_KMALLOC)
1142	off += sizeof(unsigned int);
1143	}
1144
1145	off += kasan_metadata_size(s, false);
1146
1147	if (size_from_object(s) == off)
1148	return `1`;
1149
1150	return check_bytes_and_report(s, slab, p, "Object padding",
1151	p + off, POISON_INUSE, size_from_object(s) - off);
1152	}
1153
1154	/ Check the pad bytes at the end of a slab page /
1155	static void slab_pad_check(struct kmem_cache s, struct* slab *slab)
1156	{
1157	u8 *start;
1158	u8 *fault;
1159	u8 *end;
1160	u8 *pad;
1161	int length;
1162	int remainder;
1163
1164	if (!(s->flags & SLAB_POISON))
1165	return;
1166
1167	start = slab_address(slab);
1168	length = slab_size(slab);
1169	end = start + length;
1170	remainder = length % s->size;
1171	if (!remainder)
1172	return;
1173
1174	pad = end - remainder;
1175	metadata_access_enable();
1176	fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder);
1177	metadata_access_disable();
1178	if (!fault)
1179	return;
1180	while (end > fault && end[-`1`] == POISON_INUSE)
1181	end--;
1182
1183	slab_err(s, slab, "Padding overwritten. 0x%p-0x%p @offset=%tu",
1184	fault, end - `1`, fault - start);
1185	print_section(KERN_ERR, "Padding ", pad, remainder);
1186
1187	restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
1188	}
1189
1190	static int check_object(struct kmem_cache s, struct* slab *slab,
1191	void *object, u8 val)
1192	{
1193	u8 *p = object;
1194	u8 *endobject = object + s->object_size;
1195	unsigned int orig_size;
1196
1197	if (s->flags & SLAB_RED_ZONE) {
1198	if (!check_bytes_and_report(s, slab, object, "Left Redzone",
1199	object - s->red_left_pad, val, s->red_left_pad))
1200	return `0`;
1201
1202	if (!check_bytes_and_report(s, slab, object, "Right Redzone",
1203	endobject, val, s->inuse - s->object_size))
1204	return `0`;
1205
1206	if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
1207	orig_size = get_orig_size(s, object);
1208
1209	if (s->object_size > orig_size &&
1210	!check_bytes_and_report(s, slab, object,
1211	"kmalloc Redzone", p + orig_size,
1212	val, s->object_size - orig_size)) {
1213	return `0`;
1214	}
1215	}
1216	} else {
1217	if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
1218	check_bytes_and_report(s, slab, p, "Alignment padding",
1219	endobject, POISON_INUSE,
1220	s->inuse - s->object_size);
1221	}
1222	}
1223
1224	if (s->flags & SLAB_POISON) {
1225	if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
1226	(!check_bytes_and_report(s, slab, p, "Poison", p,
1227	POISON_FREE, s->object_size - `1`) \|\|
1228	!check_bytes_and_report(s, slab, p, "End Poison",
1229	p + s->object_size - `1`, POISON_END, `1`)))
1230	return `0`;
1231	/*
1232	* check_pad_bytes cleans up on its own.
1233	*/
1234	check_pad_bytes(s, slab, p);
1235	}
1236
1237	if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
1238	/*
1239	* Object and freepointer overlap. Cannot check
1240	* freepointer while object is allocated.
1241	*/
1242	return `1`;
1243
1244	/ Check free pointer validity /
1245	if (!check_valid_pointer(s, slab, get_freepointer(s, p))) {
1246	object_err(s, slab, p, "Freepointer corrupt");
1247	/*
1248	* No choice but to zap it and thus lose the remainder
1249	* of the free objects in this slab. May cause
1250	* another error because the object count is now wrong.
1251	*/
1252	set_freepointer(s, p, NULL);
1253	return `0`;
1254	}
1255	return `1`;
1256	}
1257
1258	static int check_slab(struct kmem_cache s, struct* slab *slab)
1259	{
1260	int maxobj;
1261
1262	if (!folio_test_slab(slab_folio(slab))) {
1263	slab_err(s, slab, "Not a valid slab page");
1264	return `0`;
1265	}
1266
1267	maxobj = order_objects(slab_order(slab), s->size);
1268	if (slab->objects > maxobj) {
1269	slab_err(s, slab, "objects %u > max %u",
1270	slab->objects, maxobj);
1271	return `0`;
1272	}
1273	if (slab->inuse > slab->objects) {
1274	slab_err(s, slab, "inuse %u > max %u",
1275	slab->inuse, slab->objects);
1276	return `0`;
1277	}
1278	/ Slab_pad_check fixes things up after itself /
1279	slab_pad_check(s, slab);
1280	return `1`;
1281	}
1282
1283	/*
1284	* Determine if a certain object in a slab is on the freelist. Must hold the
1285	* slab lock to guarantee that the chains are in a consistent state.
1286	*/
1287	static int on_freelist(struct kmem_cache s, struct* slab slab, void* *search)
1288	{
1289	int nr = `0`;
1290	void *fp;
1291	void *object = NULL;
1292	int max_objects;
1293
1294	fp = slab->freelist;
1295	while (fp && nr <= slab->objects) {
1296	if (fp == search)
1297	return `1`;
1298	if (!check_valid_pointer(s, slab, fp)) {
1299	if (object) {
1300	object_err(s, slab, object,
1301	"Freechain corrupt");
1302	set_freepointer(s, object, NULL);
1303	} else {
1304	slab_err(s, slab, "Freepointer corrupt");
1305	slab->freelist = NULL;
1306	slab->inuse = slab->objects;
1307	slab_fix(s, "Freelist cleared");
1308	return `0`;
1309	}
1310	break;
1311	}
1312	object = fp;
1313	fp = get_freepointer(s, object);
1314	nr++;
1315	}
1316
1317	max_objects = order_objects(slab_order(slab), s->size);
1318	if (max_objects > MAX_OBJS_PER_PAGE)
1319	max_objects = MAX_OBJS_PER_PAGE;
1320
1321	if (slab->objects != max_objects) {
1322	slab_err(s, slab, "Wrong number of objects. Found %d but should be %d",
1323	slab->objects, max_objects);
1324	slab->objects = max_objects;
1325	slab_fix(s, "Number of objects adjusted");
1326	}
1327	if (slab->inuse != slab->objects - nr) {
1328	slab_err(s, slab, "Wrong object count. Counter is %d but counted were %d",
1329	slab->inuse, slab->objects - nr);
1330	slab->inuse = slab->objects - nr;
1331	slab_fix(s, "Object count adjusted");
1332	}
1333	return search == NULL;
1334	}
1335
1336	static void trace(struct kmem_cache s, struct* slab slab, void* *object,
1337	int alloc)
1338	{
1339	if (s->flags & SLAB_TRACE) {
1340	pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
1341	s->name,
1342	alloc ? "alloc" : "free",
1343	object, slab->inuse,
1344	slab->freelist);
1345
1346	if (!alloc)
1347	print_section(KERN_INFO, "Object ", (void *)object,
1348	s->object_size);
1349
1350	dump_stack();
1351	}
1352	}
1353
1354	/*
1355	* Tracking of fully allocated slabs for debugging purposes.
1356	*/
1357	static void add_full(struct kmem_cache *s,
1358	struct kmem_cache_node n, struct* slab *slab)
1359	{
1360	if (!(s->flags & SLAB_STORE_USER))
1361	return;
1362
1363	lockdep_assert_held(&n->list_lock);
1364	list_add(&slab->slab_list, &n->full);
1365	}
1366
1367	static void remove_full(struct kmem_cache s, struct* kmem_cache_node n, struct* slab *slab)
1368	{
1369	if (!(s->flags & SLAB_STORE_USER))
1370	return;
1371
1372	lockdep_assert_held(&n->list_lock);
1373	list_del(&slab->slab_list);
1374	}
1375
1376	static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1377	{
1378	return atomic_long_read(&n->nr_slabs);
1379	}
1380
1381	static inline void inc_slabs_node(struct kmem_cache s, int* node, int objects)
1382	{
1383	struct kmem_cache_node *n = get_node(s, node);
1384
1385	/*
1386	* May be called early in order to allocate a slab for the
1387	* kmem_cache_node structure. Solve the chicken-egg
1388	* dilemma by deferring the increment of the count during
1389	* bootstrap (see early_kmem_cache_node_alloc).
1390	*/
1391	if (likely(n)) {
1392	atomic_long_inc(&n->nr_slabs);
1393	atomic_long_add(objects, &n->total_objects);
1394	}
1395	}
1396	static inline void dec_slabs_node(struct kmem_cache s, int* node, int objects)
1397	{
1398	struct kmem_cache_node *n = get_node(s, node);
1399
1400	atomic_long_dec(&n->nr_slabs);
1401	atomic_long_sub(objects, &n->total_objects);
1402	}
1403
1404	/ Object debug checks for alloc/free paths /
1405	static void setup_object_debug(struct kmem_cache s, void* *object)
1406	{
1407	if (!kmem_cache_debug_flags(s, SLAB_STORE_USER\|SLAB_RED_ZONE\|__OBJECT_POISON))
1408	return;
1409
1410	init_object(s, object, SLUB_RED_INACTIVE);
1411	init_tracking(s, object);
1412	}
1413
1414	static
1415	void setup_slab_debug(struct kmem_cache s, struct* slab slab, void* *addr)
1416	{
1417	if (!kmem_cache_debug_flags(s, SLAB_POISON))
1418	return;
1419
1420	metadata_access_enable();
1421	memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab));
1422	metadata_access_disable();
1423	}
1424
1425	static inline int alloc_consistency_checks(struct kmem_cache *s,
1426	struct slab slab, void* *object)
1427	{
1428	if (!check_slab(s, slab))
1429	return `0`;
1430
1431	if (!check_valid_pointer(s, slab, object)) {
1432	object_err(s, slab, object, "Freelist Pointer check fails");
1433	return `0`;
1434	}
1435
1436	if (!check_object(s, slab, object, SLUB_RED_INACTIVE))
1437	return `0`;
1438
1439	return `1`;
1440	}
1441
1442	static noinline bool alloc_debug_processing(struct kmem_cache *s,
1443	struct slab slab, void* object, int* orig_size)
1444	{
1445	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1446	if (!alloc_consistency_checks(s, slab, object))
1447	goto bad;
1448	}
1449
1450	/ Success. Perform special debug activities for allocs /
1451	trace(s, slab, object, `1`);
1452	set_orig_size(s, object, orig_size);
1453	init_object(s, object, SLUB_RED_ACTIVE);
1454	return true;
1455
1456	bad:
1457	if (folio_test_slab(slab_folio(slab))) {
1458	/*
1459	* If this is a slab page then lets do the best we can
1460	* to avoid issues in the future. Marking all objects
1461	* as used avoids touching the remaining objects.
1462	*/
1463	slab_fix(s, "Marking all objects used");
1464	slab->inuse = slab->objects;
1465	slab->freelist = NULL;
1466	}
1467	return false;
1468	}
1469
1470	static inline int free_consistency_checks(struct kmem_cache *s,
1471	struct slab slab, void* object, unsigned* long addr)
1472	{
1473	if (!check_valid_pointer(s, slab, object)) {
1474	slab_err(s, slab, "Invalid object pointer 0x%p", object);
1475	return `0`;
1476	}
1477
1478	if (on_freelist(s, slab, object)) {
1479	object_err(s, slab, object, "Object already free");
1480	return `0`;
1481	}
1482
1483	if (!check_object(s, slab, object, SLUB_RED_ACTIVE))
1484	return `0`;
1485
1486	if (unlikely(s != slab->slab_cache)) {
1487	if (!folio_test_slab(slab_folio(slab))) {
1488	slab_err(s, slab, "Attempt to free object(0x%p) outside of slab",
1489	object);
1490	} else if (!slab->slab_cache) {
1491	pr_err("SLUB <none>: no slab for object 0x%p.\n",
1492	object);
1493	dump_stack();
1494	} else
1495	object_err(s, slab, object,
1496	"page slab pointer corrupt.");
1497	return `0`;
1498	}
1499	return `1`;
1500	}
1501
1502	/*
1503	* Parse a block of slub_debug options. Blocks are delimited by ';'
1504	*
1505	* @str: start of block
1506	* @flags: returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
1507	* @slabs: return start of list of slabs, or NULL when there's no list
1508	* @init: assume this is initial parsing and not per-kmem-create parsing
1509	*
1510	* returns the start of next block if there's any, or NULL
1511	*/
1512	static char *
1513	parse_slub_debug_flags(char str, slab_flags_t flags, char **slabs, bool init)
1514	{
1515	bool higher_order_disable = false;
1516
1517	/ Skip any completely empty blocks /
1518	while (str && str == `';'`)
1519	str++;
1520
1521	if (*str == `','`) {
1522	/*
1523	* No options but restriction on slabs. This means full
1524	* debugging for slabs matching a pattern.
1525	*/
1526	*flags = DEBUG_DEFAULT_FLAGS;
1527	goto check_slabs;
1528	}
1529	*flags = `0`;
1530
1531	/ Determine which debug features should be switched on /
1532	for (; str && str != `','` && *str != `';'`; str++) {
1533	switch (tolower(*str)) {
1534	case `'-'`:
1535	*flags = `0`;
1536	break;
1537	case `'f'`:
1538	*flags \|= SLAB_CONSISTENCY_CHECKS;
1539	break;
1540	case `'z'`:
1541	*flags \|= SLAB_RED_ZONE;
1542	break;
1543	case `'p'`:
1544	*flags \|= SLAB_POISON;
1545	break;
1546	case `'u'`:
1547	*flags \|= SLAB_STORE_USER;
1548	break;
1549	case `'t'`:
1550	*flags \|= SLAB_TRACE;
1551	break;
1552	case `'a'`:
1553	*flags \|= SLAB_FAILSLAB;
1554	break;
1555	case `'o'`:
1556	/*
1557	* Avoid enabling debugging on caches if its minimum
1558	* order would increase as a result.
1559	*/
1560	higher_order_disable = true;
1561	break;
1562	default:
1563	if (init)
1564	pr_err("slub_debug option '%c' unknown. skipped\n", *str);
1565	}
1566	}
1567	check_slabs:
1568	if (*str == `','`)
1569	*slabs = ++str;
1570	else
1571	*slabs = NULL;
1572
1573	/ Skip over the slab list /
1574	while (str && str != `';'`)
1575	str++;
1576
1577	/ Skip any completely empty blocks /
1578	while (str && str == `';'`)
1579	str++;
1580
1581	if (init && higher_order_disable)
1582	disable_higher_order_debug = `1`;
1583
1584	if (*str)
1585	return str;
1586	else
1587	return NULL;
1588	}
1589
1590	static int __init setup_slub_debug(char *str)
1591	{
1592	slab_flags_t flags;
1593	slab_flags_t global_flags;
1594	char *saved_str;
1595	char *slab_list;
1596	bool global_slub_debug_changed = false;
1597	bool slab_list_specified = false;
1598
1599	global_flags = DEBUG_DEFAULT_FLAGS;
1600	if (str++ != `'='` \|\| !str)
1601	/*
1602	* No options specified. Switch on full debugging.
1603	*/
1604	goto out;
1605
1606	saved_str = str;
1607	while (str) {
1608	str = parse_slub_debug_flags(str, &flags, &slab_list, true);
1609
1610	if (!slab_list) {
1611	global_flags = flags;
1612	global_slub_debug_changed = true;
1613	} else {
1614	slab_list_specified = true;
1615	if (flags & SLAB_STORE_USER)
1616	stack_depot_request_early_init();
1617	}
1618	}
1619
1620	/*
1621	* For backwards compatibility, a single list of flags with list of
1622	* slabs means debugging is only changed for those slabs, so the global
1623	* slub_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending
1624	* on CONFIG_SLUB_DEBUG_ON). We can extended that to multiple lists as
1625	* long as there is no option specifying flags without a slab list.
1626	*/
1627	if (slab_list_specified) {
1628	if (!global_slub_debug_changed)
1629	global_flags = slub_debug;
1630	slub_debug_string = saved_str;
1631	}
1632	out:
1633	slub_debug = global_flags;
1634	if (slub_debug & SLAB_STORE_USER)
1635	stack_depot_request_early_init();
1636	if (slub_debug != `0` \|\| slub_debug_string)
1637	static_branch_enable(&slub_debug_enabled);
1638	else
1639	static_branch_disable(&slub_debug_enabled);
1640	if ((static_branch_unlikely(&init_on_alloc) \|\|
1641	static_branch_unlikely(&init_on_free)) &&
1642	(slub_debug & SLAB_POISON))
1643	pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
1644	return `1`;
1645	}
1646
1647	__setup("slub_debug", setup_slub_debug);
1648
1649	/*
1650	* kmem_cache_flags - apply debugging options to the cache
1651	* @object_size: the size of an object without meta data
1652	* @flags: flags to set
1653	* @name: name of the cache
1654	*
1655	* Debug option(s) are applied to @flags. In addition to the debug
1656	* option(s), if a slab name (or multiple) is specified i.e.
1657	* slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
1658	* then only the select slabs will receive the debug option(s).
1659	*/
1660	slab_flags_t kmem_cache_flags(unsigned int object_size,
1661	slab_flags_t flags, const char *name)
1662	{
1663	char *iter;
1664	size_t len;
1665	char *next_block;
1666	slab_flags_t block_flags;
1667	slab_flags_t slub_debug_local = slub_debug;
1668
1669	if (flags & SLAB_NO_USER_FLAGS)
1670	return flags;
1671
1672	/*
1673	* If the slab cache is for debugging (e.g. kmemleak) then
1674	* don't store user (stack trace) information by default,
1675	* but let the user enable it via the command line below.
1676	*/
1677	if (flags & SLAB_NOLEAKTRACE)
1678	slub_debug_local &= ~SLAB_STORE_USER;
1679
1680	len = strlen(name);
1681	next_block = slub_debug_string;
1682	/ Go through all blocks of debug options, see if any matches our slab's name /
1683	while (next_block) {
1684	next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false);
1685	if (!iter)
1686	continue;
1687	/ Found a block that has a slab list, search it /
1688	while (*iter) {
1689	char end, glob;
1690	size_t cmplen;
1691
1692	end = strchrnul(iter, `','`);
1693	if (next_block && next_block < end)
1694	end = next_block - `1`;
1695
1696	glob = strnchr(iter, end - iter, `'*'`);
1697	if (glob)
1698	cmplen = glob - iter;
1699	else
1700	cmplen = max_t(size_t, len, (end - iter));
1701
1702	if (!strncmp(name, iter, cmplen)) {
1703	flags \|= block_flags;
1704	return flags;
1705	}
1706
1707	if (!end \|\| end == `';'`)
1708	break;
1709	iter = end + `1`;
1710	}
1711	}
1712
1713	return flags \| slub_debug_local;
1714	}
1715	#else /* !CONFIG_SLUB_DEBUG */
1716	static inline void setup_object_debug(struct kmem_cache s, void* *object) {}
1717	static inline
1718	void setup_slab_debug(struct kmem_cache s, struct* slab slab, void* *addr) {}
1719
1720	static inline bool alloc_debug_processing(struct kmem_cache *s,
1721	struct slab slab, void* object, int* orig_size) { return true; }
1722
1723	static inline bool free_debug_processing(struct kmem_cache *s,
1724	struct slab slab, void* head, void* tail, int* *bulk_cnt,
1725	unsigned long addr, depot_stack_handle_t handle) { return true; }
1726
1727	static inline void slab_pad_check(struct kmem_cache s, struct* slab *slab) {}
1728	static inline int check_object(struct kmem_cache s, struct* slab *slab,
1729	void object, u8 val) { return* `1`; }
1730	static inline depot_stack_handle_t set_track_prepare(void) { return `0`; }
1731	static inline void set_track(struct kmem_cache s, void* *object,
1732	enum track_item alloc, unsigned long addr) {}
1733	static inline void add_full(struct kmem_cache s, struct* kmem_cache_node *n,
1734	struct slab *slab) {}
1735	static inline void remove_full(struct kmem_cache s, struct* kmem_cache_node *n,
1736	struct slab *slab) {}
1737	slab_flags_t kmem_cache_flags(unsigned int object_size,
1738	slab_flags_t flags, const char *name)
1739	{
1740	return flags;
1741	}
1742	#define slub_debug 0
1743
1744	#define disable_higher_order_debug 0
1745
1746	static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1747	{ return `0`; }
1748	static inline void inc_slabs_node(struct kmem_cache s, int* node,
1749	int objects) {}
1750	static inline void dec_slabs_node(struct kmem_cache s, int* node,
1751	int objects) {}
1752
1753	#ifndef CONFIG_SLUB_TINY
1754	static bool freelist_corrupted(struct kmem_cache s, struct* slab *slab,
1755	void *freelist, void* *nextfree)
1756	{
1757	return false;
1758	}
1759	#endif
1760	#endif /* CONFIG_SLUB_DEBUG */
1761
1762	/*
1763	* Hooks for other subsystems that check memory allocations. In a typical
1764	* production configuration these hooks all should produce no code at all.
1765	*/
1766	static __always_inline bool slab_free_hook(struct kmem_cache *s,
1767	void *x, bool init)
1768	{
1769	kmemleak_free_recursive(ptr: x, flags: s->flags);
1770	kmsan_slab_free(s, object: x);
1771
1772	debug_check_no_locks_freed(from: x, len: s->object_size);
1773
1774	if (!(s->flags & SLAB_DEBUG_OBJECTS))
1775	debug_check_no_obj_freed(address: x, size: s->object_size);
1776
1777	/ Use KCSAN to help debug racy use-after-free. /
1778	if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
1779	__kcsan_check_access(ptr: x, size: s->object_size,
1780	KCSAN_ACCESS_WRITE \| KCSAN_ACCESS_ASSERT);
1781
1782	/*
1783	* As memory initialization might be integrated into KASAN,
1784	* kasan_slab_free and initialization memset's must be
1785	* kept together to avoid discrepancies in behavior.
1786	*
1787	* The initialization memset's clear the object and the metadata,
1788	* but don't touch the SLAB redzone.
1789	*/
1790	if (init) {
1791	int rsize;
1792
1793	if (!kasan_has_integrated_init())
1794	memset(kasan_reset_tag(x), `0`, s->object_size);
1795	rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : `0`;
1796	memset((char *)kasan_reset_tag(x) + s->inuse, `0`,
1797	s->size - s->inuse - rsize);
1798	}
1799	/ KASAN might put x into memory quarantine, delaying its reuse. /
1800	return kasan_slab_free(s, object: x, init);
1801	}
1802
1803	static inline bool slab_free_freelist_hook(struct kmem_cache *s,
1804	void *head, void* **tail,
1805	int *cnt)
1806	{
1807
1808	void *object;
1809	void next = head;
1810	void old_tail = tail ? tail : head;
1811
1812	if (is_kfence_address(addr: next)) {
1813	slab_free_hook(s, x: next, init: false);
1814	return true;
1815	}
1816
1817	/ Head and tail of the reconstructed freelist /
1818	*head = NULL;
1819	*tail = NULL;
1820
1821	do {
1822	object = next;
1823	next = get_freepointer(s, object);
1824
1825	/ If object's reuse doesn't have to be delayed /
1826	if (!slab_free_hook(s, x: object, init: slab_want_init_on_free(c: s))) {
1827	/ Move object to the new freelist /
1828	set_freepointer(s, object, fp: *head);
1829	*head = object;
1830	if (!*tail)
1831	*tail = object;
1832	} else {
1833	/*
1834	* Adjust the reconstructed freelist depth
1835	* accordingly if object's reuse is delayed.
1836	*/
1837	--(*cnt);
1838	}
1839	} while (object != old_tail);
1840
1841	if (head == tail)
1842	*tail = NULL;
1843
1844	return *head != NULL;
1845	}
1846
1847	static void setup_object(struct* kmem_cache s, void* *object)
1848	{
1849	setup_object_debug(s, object);
1850	object = kasan_init_slab_obj(cache: s, object);
1851	if (unlikely(s->ctor)) {
1852	kasan_unpoison_object_data(cache: s, object);
1853	s->ctor(object);
1854	kasan_poison_object_data(cache: s, object);
1855	}
1856	return object;
1857	}
1858
1859	/*
1860	* Slab allocation and freeing
1861	*/
1862	static inline struct slab alloc_slab_page(gfp_t flags, int* node,
1863	struct kmem_cache_order_objects oo)
1864	{
1865	struct folio *folio;
1866	struct slab *slab;
1867	unsigned int order = oo_order(x: oo);
1868
1869	if (node == NUMA_NO_NODE)
1870	folio = (struct folio *)alloc_pages(gfp: flags, order);
1871	else
1872	folio = (struct folio *)__alloc_pages_node(nid: node, gfp_mask: flags, order);
1873
1874	if (!folio)
1875	return NULL;
1876
1877	slab = folio_slab(folio);
1878	__folio_set_slab(folio);
1879	/ Make the flag visible before any changes to folio->mapping /
1880	smp_wmb();
1881	if (folio_is_pfmemalloc(folio))
1882	slab_set_pfmemalloc(slab);
1883
1884	return slab;
1885	}
1886
1887	#ifdef CONFIG_SLAB_FREELIST_RANDOM
1888	/ Pre-initialize the random sequence cache /
1889	static int init_cache_random_seq(struct kmem_cache *s)
1890	{
1891	unsigned int count = oo_objects(s->oo);
1892	int err;
1893
1894	/ Bailout if already initialised /
1895	if (s->random_seq)
1896	return `0`;
1897
1898	err = cache_random_seq_create(s, count, GFP_KERNEL);
1899	if (err) {
1900	pr_err("SLUB: Unable to initialize free list for %s\n",
1901	s->name);
1902	return err;
1903	}
1904
1905	/ Transform to an offset on the set of pages /
1906	if (s->random_seq) {
1907	unsigned int i;
1908
1909	for (i = `0`; i < count; i++)
1910	s->random_seq[i] *= s->size;
1911	}
1912	return `0`;
1913	}
1914
1915	/ Initialize each random sequence freelist per cache /
1916	static void __init init_freelist_randomization(void)
1917	{
1918	struct kmem_cache *s;
1919
1920	mutex_lock(&slab_mutex);
1921
1922	list_for_each_entry(s, &slab_caches, list)
1923	init_cache_random_seq(s);
1924
1925	mutex_unlock(&slab_mutex);
1926	}
1927
1928	/ Get the next entry on the pre-computed freelist randomized /
1929	static void next_freelist_entry(struct* kmem_cache s, struct* slab *slab,
1930	unsigned long pos, void* *start,
1931	unsigned long page_limit,
1932	unsigned long freelist_count)
1933	{
1934	unsigned int idx;
1935
1936	/*
1937	* If the target page allocation failed, the number of objects on the
1938	* page might be smaller than the usual size defined by the cache.
1939	*/
1940	do {
1941	idx = s->random_seq[*pos];
1942	*pos += `1`;
1943	if (*pos >= freelist_count)
1944	*pos = `0`;
1945	} while (unlikely(idx >= page_limit));
1946
1947	return (char *)start + idx;
1948	}
1949
1950	/ Shuffle the single linked freelist based on a random pre-computed sequence /
1951	static bool shuffle_freelist(struct kmem_cache s, struct* slab *slab)
1952	{
1953	void *start;
1954	void *cur;
1955	void *next;
1956	unsigned long idx, pos, page_limit, freelist_count;
1957
1958	if (slab->objects < `2` \|\| !s->random_seq)
1959	return false;
1960
1961	freelist_count = oo_objects(s->oo);
1962	pos = get_random_u32_below(freelist_count);
1963
1964	page_limit = slab->objects * s->size;
1965	start = fixup_red_left(s, slab_address(slab));
1966
1967	/ First entry is used as the base of the freelist /
1968	cur = next_freelist_entry(s, slab, &pos, start, page_limit,
1969	freelist_count);
1970	cur = setup_object(s, cur);
1971	slab->freelist = cur;
1972
1973	for (idx = `1`; idx < slab->objects; idx++) {
1974	next = next_freelist_entry(s, slab, &pos, start, page_limit,
1975	freelist_count);
1976	next = setup_object(s, next);
1977	set_freepointer(s, cur, next);
1978	cur = next;
1979	}
1980	set_freepointer(s, cur, NULL);
1981
1982	return true;
1983	}
1984	#else
1985	static inline int init_cache_random_seq(struct kmem_cache *s)
1986	{
1987	return `0`;
1988	}
1989	static inline void init_freelist_randomization(void) { }
1990	static inline bool shuffle_freelist(struct kmem_cache s, struct* slab *slab)
1991	{
1992	return false;
1993	}
1994	#endif /* CONFIG_SLAB_FREELIST_RANDOM */
1995
1996	static struct slab allocate_slab(struct* kmem_cache s, gfp_t flags, int* node)
1997	{
1998	struct slab *slab;
1999	struct kmem_cache_order_objects oo = s->oo;
2000	gfp_t alloc_gfp;
2001	void start, p, *next;
2002	int idx;
2003	bool shuffle;
2004
2005	flags &= gfp_allowed_mask;
2006
2007	flags \|= s->allocflags;
2008
2009	/*
2010	* Let the initial higher-order allocation fail under memory pressure
2011	* so we fall-back to the minimum order allocation.
2012	*/
2013	alloc_gfp = (flags \| __GFP_NOWARN \| __GFP_NORETRY) & ~__GFP_NOFAIL;
2014	if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(x: oo) > oo_order(x: s->min))
2015	alloc_gfp = (alloc_gfp \| __GFP_NOMEMALLOC) & ~__GFP_RECLAIM;
2016
2017	slab = alloc_slab_page(flags: alloc_gfp, node, oo);
2018	if (unlikely(!slab)) {
2019	oo = s->min;
2020	alloc_gfp = flags;
2021	/*
2022	* Allocation may have failed due to fragmentation.
2023	* Try a lower order alloc if possible
2024	*/
2025	slab = alloc_slab_page(flags: alloc_gfp, node, oo);
2026	if (unlikely(!slab))
2027	return NULL;
2028	stat(s, si: ORDER_FALLBACK);
2029	}
2030
2031	slab->objects = oo_objects(x: oo);
2032	slab->inuse = `0`;
2033	slab->frozen = `0`;
2034
2035	account_slab(slab, order: oo_order(x: oo), s, gfp: flags);
2036
2037	slab->slab_cache = s;
2038
2039	kasan_poison_slab(slab);
2040
2041	start = slab_address(slab);
2042
2043	setup_slab_debug(s, slab, addr: start);
2044
2045	shuffle = shuffle_freelist(s, slab);
2046
2047	if (!shuffle) {
2048	start = fixup_red_left(s, p: start);
2049	start = setup_object(s, object: start);
2050	slab->freelist = start;
2051	for (idx = `0`, p = start; idx < slab->objects - `1`; idx++) {
2052	next = p + s->size;
2053	next = setup_object(s, object: next);
2054	set_freepointer(s, object: p, fp: next);
2055	p = next;
2056	}
2057	set_freepointer(s, object: p, NULL);
2058	}
2059
2060	return slab;
2061	}
2062
2063	static struct slab new_slab(struct* kmem_cache s, gfp_t flags, int* node)
2064	{
2065	if (unlikely(flags & GFP_SLAB_BUG_MASK))
2066	flags = kmalloc_fix_flags(flags);
2067
2068	WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
2069
2070	return allocate_slab(s,
2071	flags: flags & (GFP_RECLAIM_MASK \| GFP_CONSTRAINT_MASK), node);
2072	}
2073
2074	static void __free_slab(struct kmem_cache s, struct* slab *slab)
2075	{
2076	struct folio *folio = slab_folio(slab);
2077	int order = folio_order(folio);
2078	int pages = `1` << order;
2079
2080	__slab_clear_pfmemalloc(slab);
2081	folio->mapping = NULL;
2082	/ Make the mapping reset visible before clearing the flag /
2083	smp_wmb();
2084	__folio_clear_slab(folio);
2085	mm_account_reclaimed_pages(pages);
2086	unaccount_slab(slab, order, s);
2087	__free_pages(page: &folio->page, order);
2088	}
2089
2090	static void rcu_free_slab(struct rcu_head *h)
2091	{
2092	struct slab slab = container_of(h, struct* slab, rcu_head);
2093
2094	__free_slab(s: slab->slab_cache, slab);
2095	}
2096
2097	static void free_slab(struct kmem_cache s, struct* slab *slab)
2098	{
2099	if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
2100	void *p;
2101
2102	slab_pad_check(s, slab);
2103	for_each_object(p, s, slab_address(slab), slab->objects)
2104	check_object(s, slab, object: p, SLUB_RED_INACTIVE);
2105	}
2106
2107	if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU))
2108	call_rcu(head: &slab->rcu_head, func: rcu_free_slab);
2109	else
2110	__free_slab(s, slab);
2111	}
2112
2113	static void discard_slab(struct kmem_cache s, struct* slab *slab)
2114	{
2115	dec_slabs_node(s, node: slab_nid(slab), objects: slab->objects);
2116	free_slab(s, slab);
2117	}
2118
2119	/*
2120	* Management of partially allocated slabs.
2121	*/
2122	static inline void
2123	__add_partial(struct kmem_cache_node n, struct* slab slab, int* tail)
2124	{
2125	n->nr_partial++;
2126	if (tail == DEACTIVATE_TO_TAIL)
2127	list_add_tail(new: &slab->slab_list, head: &n->partial);
2128	else
2129	list_add(new: &slab->slab_list, head: &n->partial);
2130	}
2131
2132	static inline void add_partial(struct kmem_cache_node *n,
2133	struct slab slab, int* tail)
2134	{
2135	lockdep_assert_held(&n->list_lock);
2136	__add_partial(n, slab, tail);
2137	}
2138
2139	static inline void remove_partial(struct kmem_cache_node *n,
2140	struct slab *slab)
2141	{
2142	lockdep_assert_held(&n->list_lock);
2143	list_del(entry: &slab->slab_list);
2144	n->nr_partial--;
2145	}
2146
2147	/*
2148	* Called only for kmem_cache_debug() caches instead of acquire_slab(), with a
2149	* slab from the n->partial list. Remove only a single object from the slab, do
2150	* the alloc_debug_processing() checks and leave the slab on the list, or move
2151	* it to full list if it was the last free object.
2152	*/
2153	static void alloc_single_from_partial(struct* kmem_cache *s,
2154	struct kmem_cache_node n, struct* slab slab, int* orig_size)
2155	{
2156	void *object;
2157
2158	lockdep_assert_held(&n->list_lock);
2159
2160	object = slab->freelist;
2161	slab->freelist = get_freepointer(s, object);
2162	slab->inuse++;
2163
2164	if (!alloc_debug_processing(s, slab, object, orig_size)) {
2165	remove_partial(n, slab);
2166	return NULL;
2167	}
2168
2169	if (slab->inuse == slab->objects) {
2170	remove_partial(n, slab);
2171	add_full(s, n, slab);
2172	}
2173
2174	return object;
2175	}
2176
2177	/*
2178	* Called only for kmem_cache_debug() caches to allocate from a freshly
2179	* allocated slab. Allocate a single object instead of whole freelist
2180	* and put the slab to the partial (or full) list.
2181	*/
2182	static void alloc_single_from_new_slab(struct* kmem_cache *s,
2183	struct slab slab, int* orig_size)
2184	{
2185	int nid = slab_nid(slab);
2186	struct kmem_cache_node *n = get_node(s, node: nid);
2187	unsigned long flags;
2188	void *object;
2189
2190
2191	object = slab->freelist;
2192	slab->freelist = get_freepointer(s, object);
2193	slab->inuse = `1`;
2194
2195	if (!alloc_debug_processing(s, slab, object, orig_size))
2196	/*
2197	* It's not really expected that this would fail on a
2198	* freshly allocated slab, but a concurrent memory
2199	* corruption in theory could cause that.
2200	*/
2201	return NULL;
2202
2203	spin_lock_irqsave(&n->list_lock, flags);
2204
2205	if (slab->inuse == slab->objects)
2206	add_full(s, n, slab);
2207	else
2208	add_partial(n, slab, tail: DEACTIVATE_TO_HEAD);
2209
2210	inc_slabs_node(s, node: nid, objects: slab->objects);
2211	spin_unlock_irqrestore(lock: &n->list_lock, flags);
2212
2213	return object;
2214	}
2215
2216	/*
2217	* Remove slab from the partial list, freeze it and
2218	* return the pointer to the freelist.
2219	*
2220	* Returns a list of objects or NULL if it fails.
2221	*/
2222	static inline void acquire_slab(struct* kmem_cache *s,
2223	struct kmem_cache_node n, struct* slab *slab,
2224	int mode)
2225	{
2226	void *freelist;
2227	unsigned long counters;
2228	struct slab new;
2229
2230	lockdep_assert_held(&n->list_lock);
2231
2232	/*
2233	* Zap the freelist and set the frozen bit.
2234	* The old freelist is the list of objects for the
2235	* per cpu allocation list.
2236	*/
2237	freelist = slab->freelist;
2238	counters = slab->counters;
2239	new.counters = counters;
2240	if (mode) {
2241	new.inuse = slab->objects;
2242	new.freelist = NULL;
2243	} else {
2244	new.freelist = freelist;
2245	}
2246
2247	VM_BUG_ON(new.frozen);
2248	new.frozen = `1`;
2249
2250	if (!__slab_update_freelist(s, slab,
2251	freelist_old: freelist, counters_old: counters,
2252	freelist_new: new.freelist, counters_new: new.counters,
2253	n: "acquire_slab"))
2254	return NULL;
2255
2256	remove_partial(n, slab);
2257	WARN_ON(!freelist);
2258	return freelist;
2259	}
2260
2261	#ifdef CONFIG_SLUB_CPU_PARTIAL
2262	static void put_cpu_partial(struct kmem_cache s, struct* slab slab, int* drain);
2263	#else
2264	static inline void put_cpu_partial(struct kmem_cache s, struct* slab *slab,
2265	int drain) { }
2266	#endif
2267	static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags);
2268
2269	/*
2270	* Try to allocate a partial slab from a specific node.
2271	*/
2272	static void get_partial_node(struct* kmem_cache s, struct* kmem_cache_node *n,
2273	struct partial_context *pc)
2274	{
2275	struct slab slab, slab2;
2276	void *object = NULL;
2277	unsigned long flags;
2278	unsigned int partial_slabs = `0`;
2279
2280	/*
2281	* Racy check. If we mistakenly see no partial slabs then we
2282	* just allocate an empty slab. If we mistakenly try to get a
2283	* partial slab and there is none available then get_partial()
2284	* will return NULL.
2285	*/
2286	if (!n \|\| !n->nr_partial)
2287	return NULL;
2288
2289	spin_lock_irqsave(&n->list_lock, flags);
2290	list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) {
2291	void *t;
2292
2293	if (!pfmemalloc_match(slab, gfpflags: pc->flags))
2294	continue;
2295
2296	if (IS_ENABLED(CONFIG_SLUB_TINY) \|\| kmem_cache_debug(s)) {
2297	object = alloc_single_from_partial(s, n, slab,
2298	orig_size: pc->orig_size);
2299	if (object)
2300	break;
2301	continue;
2302	}
2303
2304	t = acquire_slab(s, n, slab, mode: object == NULL);
2305	if (!t)
2306	break;
2307
2308	if (!object) {
2309	*pc->slab = slab;
2310	stat(s, si: ALLOC_FROM_PARTIAL);
2311	object = t;
2312	} else {
2313	put_cpu_partial(s, slab, drain: `0`);
2314	stat(s, si: CPU_PARTIAL_NODE);
2315	partial_slabs++;
2316	}
2317	#ifdef CONFIG_SLUB_CPU_PARTIAL
2318	if (!kmem_cache_has_cpu_partial(s)
2319	\|\| partial_slabs > s->cpu_partial_slabs / `2`)
2320	break;
2321	#else
2322	break;
2323	#endif
2324
2325	}
2326	spin_unlock_irqrestore(lock: &n->list_lock, flags);
2327	return object;
2328	}
2329
2330	/*
2331	* Get a slab from somewhere. Search in increasing NUMA distances.
2332	*/
2333	static void get_any_partial(struct* kmem_cache s, struct* partial_context *pc)
2334	{
2335	#ifdef CONFIG_NUMA
2336	struct zonelist *zonelist;
2337	struct zoneref *z;
2338	struct zone *zone;
2339	enum zone_type highest_zoneidx = gfp_zone(flags: pc->flags);
2340	void *object;
2341	unsigned int cpuset_mems_cookie;
2342
2343	/*
2344	* The defrag ratio allows a configuration of the tradeoffs between
2345	* inter node defragmentation and node local allocations. A lower
2346	* defrag_ratio increases the tendency to do local allocations
2347	* instead of attempting to obtain partial slabs from other nodes.
2348	*
2349	* If the defrag_ratio is set to 0 then kmalloc() always
2350	* returns node local objects. If the ratio is higher then kmalloc()
2351	* may return off node objects because partial slabs are obtained
2352	* from other nodes and filled up.
2353	*
2354	* If /sys/kernel/slab/xx/remote_node_defrag_ratio is set to 100
2355	* (which makes defrag_ratio = 1000) then every (well almost)
2356	* allocation will first attempt to defrag slab caches on other nodes.
2357	* This means scanning over all nodes to look for partial slabs which
2358	* may be expensive if we do it every time we are trying to find a slab
2359	* with available objects.
2360	*/
2361	if (!s->remote_node_defrag_ratio \|\|
2362	get_cycles() % `1024` > s->remote_node_defrag_ratio)
2363	return NULL;
2364
2365	do {
2366	cpuset_mems_cookie = read_mems_allowed_begin();
2367	zonelist = node_zonelist(nid: mempolicy_slab_node(), flags: pc->flags);
2368	for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
2369	struct kmem_cache_node *n;
2370
2371	n = get_node(s, node: zone_to_nid(zone));
2372
2373	if (n && cpuset_zone_allowed(z: zone, gfp_mask: pc->flags) &&
2374	n->nr_partial > s->min_partial) {
2375	object = get_partial_node(s, n, pc);
2376	if (object) {
2377	/*
2378	* Don't check read_mems_allowed_retry()
2379	* here - if mems_allowed was updated in
2380	* parallel, that was a harmless race
2381	* between allocation and the cpuset
2382	* update
2383	*/
2384	return object;
2385	}
2386	}
2387	}
2388	} while (read_mems_allowed_retry(seq: cpuset_mems_cookie));
2389	#endif /* CONFIG_NUMA */
2390	return NULL;
2391	}
2392
2393	/*
2394	* Get a partial slab, lock it and return it.
2395	*/
2396	static void get_partial(struct* kmem_cache s, int* node, struct partial_context *pc)
2397	{
2398	void *object;
2399	int searchnode = node;
2400
2401	if (node == NUMA_NO_NODE)
2402	searchnode = numa_mem_id();
2403
2404	object = get_partial_node(s, n: get_node(s, node: searchnode), pc);
2405	if (object \|\| node != NUMA_NO_NODE)
2406	return object;
2407
2408	return get_any_partial(s, pc);
2409	}
2410
2411	#ifndef CONFIG_SLUB_TINY
2412
2413	#ifdef CONFIG_PREEMPTION
2414	/*
2415	* Calculate the next globally unique transaction for disambiguation
2416	* during cmpxchg. The transactions start with the cpu number and are then
2417	* incremented by CONFIG_NR_CPUS.
2418	*/
2419	#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
2420	#else
2421	/*
2422	* No preemption supported therefore also no need to check for
2423	* different cpus.
2424	*/
2425	#define TID_STEP 1
2426	#endif /* CONFIG_PREEMPTION */
2427
2428	static inline unsigned long next_tid(unsigned long tid)
2429	{
2430	return tid + TID_STEP;
2431	}
2432
2433	#ifdef SLUB_DEBUG_CMPXCHG
2434	static inline unsigned int tid_to_cpu(unsigned long tid)
2435	{
2436	return tid % TID_STEP;
2437	}
2438
2439	static inline unsigned long tid_to_event(unsigned long tid)
2440	{
2441	return tid / TID_STEP;
2442	}
2443	#endif
2444
2445	static inline unsigned int init_tid(int cpu)
2446	{
2447	return cpu;
2448	}
2449
2450	static inline void note_cmpxchg_failure(const char *n,
2451	const struct kmem_cache s, unsigned* long tid)
2452	{
2453	#ifdef SLUB_DEBUG_CMPXCHG
2454	unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
2455
2456	pr_info("%s %s: cmpxchg redo ", n, s->name);
2457
2458	#ifdef CONFIG_PREEMPTION
2459	if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
2460	pr_warn("due to cpu change %d -> %d\n",
2461	tid_to_cpu(tid), tid_to_cpu(actual_tid));
2462	else
2463	#endif
2464	if (tid_to_event(tid) != tid_to_event(actual_tid))
2465	pr_warn("due to cpu running other code. Event %ld->%ld\n",
2466	tid_to_event(tid), tid_to_event(actual_tid));
2467	else
2468	pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
2469	actual_tid, tid, next_tid(tid));
2470	#endif
2471	stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
2472	}
2473
2474	static void init_kmem_cache_cpus(struct kmem_cache *s)
2475	{
2476	int cpu;
2477	struct kmem_cache_cpu *c;
2478
2479	for_each_possible_cpu(cpu) {
2480	c = per_cpu_ptr(s->cpu_slab, cpu);
2481	local_lock_init(&c->lock);
2482	c->tid = init_tid(cpu);
2483	}
2484	}
2485
2486	/*
2487	* Finishes removing the cpu slab. Merges cpu's freelist with slab's freelist,
2488	* unfreezes the slabs and puts it on the proper list.
2489	* Assumes the slab has been already safely taken away from kmem_cache_cpu
2490	* by the caller.
2491	*/
2492	static void deactivate_slab(struct kmem_cache s, struct* slab *slab,
2493	void *freelist)
2494	{
2495	enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST };
2496	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
2497	int free_delta = `0`;
2498	enum slab_modes mode = M_NONE;
2499	void nextfree, freelist_iter, *freelist_tail;
2500	int tail = DEACTIVATE_TO_HEAD;
2501	unsigned long flags = `0`;
2502	struct slab new;
2503	struct slab old;
2504
2505	if (slab->freelist) {
2506	stat(s, DEACTIVATE_REMOTE_FREES);
2507	tail = DEACTIVATE_TO_TAIL;
2508	}
2509
2510	/*
2511	* Stage one: Count the objects on cpu's freelist as free_delta and
2512	* remember the last object in freelist_tail for later splicing.
2513	*/
2514	freelist_tail = NULL;
2515	freelist_iter = freelist;
2516	while (freelist_iter) {
2517	nextfree = get_freepointer(s, freelist_iter);
2518
2519	/*
2520	* If 'nextfree' is invalid, it is possible that the object at
2521	* 'freelist_iter' is already corrupted. So isolate all objects
2522	* starting at 'freelist_iter' by skipping them.
2523	*/
2524	if (freelist_corrupted(s, slab, &freelist_iter, nextfree))
2525	break;
2526
2527	freelist_tail = freelist_iter;
2528	free_delta++;
2529
2530	freelist_iter = nextfree;
2531	}
2532
2533	/*
2534	* Stage two: Unfreeze the slab while splicing the per-cpu
2535	* freelist to the head of slab's freelist.
2536	*
2537	* Ensure that the slab is unfrozen while the list presence
2538	* reflects the actual number of objects during unfreeze.
2539	*
2540	* We first perform cmpxchg holding lock and insert to list
2541	* when it succeed. If there is mismatch then the slab is not
2542	* unfrozen and number of objects in the slab may have changed.
2543	* Then release lock and retry cmpxchg again.
2544	*/
2545	redo:
2546
2547	old.freelist = READ_ONCE(slab->freelist);
2548	old.counters = READ_ONCE(slab->counters);
2549	VM_BUG_ON(!old.frozen);
2550
2551	/ Determine target state of the slab /
2552	new.counters = old.counters;
2553	if (freelist_tail) {
2554	new.inuse -= free_delta;
2555	set_freepointer(s, freelist_tail, old.freelist);
2556	new.freelist = freelist;
2557	} else
2558	new.freelist = old.freelist;
2559
2560	new.frozen = `0`;
2561
2562	if (!new.inuse && n->nr_partial >= s->min_partial) {
2563	mode = M_FREE;
2564	} else if (new.freelist) {
2565	mode = M_PARTIAL;
2566	/*
2567	* Taking the spinlock removes the possibility that
2568	* acquire_slab() will see a slab that is frozen
2569	*/
2570	spin_lock_irqsave(&n->list_lock, flags);
2571	} else {
2572	mode = M_FULL_NOLIST;
2573	}
2574
2575
2576	if (!slab_update_freelist(s, slab,
2577	old.freelist, old.counters,
2578	new.freelist, new.counters,
2579	"unfreezing slab")) {
2580	if (mode == M_PARTIAL)
2581	spin_unlock_irqrestore(&n->list_lock, flags);
2582	goto redo;
2583	}
2584
2585
2586	if (mode == M_PARTIAL) {
2587	add_partial(n, slab, tail);
2588	spin_unlock_irqrestore(&n->list_lock, flags);
2589	stat(s, tail);
2590	} else if (mode == M_FREE) {
2591	stat(s, DEACTIVATE_EMPTY);
2592	discard_slab(s, slab);
2593	stat(s, FREE_SLAB);
2594	} else if (mode == M_FULL_NOLIST) {
2595	stat(s, DEACTIVATE_FULL);
2596	}
2597	}
2598
2599	#ifdef CONFIG_SLUB_CPU_PARTIAL
2600	static void __unfreeze_partials(struct kmem_cache s, struct* slab *partial_slab)
2601	{
2602	struct kmem_cache_node n = NULL, n2 = NULL;
2603	struct slab slab, slab_to_discard = NULL;
2604	unsigned long flags = `0`;
2605
2606	while (partial_slab) {
2607	struct slab new;
2608	struct slab old;
2609
2610	slab = partial_slab;
2611	partial_slab = slab->next;
2612
2613	n2 = get_node(s, slab_nid(slab));
2614	if (n != n2) {
2615	if (n)
2616	spin_unlock_irqrestore(&n->list_lock, flags);
2617
2618	n = n2;
2619	spin_lock_irqsave(&n->list_lock, flags);
2620	}
2621
2622	do {
2623
2624	old.freelist = slab->freelist;
2625	old.counters = slab->counters;
2626	VM_BUG_ON(!old.frozen);
2627
2628	new.counters = old.counters;
2629	new.freelist = old.freelist;
2630
2631	new.frozen = `0`;
2632
2633	} while (!__slab_update_freelist(s, slab,
2634	old.freelist, old.counters,
2635	new.freelist, new.counters,
2636	"unfreezing slab"));
2637
2638	if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2639	slab->next = slab_to_discard;
2640	slab_to_discard = slab;
2641	} else {
2642	add_partial(n, slab, DEACTIVATE_TO_TAIL);
2643	stat(s, FREE_ADD_PARTIAL);
2644	}
2645	}
2646
2647	if (n)
2648	spin_unlock_irqrestore(&n->list_lock, flags);
2649
2650	while (slab_to_discard) {
2651	slab = slab_to_discard;
2652	slab_to_discard = slab_to_discard->next;
2653
2654	stat(s, DEACTIVATE_EMPTY);
2655	discard_slab(s, slab);
2656	stat(s, FREE_SLAB);
2657	}
2658	}
2659
2660	/*
2661	* Unfreeze all the cpu partial slabs.
2662	*/
2663	static void unfreeze_partials(struct kmem_cache *s)
2664	{
2665	struct slab *partial_slab;
2666	unsigned long flags;
2667
2668	local_lock_irqsave(&s->cpu_slab->lock, flags);
2669	partial_slab = this_cpu_read(s->cpu_slab->partial);
2670	this_cpu_write(s->cpu_slab->partial, NULL);
2671	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2672
2673	if (partial_slab)
2674	__unfreeze_partials(s, partial_slab);
2675	}
2676
2677	static void unfreeze_partials_cpu(struct kmem_cache *s,
2678	struct kmem_cache_cpu *c)
2679	{
2680	struct slab *partial_slab;
2681
2682	partial_slab = slub_percpu_partial(c);
2683	c->partial = NULL;
2684
2685	if (partial_slab)
2686	__unfreeze_partials(s, partial_slab);
2687	}
2688
2689	/*
2690	* Put a slab that was just frozen (in __slab_free\|get_partial_node) into a
2691	* partial slab slot if available.
2692	*
2693	* If we did not find a slot then simply move all the partials to the
2694	* per node partial list.
2695	*/
2696	static void put_cpu_partial(struct kmem_cache s, struct* slab slab, int* drain)
2697	{
2698	struct slab *oldslab;
2699	struct slab *slab_to_unfreeze = NULL;
2700	unsigned long flags;
2701	int slabs = `0`;
2702
2703	local_lock_irqsave(&s->cpu_slab->lock, flags);
2704
2705	oldslab = this_cpu_read(s->cpu_slab->partial);
2706
2707	if (oldslab) {
2708	if (drain && oldslab->slabs >= s->cpu_partial_slabs) {
2709	/*
2710	* Partial array is full. Move the existing set to the
2711	* per node partial list. Postpone the actual unfreezing
2712	* outside of the critical section.
2713	*/
2714	slab_to_unfreeze = oldslab;
2715	oldslab = NULL;
2716	} else {
2717	slabs = oldslab->slabs;
2718	}
2719	}
2720
2721	slabs++;
2722
2723	slab->slabs = slabs;
2724	slab->next = oldslab;
2725
2726	this_cpu_write(s->cpu_slab->partial, slab);
2727
2728	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2729
2730	if (slab_to_unfreeze) {
2731	__unfreeze_partials(s, slab_to_unfreeze);
2732	stat(s, CPU_PARTIAL_DRAIN);
2733	}
2734	}
2735
2736	#else /* CONFIG_SLUB_CPU_PARTIAL */
2737
2738	static inline void unfreeze_partials(struct kmem_cache *s) { }
2739	static inline void unfreeze_partials_cpu(struct kmem_cache *s,
2740	struct kmem_cache_cpu *c) { }
2741
2742	#endif /* CONFIG_SLUB_CPU_PARTIAL */
2743
2744	static inline void flush_slab(struct kmem_cache s, struct* kmem_cache_cpu *c)
2745	{
2746	unsigned long flags;
2747	struct slab *slab;
2748	void *freelist;
2749
2750	local_lock_irqsave(&s->cpu_slab->lock, flags);
2751
2752	slab = c->slab;
2753	freelist = c->freelist;
2754
2755	c->slab = NULL;
2756	c->freelist = NULL;
2757	c->tid = next_tid(c->tid);
2758
2759	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2760
2761	if (slab) {
2762	deactivate_slab(s, slab, freelist);
2763	stat(s, CPUSLAB_FLUSH);
2764	}
2765	}
2766
2767	static inline void __flush_cpu_slab(struct kmem_cache s, int* cpu)
2768	{
2769	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2770	void *freelist = c->freelist;
2771	struct slab *slab = c->slab;
2772
2773	c->slab = NULL;
2774	c->freelist = NULL;
2775	c->tid = next_tid(c->tid);
2776
2777	if (slab) {
2778	deactivate_slab(s, slab, freelist);
2779	stat(s, CPUSLAB_FLUSH);
2780	}
2781
2782	unfreeze_partials_cpu(s, c);
2783	}
2784
2785	struct slub_flush_work {
2786	struct work_struct work;
2787	struct kmem_cache *s;
2788	bool skip;
2789	};
2790
2791	/*
2792	* Flush cpu slab.
2793	*
2794	* Called from CPU work handler with migration disabled.
2795	*/
2796	static void flush_cpu_slab(struct work_struct *w)
2797	{
2798	struct kmem_cache *s;
2799	struct kmem_cache_cpu *c;
2800	struct slub_flush_work *sfw;
2801
2802	sfw = container_of(w, struct slub_flush_work, work);
2803
2804	s = sfw->s;
2805	c = this_cpu_ptr(s->cpu_slab);
2806
2807	if (c->slab)
2808	flush_slab(s, c);
2809
2810	unfreeze_partials(s);
2811	}
2812
2813	static bool has_cpu_slab(int cpu, struct kmem_cache *s)
2814	{
2815	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2816
2817	return c->slab \|\| slub_percpu_partial(c);
2818	}
2819
2820	static DEFINE_MUTEX(flush_lock);
2821	static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
2822
2823	static void flush_all_cpus_locked(struct kmem_cache *s)
2824	{
2825	struct slub_flush_work *sfw;
2826	unsigned int cpu;
2827
2828	lockdep_assert_cpus_held();
2829	mutex_lock(&flush_lock);
2830
2831	for_each_online_cpu(cpu) {
2832	sfw = &per_cpu(slub_flush, cpu);
2833	if (!has_cpu_slab(cpu, s)) {
2834	sfw->skip = true;
2835	continue;
2836	}
2837	INIT_WORK(&sfw->work, flush_cpu_slab);
2838	sfw->skip = false;
2839	sfw->s = s;
2840	queue_work_on(cpu, flushwq, &sfw->work);
2841	}
2842
2843	for_each_online_cpu(cpu) {
2844	sfw = &per_cpu(slub_flush, cpu);
2845	if (sfw->skip)
2846	continue;
2847	flush_work(&sfw->work);
2848	}
2849
2850	mutex_unlock(&flush_lock);
2851	}
2852
2853	static void flush_all(struct kmem_cache *s)
2854	{
2855	cpus_read_lock();
2856	flush_all_cpus_locked(s);
2857	cpus_read_unlock();
2858	}
2859
2860	/*
2861	* Use the cpu notifier to insure that the cpu slabs are flushed when
2862	* necessary.
2863	*/
2864	static int slub_cpu_dead(unsigned int cpu)
2865	{
2866	struct kmem_cache *s;
2867
2868	mutex_lock(&slab_mutex);
2869	list_for_each_entry(s, &slab_caches, list)
2870	__flush_cpu_slab(s, cpu);
2871	mutex_unlock(&slab_mutex);
2872	return `0`;
2873	}
2874
2875	#else /* CONFIG_SLUB_TINY */
2876	static inline void flush_all_cpus_locked(struct kmem_cache *s) { }
2877	static inline void flush_all(struct kmem_cache *s) { }
2878	static inline void __flush_cpu_slab(struct kmem_cache s, int* cpu) { }
2879	static inline int slub_cpu_dead(unsigned int cpu) { return `0`; }
2880	#endif /* CONFIG_SLUB_TINY */
2881
2882	/*
2883	* Check if the objects in a per cpu structure fit numa
2884	* locality expectations.
2885	*/
2886	static inline int node_match(struct slab slab, int* node)
2887	{
2888	#ifdef CONFIG_NUMA
2889	if (node != NUMA_NO_NODE && slab_nid(slab) != node)
2890	return `0`;
2891	#endif
2892	return `1`;
2893	}
2894
2895	#ifdef CONFIG_SLUB_DEBUG
2896	static int count_free(struct slab *slab)
2897	{
2898	return slab->objects - slab->inuse;
2899	}
2900
2901	static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2902	{
2903	return atomic_long_read(&n->total_objects);
2904	}
2905
2906	/ Supports checking bulk free of a constructed freelist /
2907	static inline bool free_debug_processing(struct kmem_cache *s,
2908	struct slab slab, void* head, void* tail, int* *bulk_cnt,
2909	unsigned long addr, depot_stack_handle_t handle)
2910	{
2911	bool checks_ok = false;
2912	void *object = head;
2913	int cnt = `0`;
2914
2915	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
2916	if (!check_slab(s, slab))
2917	goto out;
2918	}
2919
2920	if (slab->inuse < *bulk_cnt) {
2921	slab_err(s, slab, "Slab has %d allocated objects but %d are to be freed\n",
2922	slab->inuse, *bulk_cnt);
2923	goto out;
2924	}
2925
2926	next_object:
2927
2928	if (++cnt > *bulk_cnt)
2929	goto out_cnt;
2930
2931	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
2932	if (!free_consistency_checks(s, slab, object, addr))
2933	goto out;
2934	}
2935
2936	if (s->flags & SLAB_STORE_USER)
2937	set_track_update(s, object, TRACK_FREE, addr, handle);
2938	trace(s, slab, object, `0`);
2939	/ Freepointer not overwritten by init_object(), SLAB_POISON moved it /
2940	init_object(s, object, SLUB_RED_INACTIVE);
2941
2942	/ Reached end of constructed freelist yet? /
2943	if (object != tail) {
2944	object = get_freepointer(s, object);
2945	goto next_object;
2946	}
2947	checks_ok = true;
2948
2949	out_cnt:
2950	if (cnt != *bulk_cnt) {
2951	slab_err(s, slab, "Bulk free expected %d objects but found %d\n",
2952	*bulk_cnt, cnt);
2953	*bulk_cnt = cnt;
2954	}
2955
2956	out:
2957
2958	if (!checks_ok)
2959	slab_fix(s, "Object at 0x%p not freed", object);
2960
2961	return checks_ok;
2962	}
2963	#endif /* CONFIG_SLUB_DEBUG */
2964
2965	#if defined(CONFIG_SLUB_DEBUG) \|\| defined(SLAB_SUPPORTS_SYSFS)
2966	static unsigned long count_partial(struct kmem_cache_node *n,
2967	int (get_count)(struct* slab *))
2968	{
2969	unsigned long flags;
2970	unsigned long x = `0`;
2971	struct slab *slab;
2972
2973	spin_lock_irqsave(&n->list_lock, flags);
2974	list_for_each_entry(slab, &n->partial, slab_list)
2975	x += get_count(slab);
2976	spin_unlock_irqrestore(&n->list_lock, flags);
2977	return x;
2978	}
2979	#endif /* CONFIG_SLUB_DEBUG \|\| SLAB_SUPPORTS_SYSFS */
2980
2981	#ifdef CONFIG_SLUB_DEBUG
2982	static noinline void
2983	slab_out_of_memory(struct kmem_cache s, gfp_t gfpflags, int* nid)
2984	{
2985	static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2986	DEFAULT_RATELIMIT_BURST);
2987	int node;
2988	struct kmem_cache_node *n;
2989
2990	if ((gfpflags & __GFP_NOWARN) \|\| !__ratelimit(&slub_oom_rs))
2991	return;
2992
2993	pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2994	nid, gfpflags, &gfpflags);
2995	pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
2996	s->name, s->object_size, s->size, oo_order(s->oo),
2997	oo_order(s->min));
2998
2999	if (oo_order(s->min) > get_order(s->object_size))
3000	pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
3001	s->name);
3002
3003	for_each_kmem_cache_node(s, node, n) {
3004	unsigned long nr_slabs;
3005	unsigned long nr_objs;
3006	unsigned long nr_free;
3007
3008	nr_free = count_partial(n, count_free);
3009	nr_slabs = node_nr_slabs(n);
3010	nr_objs = node_nr_objs(n);
3011
3012	pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
3013	node, nr_slabs, nr_objs, nr_free);
3014	}
3015	}
3016	#else /* CONFIG_SLUB_DEBUG */
3017	static inline void
3018	slab_out_of_memory(struct kmem_cache s, gfp_t gfpflags, int* nid) { }
3019	#endif
3020
3021	static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
3022	{
3023	if (unlikely(slab_test_pfmemalloc(slab)))
3024	return gfp_pfmemalloc_allowed(gfp_mask: gfpflags);
3025
3026	return true;
3027	}
3028
3029	#ifndef CONFIG_SLUB_TINY
3030	static inline bool
3031	__update_cpu_freelist_fast(struct kmem_cache *s,
3032	void freelist_old, void* *freelist_new,
3033	unsigned long tid)
3034	{
3035	freelist_aba_t old = { .freelist = freelist_old, .counter = tid };
3036	freelist_aba_t new = { .freelist = freelist_new, .counter = next_tid(tid) };
3037
3038	return this_cpu_try_cmpxchg_freelist(s->cpu_slab->freelist_tid.full,
3039	&old.full, new.full);
3040	}
3041
3042	/*
3043	* Check the slab->freelist and either transfer the freelist to the
3044	* per cpu freelist or deactivate the slab.
3045	*
3046	* The slab is still frozen if the return value is not NULL.
3047	*
3048	* If this function returns NULL then the slab has been unfrozen.
3049	*/
3050	static inline void get_freelist(struct* kmem_cache s, struct* slab *slab)
3051	{
3052	struct slab new;
3053	unsigned long counters;
3054	void *freelist;
3055
3056	lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));
3057
3058	do {
3059	freelist = slab->freelist;
3060	counters = slab->counters;
3061
3062	new.counters = counters;
3063	VM_BUG_ON(!new.frozen);
3064
3065	new.inuse = slab->objects;
3066	new.frozen = freelist != NULL;
3067
3068	} while (!__slab_update_freelist(s, slab,
3069	freelist, counters,
3070	NULL, new.counters,
3071	"get_freelist"));
3072
3073	return freelist;
3074	}
3075
3076	/*
3077	* Slow path. The lockless freelist is empty or we need to perform
3078	* debugging duties.
3079	*
3080	* Processing is still very fast if new objects have been freed to the
3081	* regular freelist. In that case we simply take over the regular freelist
3082	* as the lockless freelist and zap the regular freelist.
3083	*
3084	* If that is not working then we fall back to the partial lists. We take the
3085	* first element of the freelist as the object to allocate now and move the
3086	* rest of the freelist to the lockless freelist.
3087	*
3088	* And if we were unable to get a new slab from the partial slab lists then
3089	* we need to allocate a new slab. This is the slowest path since it involves
3090	* a call to the page allocator and the setup of a new slab.
3091	*
3092	* Version of __slab_alloc to use when we know that preemption is
3093	* already disabled (which is the case for bulk allocation).
3094	*/
3095	static void ___slab_alloc(struct* kmem_cache s, gfp_t gfpflags, int* node,
3096	unsigned long addr, struct kmem_cache_cpu c, unsigned* int orig_size)
3097	{
3098	void *freelist;
3099	struct slab *slab;
3100	unsigned long flags;
3101	struct partial_context pc;
3102
3103	stat(s, ALLOC_SLOWPATH);
3104
3105	reread_slab:
3106
3107	slab = READ_ONCE(c->slab);
3108	if (!slab) {
3109	/*
3110	* if the node is not online or has no normal memory, just
3111	* ignore the node constraint
3112	*/
3113	if (unlikely(node != NUMA_NO_NODE &&
3114	!node_isset(node, slab_nodes)))
3115	node = NUMA_NO_NODE;
3116	goto new_slab;
3117	}
3118	redo:
3119
3120	if (unlikely(!node_match(slab, node))) {
3121	/*
3122	* same as above but node_match() being false already
3123	* implies node != NUMA_NO_NODE
3124	*/
3125	if (!node_isset(node, slab_nodes)) {
3126	node = NUMA_NO_NODE;
3127	} else {
3128	stat(s, ALLOC_NODE_MISMATCH);
3129	goto deactivate_slab;
3130	}
3131	}
3132
3133	/*
3134	* By rights, we should be searching for a slab page that was
3135	* PFMEMALLOC but right now, we are losing the pfmemalloc
3136	* information when the page leaves the per-cpu allocator
3137	*/
3138	if (unlikely(!pfmemalloc_match(slab, gfpflags)))
3139	goto deactivate_slab;
3140
3141	/ must check again c->slab in case we got preempted and it changed /
3142	local_lock_irqsave(&s->cpu_slab->lock, flags);
3143	if (unlikely(slab != c->slab)) {
3144	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3145	goto reread_slab;
3146	}
3147	freelist = c->freelist;
3148	if (freelist)
3149	goto load_freelist;
3150
3151	freelist = get_freelist(s, slab);
3152
3153	if (!freelist) {
3154	c->slab = NULL;
3155	c->tid = next_tid(c->tid);
3156	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3157	stat(s, DEACTIVATE_BYPASS);
3158	goto new_slab;
3159	}
3160
3161	stat(s, ALLOC_REFILL);
3162
3163	load_freelist:
3164
3165	lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));
3166
3167	/*
3168	* freelist is pointing to the list of objects to be used.
3169	* slab is pointing to the slab from which the objects are obtained.
3170	* That slab must be frozen for per cpu allocations to work.
3171	*/
3172	VM_BUG_ON(!c->slab->frozen);
3173	c->freelist = get_freepointer(s, freelist);
3174	c->tid = next_tid(c->tid);
3175	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3176	return freelist;
3177
3178	deactivate_slab:
3179
3180	local_lock_irqsave(&s->cpu_slab->lock, flags);
3181	if (slab != c->slab) {
3182	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3183	goto reread_slab;
3184	}
3185	freelist = c->freelist;
3186	c->slab = NULL;
3187	c->freelist = NULL;
3188	c->tid = next_tid(c->tid);
3189	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3190	deactivate_slab(s, slab, freelist);
3191
3192	new_slab:
3193
3194	if (slub_percpu_partial(c)) {
3195	local_lock_irqsave(&s->cpu_slab->lock, flags);
3196	if (unlikely(c->slab)) {
3197	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3198	goto reread_slab;
3199	}
3200	if (unlikely(!slub_percpu_partial(c))) {
3201	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3202	/ we were preempted and partial list got empty /
3203	goto new_objects;
3204	}
3205
3206	slab = c->slab = slub_percpu_partial(c);
3207	slub_set_percpu_partial(c, slab);
3208	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3209	stat(s, CPU_PARTIAL_ALLOC);
3210	goto redo;
3211	}
3212
3213	new_objects:
3214
3215	pc.flags = gfpflags;
3216	pc.slab = &slab;
3217	pc.orig_size = orig_size;
3218	freelist = get_partial(s, node, &pc);
3219	if (freelist)
3220	goto check_new_slab;
3221
3222	slub_put_cpu_ptr(s->cpu_slab);
3223	slab = new_slab(s, gfpflags, node);
3224	c = slub_get_cpu_ptr(s->cpu_slab);
3225
3226	if (unlikely(!slab)) {
3227	slab_out_of_memory(s, gfpflags, node);
3228	return NULL;
3229	}
3230
3231	stat(s, ALLOC_SLAB);
3232
3233	if (kmem_cache_debug(s)) {
3234	freelist = alloc_single_from_new_slab(s, slab, orig_size);
3235
3236	if (unlikely(!freelist))
3237	goto new_objects;
3238
3239	if (s->flags & SLAB_STORE_USER)
3240	set_track(s, freelist, TRACK_ALLOC, addr);
3241
3242	return freelist;
3243	}
3244
3245	/*
3246	* No other reference to the slab yet so we can
3247	* muck around with it freely without cmpxchg
3248	*/
3249	freelist = slab->freelist;
3250	slab->freelist = NULL;
3251	slab->inuse = slab->objects;
3252	slab->frozen = `1`;
3253
3254	inc_slabs_node(s, slab_nid(slab), slab->objects);
3255
3256	check_new_slab:
3257
3258	if (kmem_cache_debug(s)) {
3259	/*
3260	* For debug caches here we had to go through
3261	* alloc_single_from_partial() so just store the tracking info
3262	* and return the object
3263	*/
3264	if (s->flags & SLAB_STORE_USER)
3265	set_track(s, freelist, TRACK_ALLOC, addr);
3266
3267	return freelist;
3268	}
3269
3270	if (unlikely(!pfmemalloc_match(slab, gfpflags))) {
3271	/*
3272	* For !pfmemalloc_match() case we don't load freelist so that
3273	* we don't make further mismatched allocations easier.
3274	*/
3275	deactivate_slab(s, slab, get_freepointer(s, freelist));
3276	return freelist;
3277	}
3278
3279	retry_load_slab:
3280
3281	local_lock_irqsave(&s->cpu_slab->lock, flags);
3282	if (unlikely(c->slab)) {
3283	void *flush_freelist = c->freelist;
3284	struct slab *flush_slab = c->slab;
3285
3286	c->slab = NULL;
3287	c->freelist = NULL;
3288	c->tid = next_tid(c->tid);
3289
3290	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3291
3292	deactivate_slab(s, flush_slab, flush_freelist);
3293
3294	stat(s, CPUSLAB_FLUSH);
3295
3296	goto retry_load_slab;
3297	}
3298	c->slab = slab;
3299
3300	goto load_freelist;
3301	}
3302
3303	/*
3304	* A wrapper for ___slab_alloc() for contexts where preemption is not yet
3305	* disabled. Compensates for possible cpu changes by refetching the per cpu area
3306	* pointer.
3307	*/
3308	static void __slab_alloc(struct* kmem_cache s, gfp_t gfpflags, int* node,
3309	unsigned long addr, struct kmem_cache_cpu c, unsigned* int orig_size)
3310	{
3311	void *p;
3312
3313	#ifdef CONFIG_PREEMPT_COUNT
3314	/*
3315	* We may have been preempted and rescheduled on a different
3316	* cpu before disabling preemption. Need to reload cpu area
3317	* pointer.
3318	*/
3319	c = slub_get_cpu_ptr(s->cpu_slab);
3320	#endif
3321
3322	p = ___slab_alloc(s, gfpflags, node, addr, c, orig_size);
3323	#ifdef CONFIG_PREEMPT_COUNT
3324	slub_put_cpu_ptr(s->cpu_slab);
3325	#endif
3326	return p;
3327	}
3328
3329	static __always_inline void __slab_alloc_node(struct* kmem_cache *s,
3330	gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
3331	{
3332	struct kmem_cache_cpu *c;
3333	struct slab *slab;
3334	unsigned long tid;
3335	void *object;
3336
3337	redo:
3338	/*
3339	* Must read kmem_cache cpu data via this cpu ptr. Preemption is
3340	* enabled. We may switch back and forth between cpus while
3341	* reading from one cpu area. That does not matter as long
3342	* as we end up on the original cpu again when doing the cmpxchg.
3343	*
3344	* We must guarantee that tid and kmem_cache_cpu are retrieved on the
3345	* same cpu. We read first the kmem_cache_cpu pointer and use it to read
3346	* the tid. If we are preempted and switched to another cpu between the
3347	* two reads, it's OK as the two are still associated with the same cpu
3348	* and cmpxchg later will validate the cpu.
3349	*/
3350	c = raw_cpu_ptr(s->cpu_slab);
3351	tid = READ_ONCE(c->tid);
3352
3353	/*
3354	* Irqless object alloc/free algorithm used here depends on sequence
3355	* of fetching cpu_slab's data. tid should be fetched before anything
3356	* on c to guarantee that object and slab associated with previous tid
3357	* won't be used with current tid. If we fetch tid first, object and
3358	* slab could be one associated with next tid and our alloc/free
3359	* request will be failed. In this case, we will retry. So, no problem.
3360	*/
3361	barrier();
3362
3363	/*
3364	* The transaction ids are globally unique per cpu and per operation on
3365	* a per cpu queue. Thus they can be guarantee that the cmpxchg_double
3366	* occurs on the right processor and that there was no operation on the
3367	* linked list in between.
3368	*/
3369
3370	object = c->freelist;
3371	slab = c->slab;
3372
3373	if (!USE_LOCKLESS_FAST_PATH() \|\|
3374	unlikely(!object \|\| !slab \|\| !node_match(slab, node))) {
3375	object = __slab_alloc(s, gfpflags, node, addr, c, orig_size);
3376	} else {
3377	void *next_object = get_freepointer_safe(s, object);
3378
3379	/*
3380	* The cmpxchg will only match if there was no additional
3381	* operation and if we are on the right processor.
3382	*
3383	* The cmpxchg does the following atomically (without lock
3384	* semantics!)
3385	* 1. Relocate first pointer to the current per cpu area.
3386	* 2. Verify that tid and freelist have not been changed
3387	* 3. If they were not changed replace tid and freelist
3388	*
3389	* Since this is without lock semantics the protection is only
3390	* against code executing on this cpu not from access by
3391	* other cpus.
3392	*/
3393	if (unlikely(!__update_cpu_freelist_fast(s, object, next_object, tid))) {
3394	note_cmpxchg_failure("slab_alloc", s, tid);
3395	goto redo;
3396	}
3397	prefetch_freepointer(s, next_object);
3398	stat(s, ALLOC_FASTPATH);
3399	}
3400
3401	return object;
3402	}
3403	#else /* CONFIG_SLUB_TINY */
3404	static void __slab_alloc_node(struct* kmem_cache *s,
3405	gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
3406	{
3407	struct partial_context pc;
3408	struct slab *slab;
3409	void *object;
3410
3411	pc.flags = gfpflags;
3412	pc.slab = &slab;
3413	pc.orig_size = orig_size;
3414	object = get_partial(s, node, pc: &pc);
3415
3416	if (object)
3417	return object;
3418
3419	slab = new_slab(s, flags: gfpflags, node);
3420	if (unlikely(!slab)) {
3421	slab_out_of_memory(s, gfpflags, nid: node);
3422	return NULL;
3423	}
3424
3425	object = alloc_single_from_new_slab(s, slab, orig_size);
3426
3427	return object;
3428	}
3429	#endif /* CONFIG_SLUB_TINY */
3430
3431	/*
3432	* If the object has been wiped upon free, make sure it's fully initialized by
3433	* zeroing out freelist pointer.
3434	*/
3435	static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
3436	void *obj)
3437	{
3438	if (unlikely(slab_want_init_on_free(s)) && obj)
3439	memset((void )((char* *)kasan_reset_tag(obj) + s->offset),
3440	`0`, sizeof(void *));
3441	}
3442
3443	/*
3444	* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
3445	* have the fastpath folded into their functions. So no function call
3446	* overhead for requests that can be satisfied on the fastpath.
3447	*
3448	* The fastpath works by first checking if the lockless freelist can be used.
3449	* If not then __slab_alloc is called for slow processing.
3450	*
3451	* Otherwise we can simply pick the next object from the lockless free list.
3452	*/
3453	static __fastpath_inline void slab_alloc_node(struct* kmem_cache s, struct* list_lru *lru,
3454	gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
3455	{
3456	void *object;
3457	struct obj_cgroup *objcg = NULL;
3458	bool init = false;
3459
3460	s = slab_pre_alloc_hook(s, lru, objcgp: &objcg, size: `1`, flags: gfpflags);
3461	if (!s)
3462	return NULL;
3463
3464	object = kfence_alloc(s, size: orig_size, flags: gfpflags);
3465	if (unlikely(object))
3466	goto out;
3467
3468	object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
3469
3470	maybe_wipe_obj_freeptr(s, obj: object);
3471	init = slab_want_init_on_alloc(flags: gfpflags, c: s);
3472
3473	out:
3474	/*
3475	* When init equals 'true', like for kzalloc() family, only
3476	* @orig_size bytes might be zeroed instead of s->object_size
3477	*/
3478	slab_post_alloc_hook(s, objcg, flags: gfpflags, size: `1`, p: &object, init, orig_size);
3479
3480	return object;
3481	}
3482
3483	static __fastpath_inline void slab_alloc(struct* kmem_cache s, struct* list_lru *lru,
3484	gfp_t gfpflags, unsigned long addr, size_t orig_size)
3485	{
3486	return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size);
3487	}
3488
3489	static __fastpath_inline
3490	void __kmem_cache_alloc_lru(struct* kmem_cache s, struct* list_lru *lru,
3491	gfp_t gfpflags)
3492	{
3493	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, orig_size: s->object_size);
3494
3495	trace_kmem_cache_alloc(_RET_IP_, ptr: ret, s, gfp_flags: gfpflags, NUMA_NO_NODE);
3496
3497	return ret;
3498	}
3499
3500	void kmem_cache_alloc(struct* kmem_cache *s, gfp_t gfpflags)
3501	{
3502	return __kmem_cache_alloc_lru(s, NULL, gfpflags);
3503	}
3504	EXPORT_SYMBOL(kmem_cache_alloc);
3505
3506	void kmem_cache_alloc_lru(struct* kmem_cache s, struct* list_lru *lru,
3507	gfp_t gfpflags)
3508	{
3509	return __kmem_cache_alloc_lru(s, lru, gfpflags);
3510	}
3511	EXPORT_SYMBOL(kmem_cache_alloc_lru);
3512
3513	void __kmem_cache_alloc_node(struct* kmem_cache *s, gfp_t gfpflags,
3514	int node, size_t orig_size,
3515	unsigned long caller)
3516	{
3517	return slab_alloc_node(s, NULL, gfpflags, node,
3518	addr: caller, orig_size);
3519	}
3520
3521	void kmem_cache_alloc_node(struct* kmem_cache s, gfp_t gfpflags, int* node)
3522	{
3523	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, orig_size: s->object_size);
3524
3525	trace_kmem_cache_alloc(_RET_IP_, ptr: ret, s, gfp_flags: gfpflags, node);
3526
3527	return ret;
3528	}
3529	EXPORT_SYMBOL(kmem_cache_alloc_node);
3530
3531	static noinline void free_to_partial_list(
3532	struct kmem_cache s, struct* slab *slab,
3533	void head, void* tail, int* bulk_cnt,
3534	unsigned long addr)
3535	{
3536	struct kmem_cache_node *n = get_node(s, node: slab_nid(slab));
3537	struct slab *slab_free = NULL;
3538	int cnt = bulk_cnt;
3539	unsigned long flags;
3540	depot_stack_handle_t handle = `0`;
3541
3542	if (s->flags & SLAB_STORE_USER)
3543	handle = set_track_prepare();
3544
3545	spin_lock_irqsave(&n->list_lock, flags);
3546
3547	if (free_debug_processing(s, slab, head, tail, bulk_cnt: &cnt, addr, handle)) {
3548	void *prior = slab->freelist;
3549
3550	/ Perform the actual freeing while we still hold the locks /
3551	slab->inuse -= cnt;
3552	set_freepointer(s, object: tail, fp: prior);
3553	slab->freelist = head;
3554
3555	/*
3556	* If the slab is empty, and node's partial list is full,
3557	* it should be discarded anyway no matter it's on full or
3558	* partial list.
3559	*/
3560	if (slab->inuse == `0` && n->nr_partial >= s->min_partial)
3561	slab_free = slab;
3562
3563	if (!prior) {
3564	/ was on full list /
3565	remove_full(s, n, slab);
3566	if (!slab_free) {
3567	add_partial(n, slab, tail: DEACTIVATE_TO_TAIL);
3568	stat(s, si: FREE_ADD_PARTIAL);
3569	}
3570	} else if (slab_free) {
3571	remove_partial(n, slab);
3572	stat(s, si: FREE_REMOVE_PARTIAL);
3573	}
3574	}
3575
3576	if (slab_free) {
3577	/*
3578	* Update the counters while still holding n->list_lock to
3579	* prevent spurious validation warnings
3580	*/
3581	dec_slabs_node(s, node: slab_nid(slab: slab_free), objects: slab_free->objects);
3582	}
3583
3584	spin_unlock_irqrestore(lock: &n->list_lock, flags);
3585
3586	if (slab_free) {
3587	stat(s, si: FREE_SLAB);
3588	free_slab(s, slab: slab_free);
3589	}
3590	}
3591
3592	/*
3593	* Slow path handling. This may still be called frequently since objects
3594	* have a longer lifetime than the cpu slabs in most processing loads.
3595	*
3596	* So we still attempt to reduce cache line usage. Just take the slab
3597	* lock and free the item. If there is no additional partial slab
3598	* handling required then we can return immediately.
3599	*/
3600	static void __slab_free(struct kmem_cache s, struct* slab *slab,
3601	void head, void* tail, int* cnt,
3602	unsigned long addr)
3603
3604	{
3605	void *prior;
3606	int was_frozen;
3607	struct slab new;
3608	unsigned long counters;
3609	struct kmem_cache_node *n = NULL;
3610	unsigned long flags;
3611
3612	stat(s, si: FREE_SLOWPATH);
3613
3614	if (kfence_free(addr: head))
3615	return;
3616
3617	if (IS_ENABLED(CONFIG_SLUB_TINY) \|\| kmem_cache_debug(s)) {
3618	free_to_partial_list(s, slab, head, tail, bulk_cnt: cnt, addr);
3619	return;
3620	}
3621
3622	do {
3623	if (unlikely(n)) {
3624	spin_unlock_irqrestore(lock: &n->list_lock, flags);
3625	n = NULL;
3626	}
3627	prior = slab->freelist;
3628	counters = slab->counters;
3629	set_freepointer(s, object: tail, fp: prior);
3630	new.counters = counters;
3631	was_frozen = new.frozen;
3632	new.inuse -= cnt;
3633	if ((!new.inuse \|\| !prior) && !was_frozen) {
3634
3635	if (kmem_cache_has_cpu_partial(s) && !prior) {
3636
3637	/*
3638	* Slab was on no list before and will be
3639	* partially empty
3640	* We can defer the list move and instead
3641	* freeze it.
3642	*/
3643	new.frozen = `1`;
3644
3645	} else { / Needs to be taken off a list /
3646
3647	n = get_node(s, node: slab_nid(slab));
3648	/*
3649	* Speculatively acquire the list_lock.
3650	* If the cmpxchg does not succeed then we may
3651	* drop the list_lock without any processing.
3652	*
3653	* Otherwise the list_lock will synchronize with
3654	* other processors updating the list of slabs.
3655	*/
3656	spin_lock_irqsave(&n->list_lock, flags);
3657
3658	}
3659	}
3660
3661	} while (!slab_update_freelist(s, slab,
3662	freelist_old: prior, counters_old: counters,
3663	freelist_new: head, counters_new: new.counters,
3664	n: "__slab_free"));
3665
3666	if (likely(!n)) {
3667
3668	if (likely(was_frozen)) {
3669	/*
3670	* The list lock was not taken therefore no list
3671	* activity can be necessary.
3672	*/
3673	stat(s, si: FREE_FROZEN);
3674	} else if (new.frozen) {
3675	/*
3676	* If we just froze the slab then put it onto the
3677	* per cpu partial list.
3678	*/
3679	put_cpu_partial(s, slab, drain: `1`);
3680	stat(s, si: CPU_PARTIAL_FREE);
3681	}
3682
3683	return;
3684	}
3685
3686	if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
3687	goto slab_empty;
3688
3689	/*
3690	* Objects left in the slab. If it was not on the partial list before
3691	* then add it.
3692	*/
3693	if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
3694	remove_full(s, n, slab);
3695	add_partial(n, slab, tail: DEACTIVATE_TO_TAIL);
3696	stat(s, si: FREE_ADD_PARTIAL);
3697	}
3698	spin_unlock_irqrestore(lock: &n->list_lock, flags);
3699	return;
3700
3701	slab_empty:
3702	if (prior) {
3703	/*
3704	* Slab on the partial list.
3705	*/
3706	remove_partial(n, slab);
3707	stat(s, si: FREE_REMOVE_PARTIAL);
3708	} else {
3709	/ Slab must be on the full list /
3710	remove_full(s, n, slab);
3711	}
3712
3713	spin_unlock_irqrestore(lock: &n->list_lock, flags);
3714	stat(s, si: FREE_SLAB);
3715	discard_slab(s, slab);
3716	}
3717
3718	#ifndef CONFIG_SLUB_TINY
3719	/*
3720	* Fastpath with forced inlining to produce a kfree and kmem_cache_free that
3721	* can perform fastpath freeing without additional function calls.
3722	*
3723	* The fastpath is only possible if we are freeing to the current cpu slab
3724	* of this processor. This typically the case if we have just allocated
3725	* the item before.
3726	*
3727	* If fastpath is not possible then fall back to __slab_free where we deal
3728	* with all sorts of special processing.
3729	*
3730	* Bulk free of a freelist with several objects (all pointing to the
3731	* same slab) possible by specifying head and tail ptr, plus objects
3732	* count (cnt). Bulk free indicated by tail pointer being set.
3733	*/
3734	static __always_inline void do_slab_free(struct kmem_cache *s,
3735	struct slab slab, void* head, void* *tail,
3736	int cnt, unsigned long addr)
3737	{
3738	void *tail_obj = tail ? : head;
3739	struct kmem_cache_cpu *c;
3740	unsigned long tid;
3741	void **freelist;
3742
3743	redo:
3744	/*
3745	* Determine the currently cpus per cpu slab.
3746	* The cpu may change afterward. However that does not matter since
3747	* data is retrieved via this pointer. If we are on the same cpu
3748	* during the cmpxchg then the free will succeed.
3749	*/
3750	c = raw_cpu_ptr(s->cpu_slab);
3751	tid = READ_ONCE(c->tid);
3752
3753	/ Same with comment on barrier() in slab_alloc_node() /
3754	barrier();
3755
3756	if (unlikely(slab != c->slab)) {
3757	__slab_free(s, slab, head, tail_obj, cnt, addr);
3758	return;
3759	}
3760
3761	if (USE_LOCKLESS_FAST_PATH()) {
3762	freelist = READ_ONCE(c->freelist);
3763
3764	set_freepointer(s, tail_obj, freelist);
3765
3766	if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
3767	note_cmpxchg_failure("slab_free", s, tid);
3768	goto redo;
3769	}
3770	} else {
3771	/ Update the free list under the local lock /
3772	local_lock(&s->cpu_slab->lock);
3773	c = this_cpu_ptr(s->cpu_slab);
3774	if (unlikely(slab != c->slab)) {
3775	local_unlock(&s->cpu_slab->lock);
3776	goto redo;
3777	}
3778	tid = c->tid;
3779	freelist = c->freelist;
3780
3781	set_freepointer(s, tail_obj, freelist);
3782	c->freelist = head;
3783	c->tid = next_tid(tid);
3784
3785	local_unlock(&s->cpu_slab->lock);
3786	}
3787	stat(s, FREE_FASTPATH);
3788	}
3789	#else /* CONFIG_SLUB_TINY */
3790	static void do_slab_free(struct kmem_cache *s,
3791	struct slab slab, void* head, void* *tail,
3792	int cnt, unsigned long addr)
3793	{
3794	void *tail_obj = tail ? : head;
3795
3796	__slab_free(s, slab, head, tail: tail_obj, cnt, addr);
3797	}
3798	#endif /* CONFIG_SLUB_TINY */
3799
3800	static __fastpath_inline void slab_free(struct kmem_cache s, struct* slab *slab,
3801	void head, void* tail, void* *p, int* cnt,
3802	unsigned long addr)
3803	{
3804	memcg_slab_free_hook(s, slab, p, objects: cnt);
3805	/*
3806	* With KASAN enabled slab_free_freelist_hook modifies the freelist
3807	* to remove objects, whose reuse must be delayed.
3808	*/
3809	if (slab_free_freelist_hook(s, head: &head, tail: &tail, cnt: &cnt))
3810	do_slab_free(s, slab, head, tail, cnt, addr);
3811	}
3812
3813	#ifdef CONFIG_KASAN_GENERIC
3814	void ___cache_free(struct kmem_cache cache, void* x, unsigned* long addr)
3815	{
3816	do_slab_free(cache, virt_to_slab(x), x, NULL, `1`, addr);
3817	}
3818	#endif
3819
3820	void __kmem_cache_free(struct kmem_cache s, void* x, unsigned* long caller)
3821	{
3822	slab_free(s, slab: virt_to_slab(addr: x), head: x, NULL, p: &x, cnt: `1`, addr: caller);
3823	}
3824
3825	void kmem_cache_free(struct kmem_cache s, void* *x)
3826	{
3827	s = cache_from_obj(s, x);
3828	if (!s)
3829	return;
3830	trace_kmem_cache_free(_RET_IP_, ptr: x, s);
3831	slab_free(s, slab: virt_to_slab(addr: x), head: x, NULL, p: &x, cnt: `1`, _RET_IP_);
3832	}
3833	EXPORT_SYMBOL(kmem_cache_free);
3834
3835	struct detached_freelist {
3836	struct slab *slab;
3837	void *tail;
3838	void *freelist;
3839	int cnt;
3840	struct kmem_cache *s;
3841	};
3842
3843	/*
3844	* This function progressively scans the array with free objects (with
3845	* a limited look ahead) and extract objects belonging to the same
3846	* slab. It builds a detached freelist directly within the given
3847	* slab/objects. This can happen without any need for
3848	* synchronization, because the objects are owned by running process.
3849	* The freelist is build up as a single linked list in the objects.
3850	* The idea is, that this detached freelist can then be bulk
3851	* transferred to the real freelist(s), but only requiring a single
3852	* synchronization primitive. Look ahead in the array is limited due
3853	* to performance reasons.
3854	*/
3855	static inline
3856	int build_detached_freelist(struct kmem_cache *s, size_t size,
3857	void p, struct** detached_freelist *df)
3858	{
3859	int lookahead = `3`;
3860	void *object;
3861	struct folio *folio;
3862	size_t same;
3863
3864	object = p[--size];
3865	folio = virt_to_folio(x: object);
3866	if (!s) {
3867	/ Handle kalloc'ed objects /
3868	if (unlikely(!folio_test_slab(folio))) {
3869	free_large_kmalloc(folio, object);
3870	df->slab = NULL;
3871	return size;
3872	}
3873	/ Derive kmem_cache from object /
3874	df->slab = folio_slab(folio);
3875	df->s = df->slab->slab_cache;
3876	} else {
3877	df->slab = folio_slab(folio);
3878	df->s = cache_from_obj(s, x: object); / Support for memcg /
3879	}
3880
3881	/ Start new detached freelist /
3882	df->tail = object;
3883	df->freelist = object;
3884	df->cnt = `1`;
3885
3886	if (is_kfence_address(addr: object))
3887	return size;
3888
3889	set_freepointer(s: df->s, object, NULL);
3890
3891	same = size;
3892	while (size) {
3893	object = p[--size];
3894	/ df->slab is always set at this point /
3895	if (df->slab == virt_to_slab(addr: object)) {
3896	/ Opportunity build freelist /
3897	set_freepointer(s: df->s, object, fp: df->freelist);
3898	df->freelist = object;
3899	df->cnt++;
3900	same--;
3901	if (size != same)
3902	swap(p[size], p[same]);
3903	continue;
3904	}
3905
3906	/ Limit look ahead search /
3907	if (!--lookahead)
3908	break;
3909	}
3910
3911	return same;
3912	}
3913
3914	/ Note that interrupts must be enabled when calling this function. /
3915	void kmem_cache_free_bulk(struct kmem_cache s, size_t size, void* **p)
3916	{
3917	if (!size)
3918	return;
3919
3920	do {
3921	struct detached_freelist df;
3922
3923	size = build_detached_freelist(s, size, p, df: &df);
3924	if (!df.slab)
3925	continue;
3926
3927	slab_free(s: df.s, slab: df.slab, head: df.freelist, tail: df.tail, p: &p[size], cnt: df.cnt,
3928	_RET_IP_);
3929	} while (likely(size));
3930	}
3931	EXPORT_SYMBOL(kmem_cache_free_bulk);
3932
3933	#ifndef CONFIG_SLUB_TINY
3934	static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
3935	size_t size, void p, struct** obj_cgroup *objcg)
3936	{
3937	struct kmem_cache_cpu *c;
3938	unsigned long irqflags;
3939	int i;
3940
3941	/*
3942	* Drain objects in the per cpu slab, while disabling local
3943	* IRQs, which protects against PREEMPT and interrupts
3944	* handlers invoking normal fastpath.
3945	*/
3946	c = slub_get_cpu_ptr(s->cpu_slab);
3947	local_lock_irqsave(&s->cpu_slab->lock, irqflags);
3948
3949	for (i = `0`; i < size; i++) {
3950	void *object = kfence_alloc(s, s->object_size, flags);
3951
3952	if (unlikely(object)) {
3953	p[i] = object;
3954	continue;
3955	}
3956
3957	object = c->freelist;
3958	if (unlikely(!object)) {
3959	/*
3960	* We may have removed an object from c->freelist using
3961	* the fastpath in the previous iteration; in that case,
3962	* c->tid has not been bumped yet.
3963	* Since ___slab_alloc() may reenable interrupts while
3964	* allocating memory, we should bump c->tid now.
3965	*/
3966	c->tid = next_tid(c->tid);
3967
3968	local_unlock_irqrestore(&s->cpu_slab->lock, irqflags);
3969
3970	/*
3971	* Invoking slow path likely have side-effect
3972	* of re-populating per CPU c->freelist
3973	*/
3974	p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3975	_RET_IP_, c, s->object_size);
3976	if (unlikely(!p[i]))
3977	goto error;
3978
3979	c = this_cpu_ptr(s->cpu_slab);
3980	maybe_wipe_obj_freeptr(s, p[i]);
3981
3982	local_lock_irqsave(&s->cpu_slab->lock, irqflags);
3983
3984	continue; / goto for-loop /
3985	}
3986	c->freelist = get_freepointer(s, object);
3987	p[i] = object;
3988	maybe_wipe_obj_freeptr(s, p[i]);
3989	}
3990	c->tid = next_tid(c->tid);
3991	local_unlock_irqrestore(&s->cpu_slab->lock, irqflags);
3992	slub_put_cpu_ptr(s->cpu_slab);
3993
3994	return i;
3995
3996	error:
3997	slub_put_cpu_ptr(s->cpu_slab);
3998	slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
3999	kmem_cache_free_bulk(s, i, p);
4000	return `0`;
4001
4002	}
4003	#else /* CONFIG_SLUB_TINY */
4004	static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
4005	size_t size, void p, struct** obj_cgroup *objcg)
4006	{
4007	int i;
4008
4009	for (i = `0`; i < size; i++) {
4010	void *object = kfence_alloc(s, size: s->object_size, flags);
4011
4012	if (unlikely(object)) {
4013	p[i] = object;
4014	continue;
4015	}
4016
4017	p[i] = __slab_alloc_node(s, gfpflags: flags, NUMA_NO_NODE,
4018	_RET_IP_, orig_size: s->object_size);
4019	if (unlikely(!p[i]))
4020	goto error;
4021
4022	maybe_wipe_obj_freeptr(s, obj: p[i]);
4023	}
4024
4025	return i;
4026
4027	error:
4028	slab_post_alloc_hook(s, objcg, flags, size: i, p, init: false, orig_size: s->object_size);
4029	kmem_cache_free_bulk(s, i, p);
4030	return `0`;
4031	}
4032	#endif /* CONFIG_SLUB_TINY */
4033
4034	/ Note that interrupts must be enabled when calling this function. /
4035	int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
4036	void **p)
4037	{
4038	int i;
4039	struct obj_cgroup *objcg = NULL;
4040
4041	if (!size)
4042	return `0`;
4043
4044	/ memcg and kmem_cache debug support /
4045	s = slab_pre_alloc_hook(s, NULL, objcgp: &objcg, size, flags);
4046	if (unlikely(!s))
4047	return `0`;
4048
4049	i = __kmem_cache_alloc_bulk(s, flags, size, p, objcg);
4050
4051	/*
4052	* memcg and kmem_cache debug support and memory initialization.
4053	* Done outside of the IRQ disabled fastpath loop.
4054	*/
4055	if (i != `0`)
4056	slab_post_alloc_hook(s, objcg, flags, size, p,
4057	init: slab_want_init_on_alloc(flags, c: s), orig_size: s->object_size);
4058	return i;
4059	}
4060	EXPORT_SYMBOL(kmem_cache_alloc_bulk);
4061
4062
4063	/*
4064	* Object placement in a slab is made very easy because we always start at
4065	* offset 0. If we tune the size of the object to the alignment then we can
4066	* get the required alignment by putting one properly sized object after
4067	* another.
4068	*
4069	* Notice that the allocation order determines the sizes of the per cpu
4070	* caches. Each processor has always one slab available for allocations.
4071	* Increasing the allocation order reduces the number of times that slabs
4072	* must be moved on and off the partial lists and is therefore a factor in
4073	* locking overhead.
4074	*/
4075
4076	/*
4077	* Minimum / Maximum order of slab pages. This influences locking overhead
4078	* and slab fragmentation. A higher order reduces the number of partial slabs
4079	* and increases the number of allocations possible without having to
4080	* take the list_lock.
4081	*/
4082	static unsigned int slub_min_order;
4083	static unsigned int slub_max_order =
4084	IS_ENABLED(CONFIG_SLUB_TINY) ? `1` : PAGE_ALLOC_COSTLY_ORDER;
4085	static unsigned int slub_min_objects;
4086
4087	/*
4088	* Calculate the order of allocation given an slab object size.
4089	*
4090	* The order of allocation has significant impact on performance and other
4091	* system components. Generally order 0 allocations should be preferred since
4092	* order 0 does not cause fragmentation in the page allocator. Larger objects
4093	* be problematic to put into order 0 slabs because there may be too much
4094	* unused space left. We go to a higher order if more than 1/16th of the slab
4095	* would be wasted.
4096	*
4097	* In order to reach satisfactory performance we must ensure that a minimum
4098	* number of objects is in one slab. Otherwise we may generate too much
4099	* activity on the partial lists which requires taking the list_lock. This is
4100	* less a concern for large slabs though which are rarely used.
4101	*
4102	* slub_max_order specifies the order where we begin to stop considering the
4103	* number of objects in a slab as critical. If we reach slub_max_order then
4104	* we try to keep the page order as low as possible. So we accept more waste
4105	* of space in favor of a small page order.
4106	*
4107	* Higher order allocations also allow the placement of more objects in a
4108	* slab and thereby reduce object handling overhead. If the user has
4109	* requested a higher minimum order then we start with that one instead of
4110	* the smallest order which will fit the object.
4111	*/
4112	static inline unsigned int calc_slab_order(unsigned int size,
4113	unsigned int min_order, unsigned int max_order,
4114	unsigned int fract_leftover)
4115	{
4116	unsigned int order;
4117
4118	for (order = min_order; order <= max_order; order++) {
4119
4120	unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
4121	unsigned int rem;
4122
4123	rem = slab_size % size;
4124
4125	if (rem <= slab_size / fract_leftover)
4126	break;
4127	}
4128
4129	return order;
4130	}
4131
4132	static inline int calculate_order(unsigned int size)
4133	{
4134	unsigned int order;
4135	unsigned int min_objects;
4136	unsigned int max_objects;
4137	unsigned int min_order;
4138
4139	min_objects = slub_min_objects;
4140	if (!min_objects) {
4141	/*
4142	* Some architectures will only update present cpus when
4143	* onlining them, so don't trust the number if it's just 1. But
4144	* we also don't want to use nr_cpu_ids always, as on some other
4145	* architectures, there can be many possible cpus, but never
4146	* onlined. Here we compromise between trying to avoid too high
4147	* order on systems that appear larger than they are, and too
4148	* low order on systems that appear smaller than they are.
4149	*/
4150	unsigned int nr_cpus = num_present_cpus();
4151	if (nr_cpus <= `1`)
4152	nr_cpus = nr_cpu_ids;
4153	min_objects = `4` * (fls(x: nr_cpus) + `1`);
4154	}
4155	/ min_objects can't be 0 because get_order(0) is undefined /
4156	max_objects = max(order_objects(slub_max_order, size), `1U`);
4157	min_objects = min(min_objects, max_objects);
4158
4159	min_order = max_t(unsigned int, slub_min_order,
4160	get_order(min_objects * size));
4161	if (order_objects(order: min_order, size) > MAX_OBJS_PER_PAGE)
4162	return get_order(size: size * MAX_OBJS_PER_PAGE) - `1`;
4163
4164	/*
4165	* Attempt to find best configuration for a slab. This works by first
4166	* attempting to generate a layout with the best possible configuration
4167	* and backing off gradually.
4168	*
4169	* We start with accepting at most 1/16 waste and try to find the
4170	* smallest order from min_objects-derived/slub_min_order up to
4171	* slub_max_order that will satisfy the constraint. Note that increasing
4172	* the order can only result in same or less fractional waste, not more.
4173	*
4174	* If that fails, we increase the acceptable fraction of waste and try
4175	* again. The last iteration with fraction of 1/2 would effectively
4176	* accept any waste and give us the order determined by min_objects, as
4177	* long as at least single object fits within slub_max_order.
4178	*/
4179	for (unsigned int fraction = `16`; fraction > `1`; fraction /= `2`) {
4180	order = calc_slab_order(size, min_order, max_order: slub_max_order,
4181	fract_leftover: fraction);
4182	if (order <= slub_max_order)
4183	return order;
4184	}
4185
4186	/*
4187	* Doh this slab cannot be placed using slub_max_order.
4188	*/
4189	order = get_order(size);
4190	if (order <= MAX_ORDER)
4191	return order;
4192	return -ENOSYS;
4193	}
4194
4195	static void
4196	init_kmem_cache_node(struct kmem_cache_node *n)
4197	{
4198	n->nr_partial = `0`;
4199	spin_lock_init(&n->list_lock);
4200	INIT_LIST_HEAD(list: &n->partial);
4201	#ifdef CONFIG_SLUB_DEBUG
4202	atomic_long_set(&n->nr_slabs, `0`);
4203	atomic_long_set(&n->total_objects, `0`);
4204	INIT_LIST_HEAD(&n->full);
4205	#endif
4206	}
4207
4208	#ifndef CONFIG_SLUB_TINY
4209	static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
4210	{
4211	BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
4212	NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH *
4213	sizeof(struct kmem_cache_cpu));
4214
4215	/*
4216	* Must align to double word boundary for the double cmpxchg
4217	* instructions to work; see __pcpu_double_call_return_bool().
4218	*/
4219	s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
4220	`2` * sizeof(void *));
4221
4222	if (!s->cpu_slab)
4223	return `0`;
4224
4225	init_kmem_cache_cpus(s);
4226
4227	return `1`;
4228	}
4229	#else
4230	static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
4231	{
4232	return `1`;
4233	}
4234	#endif /* CONFIG_SLUB_TINY */
4235
4236	static struct kmem_cache *kmem_cache_node;
4237
4238	/*
4239	* No kmalloc_node yet so do it by hand. We know that this is the first
4240	* slab on the node for this slabcache. There are no concurrent accesses
4241	* possible.
4242	*
4243	* Note that this function only works on the kmem_cache_node
4244	* when allocating for the kmem_cache_node. This is used for bootstrapping
4245	* memory on a fresh node that has no slab structures yet.
4246	*/
4247	static void early_kmem_cache_node_alloc(int node)
4248	{
4249	struct slab *slab;
4250	struct kmem_cache_node *n;
4251
4252	BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
4253
4254	slab = new_slab(s: kmem_cache_node, GFP_NOWAIT, node);
4255
4256	BUG_ON(!slab);
4257	inc_slabs_node(s: kmem_cache_node, node: slab_nid(slab), objects: slab->objects);
4258	if (slab_nid(slab) != node) {
4259	pr_err("SLUB: Unable to allocate memory from node %d\n", node);
4260	pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
4261	}
4262
4263	n = slab->freelist;
4264	BUG_ON(!n);
4265	#ifdef CONFIG_SLUB_DEBUG
4266	init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
4267	init_tracking(kmem_cache_node, n);
4268	#endif
4269	n = kasan_slab_alloc(s: kmem_cache_node, object: n, GFP_KERNEL, init: false);
4270	slab->freelist = get_freepointer(s: kmem_cache_node, object: n);
4271	slab->inuse = `1`;
4272	kmem_cache_node->node[node] = n;
4273	init_kmem_cache_node(n);
4274	inc_slabs_node(s: kmem_cache_node, node, objects: slab->objects);
4275
4276	/*
4277	* No locks need to be taken here as it has just been
4278	* initialized and there is no concurrent access.
4279	*/
4280	__add_partial(n, slab, tail: DEACTIVATE_TO_HEAD);
4281	}
4282
4283	static void free_kmem_cache_nodes(struct kmem_cache *s)
4284	{
4285	int node;
4286	struct kmem_cache_node *n;
4287
4288	for_each_kmem_cache_node(s, node, n) {
4289	s->node[node] = NULL;
4290	kmem_cache_free(kmem_cache_node, n);
4291	}
4292	}
4293
4294	void __kmem_cache_release(struct kmem_cache *s)
4295	{
4296	cache_random_seq_destroy(cachep: s);
4297	#ifndef CONFIG_SLUB_TINY
4298	free_percpu(s->cpu_slab);
4299	#endif
4300	free_kmem_cache_nodes(s);
4301	}
4302
4303	static int init_kmem_cache_nodes(struct kmem_cache *s)
4304	{
4305	int node;
4306
4307	for_each_node_mask(node, slab_nodes) {
4308	struct kmem_cache_node *n;
4309
4310	if (slab_state == DOWN) {
4311	early_kmem_cache_node_alloc(node);
4312	continue;
4313	}
4314	n = kmem_cache_alloc_node(kmem_cache_node,
4315	GFP_KERNEL, node);
4316
4317	if (!n) {
4318	free_kmem_cache_nodes(s);
4319	return `0`;
4320	}
4321
4322	init_kmem_cache_node(n);
4323	s->node[node] = n;
4324	}
4325	return `1`;
4326	}
4327
4328	static void set_cpu_partial(struct kmem_cache *s)
4329	{
4330	#ifdef CONFIG_SLUB_CPU_PARTIAL
4331	unsigned int nr_objects;
4332
4333	/*
4334	* cpu_partial determined the maximum number of objects kept in the
4335	* per cpu partial lists of a processor.
4336	*
4337	* Per cpu partial lists mainly contain slabs that just have one
4338	* object freed. If they are used for allocation then they can be
4339	* filled up again with minimal effort. The slab will never hit the
4340	* per node partial lists and therefore no locking will be required.
4341	*
4342	* For backwards compatibility reasons, this is determined as number
4343	* of objects, even though we now limit maximum number of pages, see
4344	* slub_set_cpu_partial()
4345	*/
4346	if (!kmem_cache_has_cpu_partial(s))
4347	nr_objects = `0`;
4348	else if (s->size >= PAGE_SIZE)
4349	nr_objects = `6`;
4350	else if (s->size >= `1024`)
4351	nr_objects = `24`;
4352	else if (s->size >= `256`)
4353	nr_objects = `52`;
4354	else
4355	nr_objects = `120`;
4356
4357	slub_set_cpu_partial(s, nr_objects);
4358	#endif
4359	}
4360
4361	/*
4362	* calculate_sizes() determines the order and the distribution of data within
4363	* a slab object.
4364	*/
4365	static int calculate_sizes(struct kmem_cache *s)
4366	{
4367	slab_flags_t flags = s->flags;
4368	unsigned int size = s->object_size;
4369	unsigned int order;
4370
4371	/*
4372	* Round up object size to the next word boundary. We can only
4373	* place the free pointer at word boundaries and this determines
4374	* the possible location of the free pointer.
4375	*/
4376	size = ALIGN(size, sizeof(void *));
4377
4378	#ifdef CONFIG_SLUB_DEBUG
4379	/*
4380	* Determine if we can poison the object itself. If the user of
4381	* the slab may touch the object after free or before allocation
4382	* then we should never poison the object itself.
4383	*/
4384	if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
4385	!s->ctor)
4386	s->flags \|= __OBJECT_POISON;
4387	else
4388	s->flags &= ~__OBJECT_POISON;
4389
4390
4391	/*
4392	* If we are Redzoning then check if there is some space between the
4393	* end of the object and the free pointer. If not then add an
4394	* additional word to have some bytes to store Redzone information.
4395	*/
4396	if ((flags & SLAB_RED_ZONE) && size == s->object_size)
4397	size += sizeof(void *);
4398	#endif
4399
4400	/*
4401	* With that we have determined the number of bytes in actual use
4402	* by the object and redzoning.
4403	*/
4404	s->inuse = size;
4405
4406	if (slub_debug_orig_size(s) \|\|
4407	(flags & (SLAB_TYPESAFE_BY_RCU \| SLAB_POISON)) \|\|
4408	((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) \|\|
4409	s->ctor) {
4410	/*
4411	* Relocate free pointer after the object if it is not
4412	* permitted to overwrite the first word of the object on
4413	* kmem_cache_free.
4414	*
4415	* This is the case if we do RCU, have a constructor or
4416	* destructor, are poisoning the objects, or are
4417	* redzoning an object smaller than sizeof(void *).
4418	*
4419	* The assumption that s->offset >= s->inuse means free
4420	* pointer is outside of the object is used in the
4421	* freeptr_outside_object() function. If that is no
4422	* longer true, the function needs to be modified.
4423	*/
4424	s->offset = size;
4425	size += sizeof(void *);
4426	} else {
4427	/*
4428	* Store freelist pointer near middle of object to keep
4429	* it away from the edges of the object to avoid small
4430	* sized over/underflows from neighboring allocations.
4431	*/
4432	s->offset = ALIGN_DOWN(s->object_size / `2`, sizeof(void *));
4433	}
4434
4435	#ifdef CONFIG_SLUB_DEBUG
4436	if (flags & SLAB_STORE_USER) {
4437	/*
4438	* Need to store information about allocs and frees after
4439	* the object.
4440	*/
4441	size += `2` * sizeof(struct track);
4442
4443	/ Save the original kmalloc request size /
4444	if (flags & SLAB_KMALLOC)
4445	size += sizeof(unsigned int);
4446	}
4447	#endif
4448
4449	kasan_cache_create(cache: s, size: &size, flags: &s->flags);
4450	#ifdef CONFIG_SLUB_DEBUG
4451	if (flags & SLAB_RED_ZONE) {
4452	/*
4453	* Add some empty padding so that we can catch
4454	* overwrites from earlier objects rather than let
4455	* tracking information or the free pointer be
4456	* corrupted if a user writes before the start
4457	* of the object.
4458	*/
4459	size += sizeof(void *);
4460
4461	s->red_left_pad = sizeof(void *);
4462	s->red_left_pad = ALIGN(s->red_left_pad, s->align);
4463	size += s->red_left_pad;
4464	}
4465	#endif
4466
4467	/*
4468	* SLUB stores one object immediately after another beginning from
4469	* offset 0. In order to align the objects we have to simply size
4470	* each object to conform to the alignment.
4471	*/
4472	size = ALIGN(size, s->align);
4473	s->size = size;
4474	s->reciprocal_size = reciprocal_value(d: size);
4475	order = calculate_order(size);
4476
4477	if ((int)order < `0`)
4478	return `0`;
4479
4480	s->allocflags = `0`;
4481	if (order)
4482	s->allocflags \|= __GFP_COMP;
4483
4484	if (s->flags & SLAB_CACHE_DMA)
4485	s->allocflags \|= GFP_DMA;
4486
4487	if (s->flags & SLAB_CACHE_DMA32)
4488	s->allocflags \|= GFP_DMA32;
4489
4490	if (s->flags & SLAB_RECLAIM_ACCOUNT)
4491	s->allocflags \|= __GFP_RECLAIMABLE;
4492
4493	/*
4494	* Determine the number of objects per slab
4495	*/
4496	s->oo = oo_make(order, size);
4497	s->min = oo_make(order: get_order(size), size);
4498
4499	return !!oo_objects(x: s->oo);
4500	}
4501
4502	static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
4503	{
4504	s->flags = kmem_cache_flags(object_size: s->size, flags, name: s->name);
4505	#ifdef CONFIG_SLAB_FREELIST_HARDENED
4506	s->random = get_random_long();
4507	#endif
4508
4509	if (!calculate_sizes(s))
4510	goto error;
4511	if (disable_higher_order_debug) {
4512	/*
4513	* Disable debugging flags that store metadata if the min slab
4514	* order increased.
4515	*/
4516	if (get_order(size: s->size) > get_order(size: s->object_size)) {
4517	s->flags &= ~DEBUG_METADATA_FLAGS;
4518	s->offset = `0`;
4519	if (!calculate_sizes(s))
4520	goto error;
4521	}
4522	}
4523
4524	#ifdef system_has_freelist_aba
4525	if (system_has_freelist_aba() && !(s->flags & SLAB_NO_CMPXCHG)) {
4526	/ Enable fast mode /
4527	s->flags \|= __CMPXCHG_DOUBLE;
4528	}
4529	#endif
4530
4531	/*
4532	* The larger the object size is, the more slabs we want on the partial
4533	* list to avoid pounding the page allocator excessively.
4534	*/
4535	s->min_partial = min_t(unsigned long, MAX_PARTIAL, ilog2(s->size) / `2`);
4536	s->min_partial = max_t(unsigned long, MIN_PARTIAL, s->min_partial);
4537
4538	set_cpu_partial(s);
4539
4540	#ifdef CONFIG_NUMA
4541	s->remote_node_defrag_ratio = `1000`;
4542	#endif
4543
4544	/ Initialize the pre-computed randomized freelist if slab is up /
4545	if (slab_state >= UP) {
4546	if (init_cache_random_seq(s))
4547	goto error;
4548	}
4549
4550	if (!init_kmem_cache_nodes(s))
4551	goto error;
4552
4553	if (alloc_kmem_cache_cpus(s))
4554	return `0`;
4555
4556	error:
4557	__kmem_cache_release(s);
4558	return -EINVAL;
4559	}
4560
4561	static void list_slab_objects(struct kmem_cache s, struct* slab *slab,
4562	const char *text)
4563	{
4564	#ifdef CONFIG_SLUB_DEBUG
4565	void *addr = slab_address(slab);
4566	void *p;
4567
4568	slab_err(s, slab, text, s->name);
4569
4570	spin_lock(&object_map_lock);
4571	__fill_map(object_map, s, slab);
4572
4573	for_each_object(p, s, addr, slab->objects) {
4574
4575	if (!test_bit(__obj_to_index(s, addr, p), object_map)) {
4576	pr_err("Object 0x%p @offset=%tu\n", p, p - addr);
4577	print_tracking(s, p);
4578	}
4579	}
4580	spin_unlock(&object_map_lock);
4581	#endif
4582	}
4583
4584	/*
4585	* Attempt to free all partial slabs on a node.
4586	* This is called from __kmem_cache_shutdown(). We must take list_lock
4587	* because sysfs file might still access partial list after the shutdowning.
4588	*/
4589	static void free_partial(struct kmem_cache s, struct* kmem_cache_node *n)
4590	{
4591	LIST_HEAD(discard);
4592	struct slab slab, h;
4593
4594	BUG_ON(irqs_disabled());
4595	spin_lock_irq(lock: &n->list_lock);
4596	list_for_each_entry_safe(slab, h, &n->partial, slab_list) {
4597	if (!slab->inuse) {
4598	remove_partial(n, slab);
4599	list_add(new: &slab->slab_list, head: &discard);
4600	} else {
4601	list_slab_objects(s, slab,
4602	text: "Objects remaining in %s on __kmem_cache_shutdown()");
4603	}
4604	}
4605	spin_unlock_irq(lock: &n->list_lock);
4606
4607	list_for_each_entry_safe(slab, h, &discard, slab_list)
4608	discard_slab(s, slab);
4609	}
4610
4611	bool __kmem_cache_empty(struct kmem_cache *s)
4612	{
4613	int node;
4614	struct kmem_cache_node *n;
4615
4616	for_each_kmem_cache_node(s, node, n)
4617	if (n->nr_partial \|\| node_nr_slabs(n))
4618	return false;
4619	return true;
4620	}
4621
4622	/*
4623	* Release all resources used by a slab cache.
4624	*/
4625	int __kmem_cache_shutdown(struct kmem_cache *s)
4626	{
4627	int node;
4628	struct kmem_cache_node *n;
4629
4630	flush_all_cpus_locked(s);
4631	/ Attempt to free all objects /
4632	for_each_kmem_cache_node(s, node, n) {
4633	free_partial(s, n);
4634	if (n->nr_partial \|\| node_nr_slabs(n))
4635	return `1`;
4636	}
4637	return `0`;
4638	}
4639
4640	#ifdef CONFIG_PRINTK
4641	void __kmem_obj_info(struct kmem_obj_info kpp, void* object, struct* slab *slab)
4642	{
4643	void *base;
4644	int __maybe_unused i;
4645	unsigned int objnr;
4646	void *objp;
4647	void *objp0;
4648	struct kmem_cache *s = slab->slab_cache;
4649	struct track __maybe_unused *trackp;
4650
4651	kpp->kp_ptr = object;
4652	kpp->kp_slab = slab;
4653	kpp->kp_slab_cache = s;
4654	base = slab_address(slab);
4655	objp0 = kasan_reset_tag(addr: object);
4656	#ifdef CONFIG_SLUB_DEBUG
4657	objp = restore_red_left(s, objp0);
4658	#else
4659	objp = objp0;
4660	#endif
4661	objnr = obj_to_index(cache: s, slab, obj: objp);
4662	kpp->kp_data_offset = (unsigned long)((char )objp0 - (char* *)objp);
4663	objp = base + s->size * objnr;
4664	kpp->kp_objp = objp;
4665	if (WARN_ON_ONCE(objp < base \|\| objp >= base + slab->objects * s->size
4666	\|\| (objp - base) % s->size) \|\|
4667	!(s->flags & SLAB_STORE_USER))
4668	return;
4669	#ifdef CONFIG_SLUB_DEBUG
4670	objp = fixup_red_left(s, objp);
4671	trackp = get_track(s, objp, TRACK_ALLOC);
4672	kpp->kp_ret = (void *)trackp->addr;
4673	#ifdef CONFIG_STACKDEPOT
4674	{
4675	depot_stack_handle_t handle;
4676	unsigned long *entries;
4677	unsigned int nr_entries;
4678
4679	handle = READ_ONCE(trackp->handle);
4680	if (handle) {
4681	nr_entries = stack_depot_fetch(handle, &entries);
4682	for (i = `0`; i < KS_ADDRS_COUNT && i < nr_entries; i++)
4683	kpp->kp_stack[i] = (void *)entries[i];
4684	}
4685
4686	trackp = get_track(s, objp, TRACK_FREE);
4687	handle = READ_ONCE(trackp->handle);
4688	if (handle) {
4689	nr_entries = stack_depot_fetch(handle, &entries);
4690	for (i = `0`; i < KS_ADDRS_COUNT && i < nr_entries; i++)
4691	kpp->kp_free_stack[i] = (void *)entries[i];
4692	}
4693	}
4694	#endif
4695	#endif
4696	}
4697	#endif
4698
4699	/********************************************************************
4700	* Kmalloc subsystem
4701	*******************************************************************/
4702
4703	static int __init setup_slub_min_order(char *str)
4704	{
4705	get_option(str: &str, pint: (int *)&slub_min_order);
4706
4707	if (slub_min_order > slub_max_order)
4708	slub_max_order = slub_min_order;
4709
4710	return `1`;
4711	}
4712
4713	__setup("slub_min_order=", setup_slub_min_order);
4714
4715	static int __init setup_slub_max_order(char *str)
4716	{
4717	get_option(str: &str, pint: (int *)&slub_max_order);
4718	slub_max_order = min_t(unsigned int, slub_max_order, MAX_ORDER);
4719
4720	if (slub_min_order > slub_max_order)
4721	slub_min_order = slub_max_order;
4722
4723	return `1`;
4724	}
4725
4726	__setup("slub_max_order=", setup_slub_max_order);
4727
4728	static int __init setup_slub_min_objects(char *str)
4729	{
4730	get_option(str: &str, pint: (int *)&slub_min_objects);
4731
4732	return `1`;
4733	}
4734
4735	__setup("slub_min_objects=", setup_slub_min_objects);
4736
4737	#ifdef CONFIG_HARDENED_USERCOPY
4738	/*
4739	* Rejects incorrectly sized objects and objects that are to be copied
4740	* to/from userspace but do not fall entirely within the containing slab
4741	* cache's usercopy region.
4742	*
4743	* Returns NULL if check passes, otherwise const char * to name of cache
4744	* to indicate an error.
4745	*/
4746	void __check_heap_object(const void ptr, unsigned* long n,
4747	const struct slab *slab, bool to_user)
4748	{
4749	struct kmem_cache *s;
4750	unsigned int offset;
4751	bool is_kfence = is_kfence_address(addr: ptr);
4752
4753	ptr = kasan_reset_tag(addr: ptr);
4754
4755	/ Find object and usable object size. /
4756	s = slab->slab_cache;
4757
4758	/ Reject impossible pointers. /
4759	if (ptr < slab_address(slab))
4760	usercopy_abort(name: "SLUB object not in SLUB page?!", NULL,
4761	to_user, offset: `0`, len: n);
4762
4763	/ Find offset within object. /
4764	if (is_kfence)
4765	offset = ptr - kfence_object_start(addr: ptr);
4766	else
4767	offset = (ptr - slab_address(slab)) % s->size;
4768
4769	/ Adjust for redzone and reject if within the redzone. /
4770	if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
4771	if (offset < s->red_left_pad)
4772	usercopy_abort(name: "SLUB object in left red zone",
4773	detail: s->name, to_user, offset, len: n);
4774	offset -= s->red_left_pad;
4775	}
4776
4777	/ Allow address range falling entirely within usercopy region. /
4778	if (offset >= s->useroffset &&
4779	offset - s->useroffset <= s->usersize &&
4780	n <= s->useroffset - offset + s->usersize)
4781	return;
4782
4783	usercopy_abort(name: "SLUB object", detail: s->name, to_user, offset, len: n);
4784	}
4785	#endif /* CONFIG_HARDENED_USERCOPY */
4786
4787	#define SHRINK_PROMOTE_MAX 32
4788
4789	/*
4790	* kmem_cache_shrink discards empty slabs and promotes the slabs filled
4791	* up most to the head of the partial lists. New allocations will then
4792	* fill those up and thus they can be removed from the partial lists.
4793	*
4794	* The slabs with the least items are placed last. This results in them
4795	* being allocated from last increasing the chance that the last objects
4796	* are freed in them.
4797	*/
4798	static int __kmem_cache_do_shrink(struct kmem_cache *s)
4799	{
4800	int node;
4801	int i;
4802	struct kmem_cache_node *n;
4803	struct slab *slab;
4804	struct slab *t;
4805	struct list_head discard;
4806	struct list_head promote[SHRINK_PROMOTE_MAX];
4807	unsigned long flags;
4808	int ret = `0`;
4809
4810	for_each_kmem_cache_node(s, node, n) {
4811	INIT_LIST_HEAD(list: &discard);
4812	for (i = `0`; i < SHRINK_PROMOTE_MAX; i++)
4813	INIT_LIST_HEAD(list: promote + i);
4814
4815	spin_lock_irqsave(&n->list_lock, flags);
4816
4817	/*
4818	* Build lists of slabs to discard or promote.
4819	*
4820	* Note that concurrent frees may occur while we hold the
4821	* list_lock. slab->inuse here is the upper limit.
4822	*/
4823	list_for_each_entry_safe(slab, t, &n->partial, slab_list) {
4824	int free = slab->objects - slab->inuse;
4825
4826	/ Do not reread slab->inuse /
4827	barrier();
4828
4829	/ We do not keep full slabs on the list /
4830	BUG_ON(free <= `0`);
4831
4832	if (free == slab->objects) {
4833	list_move(list: &slab->slab_list, head: &discard);
4834	n->nr_partial--;
4835	dec_slabs_node(s, node, objects: slab->objects);
4836	} else if (free <= SHRINK_PROMOTE_MAX)
4837	list_move(list: &slab->slab_list, head: promote + free - `1`);
4838	}
4839
4840	/*
4841	* Promote the slabs filled up most to the head of the
4842	* partial list.
4843	*/
4844	for (i = SHRINK_PROMOTE_MAX - `1`; i >= `0`; i--)
4845	list_splice(list: promote + i, head: &n->partial);
4846
4847	spin_unlock_irqrestore(lock: &n->list_lock, flags);
4848
4849	/ Release empty slabs /
4850	list_for_each_entry_safe(slab, t, &discard, slab_list)
4851	free_slab(s, slab);
4852
4853	if (node_nr_slabs(n))
4854	ret = `1`;
4855	}
4856
4857	return ret;
4858	}
4859
4860	int __kmem_cache_shrink(struct kmem_cache *s)
4861	{
4862	flush_all(s);
4863	return __kmem_cache_do_shrink(s);
4864	}
4865
4866	static int slab_mem_going_offline_callback(void *arg)
4867	{
4868	struct kmem_cache *s;
4869
4870	mutex_lock(&slab_mutex);
4871	list_for_each_entry(s, &slab_caches, list) {
4872	flush_all_cpus_locked(s);
4873	__kmem_cache_do_shrink(s);
4874	}
4875	mutex_unlock(lock: &slab_mutex);
4876
4877	return `0`;
4878	}
4879
4880	static void slab_mem_offline_callback(void *arg)
4881	{
4882	struct memory_notify *marg = arg;
4883	int offline_node;
4884
4885	offline_node = marg->status_change_nid_normal;
4886
4887	/*
4888	* If the node still has available memory. we need kmem_cache_node
4889	* for it yet.
4890	*/
4891	if (offline_node < `0`)
4892	return;
4893
4894	mutex_lock(&slab_mutex);
4895	node_clear(offline_node, slab_nodes);
4896	/*
4897	* We no longer free kmem_cache_node structures here, as it would be
4898	* racy with all get_node() users, and infeasible to protect them with
4899	* slab_mutex.
4900	*/
4901	mutex_unlock(lock: &slab_mutex);
4902	}
4903
4904	static int slab_mem_going_online_callback(void *arg)
4905	{
4906	struct kmem_cache_node *n;
4907	struct kmem_cache *s;
4908	struct memory_notify *marg = arg;
4909	int nid = marg->status_change_nid_normal;
4910	int ret = `0`;
4911
4912	/*
4913	* If the node's memory is already available, then kmem_cache_node is
4914	* already created. Nothing to do.
4915	*/
4916	if (nid < `0`)
4917	return `0`;
4918
4919	/*
4920	* We are bringing a node online. No memory is available yet. We must
4921	* allocate a kmem_cache_node structure in order to bring the node
4922	* online.
4923	*/
4924	mutex_lock(&slab_mutex);
4925	list_for_each_entry(s, &slab_caches, list) {
4926	/*
4927	* The structure may already exist if the node was previously
4928	* onlined and offlined.
4929	*/
4930	if (get_node(s, node: nid))
4931	continue;
4932	/*
4933	* XXX: kmem_cache_alloc_node will fallback to other nodes
4934	* since memory is not yet available from the node that
4935	* is brought up.
4936	*/
4937	n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4938	if (!n) {
4939	ret = -ENOMEM;
4940	goto out;
4941	}
4942	init_kmem_cache_node(n);
4943	s->node[nid] = n;
4944	}
4945	/*
4946	* Any cache created after this point will also have kmem_cache_node
4947	* initialized for the new node.
4948	*/
4949	node_set(nid, slab_nodes);
4950	out:
4951	mutex_unlock(lock: &slab_mutex);
4952	return ret;
4953	}
4954
4955	static int slab_memory_callback(struct notifier_block *self,
4956	unsigned long action, void *arg)
4957	{
4958	int ret = `0`;
4959
4960	switch (action) {
4961	case MEM_GOING_ONLINE:
4962	ret = slab_mem_going_online_callback(arg);
4963	break;
4964	case MEM_GOING_OFFLINE:
4965	ret = slab_mem_going_offline_callback(arg);
4966	break;
4967	case MEM_OFFLINE:
4968	case MEM_CANCEL_ONLINE:
4969	slab_mem_offline_callback(arg);
4970	break;
4971	case MEM_ONLINE:
4972	case MEM_CANCEL_OFFLINE:
4973	break;
4974	}
4975	if (ret)
4976	ret = notifier_from_errno(err: ret);
4977	else
4978	ret = NOTIFY_OK;
4979	return ret;
4980	}
4981
4982	/********************************************************************
4983	* Basic setup of slabs
4984	*******************************************************************/
4985
4986	/*
4987	* Used for early kmem_cache structures that were allocated using
4988	* the page allocator. Allocate them properly then fix up the pointers
4989	* that may be pointing to the wrong kmem_cache structure.
4990	*/
4991
4992	static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
4993	{
4994	int node;
4995	struct kmem_cache *s = kmem_cache_zalloc(k: kmem_cache, GFP_NOWAIT);
4996	struct kmem_cache_node *n;
4997
4998	memcpy(s, static_cache, kmem_cache->object_size);
4999
5000	/*
5001	* This runs very early, and only the boot processor is supposed to be
5002	* up. Even if it weren't true, IRQs are not up so we couldn't fire
5003	* IPIs around.
5004	*/
5005	__flush_cpu_slab(s, smp_processor_id());
5006	for_each_kmem_cache_node(s, node, n) {
5007	struct slab *p;
5008
5009	list_for_each_entry(p, &n->partial, slab_list)
5010	p->slab_cache = s;
5011
5012	#ifdef CONFIG_SLUB_DEBUG
5013	list_for_each_entry(p, &n->full, slab_list)
5014	p->slab_cache = s;
5015	#endif
5016	}
5017	list_add(new: &s->list, head: &slab_caches);
5018	return s;
5019	}
5020
5021	void __init kmem_cache_init(void)
5022	{
5023	static __initdata struct kmem_cache boot_kmem_cache,
5024	boot_kmem_cache_node;
5025	int node;
5026
5027	if (debug_guardpage_minorder())
5028	slub_max_order = `0`;
5029
5030	/ Print slub debugging pointers without hashing /
5031	if (__slub_debug_enabled())
5032	no_hash_pointers_enable(NULL);
5033
5034	kmem_cache_node = &boot_kmem_cache_node;
5035	kmem_cache = &boot_kmem_cache;
5036
5037	/*
5038	* Initialize the nodemask for which we will allocate per node
5039	* structures. Here we don't need taking slab_mutex yet.
5040	*/
5041	for_each_node_state(node, N_NORMAL_MEMORY)
5042	node_set(node, slab_nodes);
5043
5044	create_boot_cache(kmem_cache_node, name: "kmem_cache_node",
5045	size: sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, useroffset: `0`, usersize: `0`);
5046
5047	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
5048
5049	/ Able to allocate the per node structures /
5050	slab_state = PARTIAL;
5051
5052	create_boot_cache(kmem_cache, name: "kmem_cache",
5053	offsetof(struct kmem_cache, node) +
5054	nr_node_ids * sizeof(struct kmem_cache_node *),
5055	SLAB_HWCACHE_ALIGN, useroffset: `0`, usersize: `0`);
5056
5057	kmem_cache = bootstrap(static_cache: &boot_kmem_cache);
5058	kmem_cache_node = bootstrap(static_cache: &boot_kmem_cache_node);
5059
5060	/ Now we can use the kmem_cache to allocate kmalloc slabs /
5061	setup_kmalloc_cache_index_table();
5062	create_kmalloc_caches(`0`);
5063
5064	/ Setup random freelists for each cache /
5065	init_freelist_randomization();
5066
5067	cpuhp_setup_state_nocalls(state: CPUHP_SLUB_DEAD, name: "slub:dead", NULL,
5068	teardown: slub_cpu_dead);
5069
5070	pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
5071	cache_line_size(),
5072	slub_min_order, slub_max_order, slub_min_objects,
5073	nr_cpu_ids, nr_node_ids);
5074	}
5075
5076	void __init kmem_cache_init_late(void)
5077	{
5078	#ifndef CONFIG_SLUB_TINY
5079	flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, `0`);
5080	WARN_ON(!flushwq);
5081	#endif
5082	}
5083
5084	struct kmem_cache *
5085	__kmem_cache_alias(const char name, unsigned* int size, unsigned int align,
5086	slab_flags_t flags, void (ctor)(void* *))
5087	{
5088	struct kmem_cache *s;
5089
5090	s = find_mergeable(size, align, flags, name, ctor);
5091	if (s) {
5092	if (sysfs_slab_alias(s, p: name))
5093	return NULL;
5094
5095	s->refcount++;
5096
5097	/*
5098	* Adjust the object sizes so that we clear
5099	* the complete object on kzalloc.
5100	*/
5101	s->object_size = max(s->object_size, size);
5102	s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
5103	}
5104
5105	return s;
5106	}
5107
5108	int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
5109	{
5110	int err;
5111
5112	err = kmem_cache_open(s, flags);
5113	if (err)
5114	return err;
5115
5116	/ Mutex is not taken during early boot /
5117	if (slab_state <= UP)
5118	return `0`;
5119
5120	err = sysfs_slab_add(s);
5121	if (err) {
5122	__kmem_cache_release(s);
5123	return err;
5124	}
5125
5126	if (s->flags & SLAB_STORE_USER)
5127	debugfs_slab_add(s);
5128
5129	return `0`;
5130	}
5131
5132	#ifdef SLAB_SUPPORTS_SYSFS
5133	static int count_inuse(struct slab *slab)
5134	{
5135	return slab->inuse;
5136	}
5137
5138	static int count_total(struct slab *slab)
5139	{
5140	return slab->objects;
5141	}
5142	#endif
5143
5144	#ifdef CONFIG_SLUB_DEBUG
5145	static void validate_slab(struct kmem_cache s, struct* slab *slab,
5146	unsigned long *obj_map)
5147	{
5148	void *p;
5149	void *addr = slab_address(slab);
5150
5151	if (!check_slab(s, slab) \|\| !on_freelist(s, slab, NULL))
5152	return;
5153
5154	/ Now we know that a valid freelist exists /
5155	__fill_map(obj_map, s, slab);
5156	for_each_object(p, s, addr, slab->objects) {
5157	u8 val = test_bit(__obj_to_index(s, addr, p), obj_map) ?
5158	SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
5159
5160	if (!check_object(s, slab, p, val))
5161	break;
5162	}
5163	}
5164
5165	static int validate_slab_node(struct kmem_cache *s,
5166	struct kmem_cache_node n, unsigned* long *obj_map)
5167	{
5168	unsigned long count = `0`;
5169	struct slab *slab;
5170	unsigned long flags;
5171
5172	spin_lock_irqsave(&n->list_lock, flags);
5173
5174	list_for_each_entry(slab, &n->partial, slab_list) {
5175	validate_slab(s, slab, obj_map);
5176	count++;
5177	}
5178	if (count != n->nr_partial) {
5179	pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
5180	s->name, count, n->nr_partial);
5181	slab_add_kunit_errors();
5182	}
5183
5184	if (!(s->flags & SLAB_STORE_USER))
5185	goto out;
5186
5187	list_for_each_entry(slab, &n->full, slab_list) {
5188	validate_slab(s, slab, obj_map);
5189	count++;
5190	}
5191	if (count != node_nr_slabs(n)) {
5192	pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
5193	s->name, count, node_nr_slabs(n));
5194	slab_add_kunit_errors();
5195	}
5196
5197	out:
5198	spin_unlock_irqrestore(&n->list_lock, flags);
5199	return count;
5200	}
5201
5202	long validate_slab_cache(struct kmem_cache *s)
5203	{
5204	int node;
5205	unsigned long count = `0`;
5206	struct kmem_cache_node *n;
5207	unsigned long *obj_map;
5208
5209	obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
5210	if (!obj_map)
5211	return -ENOMEM;
5212
5213	flush_all(s);
5214	for_each_kmem_cache_node(s, node, n)
5215	count += validate_slab_node(s, n, obj_map);
5216
5217	bitmap_free(obj_map);
5218
5219	return count;
5220	}
5221	EXPORT_SYMBOL(validate_slab_cache);
5222
5223	#ifdef CONFIG_DEBUG_FS
5224	/*
5225	* Generate lists of code addresses where slabcache objects are allocated
5226	* and freed.
5227	*/
5228
5229	struct location {
5230	depot_stack_handle_t handle;
5231	unsigned long count;
5232	unsigned long addr;
5233	unsigned long waste;
5234	long long sum_time;
5235	long min_time;
5236	long max_time;
5237	long min_pid;
5238	long max_pid;
5239	DECLARE_BITMAP(cpus, NR_CPUS);
5240	nodemask_t nodes;
5241	};
5242
5243	struct loc_track {
5244	unsigned long max;
5245	unsigned long count;
5246	struct location *loc;
5247	loff_t idx;
5248	};
5249
5250	static struct dentry *slab_debugfs_root;
5251
5252	static void free_loc_track(struct loc_track *t)
5253	{
5254	if (t->max)
5255	free_pages((unsigned long)t->loc,
5256	get_order(sizeof(struct location) * t->max));
5257	}
5258
5259	static int alloc_loc_track(struct loc_track t, unsigned* long max, gfp_t flags)
5260	{
5261	struct location *l;
5262	int order;
5263
5264	order = get_order(sizeof(struct location) * max);
5265
5266	l = (void *)__get_free_pages(flags, order);
5267	if (!l)
5268	return `0`;
5269
5270	if (t->count) {
5271	memcpy(l, t->loc, sizeof(struct location) * t->count);
5272	free_loc_track(t);
5273	}
5274	t->max = max;
5275	t->loc = l;
5276	return `1`;
5277	}
5278
5279	static int add_location(struct loc_track t, struct* kmem_cache *s,
5280	const struct track *track,
5281	unsigned int orig_size)
5282	{
5283	long start, end, pos;
5284	struct location *l;
5285	unsigned long caddr, chandle, cwaste;
5286	unsigned long age = jiffies - track->when;
5287	depot_stack_handle_t handle = `0`;
5288	unsigned int waste = s->object_size - orig_size;
5289
5290	#ifdef CONFIG_STACKDEPOT
5291	handle = READ_ONCE(track->handle);
5292	#endif
5293	start = -`1`;
5294	end = t->count;
5295
5296	for ( ; ; ) {
5297	pos = start + (end - start + `1`) / `2`;
5298
5299	/*
5300	* There is nothing at "end". If we end up there
5301	* we need to add something to before end.
5302	*/
5303	if (pos == end)
5304	break;
5305
5306	l = &t->loc[pos];
5307	caddr = l->addr;
5308	chandle = l->handle;
5309	cwaste = l->waste;
5310	if ((track->addr == caddr) && (handle == chandle) &&
5311	(waste == cwaste)) {
5312
5313	l->count++;
5314	if (track->when) {
5315	l->sum_time += age;
5316	if (age < l->min_time)
5317	l->min_time = age;
5318	if (age > l->max_time)
5319	l->max_time = age;
5320
5321	if (track->pid < l->min_pid)
5322	l->min_pid = track->pid;
5323	if (track->pid > l->max_pid)
5324	l->max_pid = track->pid;
5325
5326	cpumask_set_cpu(track->cpu,
5327	to_cpumask(l->cpus));
5328	}
5329	node_set(page_to_nid(virt_to_page(track)), l->nodes);
5330	return `1`;
5331	}
5332
5333	if (track->addr < caddr)
5334	end = pos;
5335	else if (track->addr == caddr && handle < chandle)
5336	end = pos;
5337	else if (track->addr == caddr && handle == chandle &&
5338	waste < cwaste)
5339	end = pos;
5340	else
5341	start = pos;
5342	}
5343
5344	/*
5345	* Not found. Insert new tracking element.
5346	*/
5347	if (t->count >= t->max && !alloc_loc_track(t, `2` * t->max, GFP_ATOMIC))
5348	return `0`;
5349
5350	l = t->loc + pos;
5351	if (pos < t->count)
5352	memmove(l + `1`, l,
5353	(t->count - pos) * sizeof(struct location));
5354	t->count++;
5355	l->count = `1`;
5356	l->addr = track->addr;
5357	l->sum_time = age;
5358	l->min_time = age;
5359	l->max_time = age;
5360	l->min_pid = track->pid;
5361	l->max_pid = track->pid;
5362	l->handle = handle;
5363	l->waste = waste;
5364	cpumask_clear(to_cpumask(l->cpus));
5365	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
5366	nodes_clear(l->nodes);
5367	node_set(page_to_nid(virt_to_page(track)), l->nodes);
5368	return `1`;
5369	}
5370
5371	static void process_slab(struct loc_track t, struct* kmem_cache *s,
5372	struct slab slab, enum* track_item alloc,
5373	unsigned long *obj_map)
5374	{
5375	void *addr = slab_address(slab);
5376	bool is_alloc = (alloc == TRACK_ALLOC);
5377	void *p;
5378
5379	__fill_map(obj_map, s, slab);
5380
5381	for_each_object(p, s, addr, slab->objects)
5382	if (!test_bit(__obj_to_index(s, addr, p), obj_map))
5383	add_location(t, s, get_track(s, p, alloc),
5384	is_alloc ? get_orig_size(s, p) :
5385	s->object_size);
5386	}
5387	#endif /* CONFIG_DEBUG_FS */
5388	#endif /* CONFIG_SLUB_DEBUG */
5389
5390	#ifdef SLAB_SUPPORTS_SYSFS
5391	enum slab_stat_type {
5392	SL_ALL, / All slabs /
5393	SL_PARTIAL, / Only partially allocated slabs /
5394	SL_CPU, / Only slabs used for cpu caches /
5395	SL_OBJECTS, / Determine allocated objects not slabs /
5396	SL_TOTAL / Determine object capacity not slabs /
5397	};
5398
5399	#define SO_ALL (1 << SL_ALL)
5400	#define SO_PARTIAL (1 << SL_PARTIAL)
5401	#define SO_CPU (1 << SL_CPU)
5402	#define SO_OBJECTS (1 << SL_OBJECTS)
5403	#define SO_TOTAL (1 << SL_TOTAL)
5404
5405	static ssize_t show_slab_objects(struct kmem_cache *s,
5406	char buf, unsigned* long flags)
5407	{
5408	unsigned long total = `0`;
5409	int node;
5410	int x;
5411	unsigned long *nodes;
5412	int len = `0`;
5413
5414	nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
5415	if (!nodes)
5416	return -ENOMEM;
5417
5418	if (flags & SO_CPU) {
5419	int cpu;
5420
5421	for_each_possible_cpu(cpu) {
5422	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
5423	cpu);
5424	int node;
5425	struct slab *slab;
5426
5427	slab = READ_ONCE(c->slab);
5428	if (!slab)
5429	continue;
5430
5431	node = slab_nid(slab);
5432	if (flags & SO_TOTAL)
5433	x = slab->objects;
5434	else if (flags & SO_OBJECTS)
5435	x = slab->inuse;
5436	else
5437	x = `1`;
5438
5439	total += x;
5440	nodes[node] += x;
5441
5442	#ifdef CONFIG_SLUB_CPU_PARTIAL
5443	slab = slub_percpu_partial_read_once(c);
5444	if (slab) {
5445	node = slab_nid(slab);
5446	if (flags & SO_TOTAL)
5447	WARN_ON_ONCE(`1`);
5448	else if (flags & SO_OBJECTS)
5449	WARN_ON_ONCE(`1`);
5450	else
5451	x = slab->slabs;
5452	total += x;
5453	nodes[node] += x;
5454	}
5455	#endif
5456	}
5457	}
5458
5459	/*
5460	* It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
5461	* already held which will conflict with an existing lock order:
5462	*
5463	* mem_hotplug_lock->slab_mutex->kernfs_mutex
5464	*
5465	* We don't really need mem_hotplug_lock (to hold off
5466	* slab_mem_going_offline_callback) here because slab's memory hot
5467	* unplug code doesn't destroy the kmem_cache->node[] data.
5468	*/
5469
5470	#ifdef CONFIG_SLUB_DEBUG
5471	if (flags & SO_ALL) {
5472	struct kmem_cache_node *n;
5473
5474	for_each_kmem_cache_node(s, node, n) {
5475
5476	if (flags & SO_TOTAL)
5477	x = node_nr_objs(n);
5478	else if (flags & SO_OBJECTS)
5479	x = node_nr_objs(n) - count_partial(n, count_free);
5480	else
5481	x = node_nr_slabs(n);
5482	total += x;
5483	nodes[node] += x;
5484	}
5485
5486	} else
5487	#endif
5488	if (flags & SO_PARTIAL) {
5489	struct kmem_cache_node *n;
5490
5491	for_each_kmem_cache_node(s, node, n) {
5492	if (flags & SO_TOTAL)
5493	x = count_partial(n, count_total);
5494	else if (flags & SO_OBJECTS)
5495	x = count_partial(n, count_inuse);
5496	else
5497	x = n->nr_partial;
5498	total += x;
5499	nodes[node] += x;
5500	}
5501	}
5502
5503	len += sysfs_emit_at(buf, len, "%lu", total);
5504	#ifdef CONFIG_NUMA
5505	for (node = `0`; node < nr_node_ids; node++) {
5506	if (nodes[node])
5507	len += sysfs_emit_at(buf, len, " N%d=%lu",
5508	node, nodes[node]);
5509	}
5510	#endif
5511	len += sysfs_emit_at(buf, len, "\n");
5512	kfree(nodes);
5513
5514	return len;
5515	}
5516
5517	#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
5518	#define to_slab(n) container_of(n, struct kmem_cache, kobj)
5519
5520	struct slab_attribute {
5521	struct attribute attr;
5522	ssize_t (show)(struct* kmem_cache s, char* *buf);
5523	ssize_t (store)(struct* kmem_cache s, const* char *x, size_t count);
5524	};
5525
5526	#define SLAB_ATTR_RO(_name) \
5527	static struct slab_attribute _name##_attr = __ATTR_RO_MODE(_name, 0400)
5528
5529	#define SLAB_ATTR(_name) \
5530	static struct slab_attribute _name##_attr = __ATTR_RW_MODE(_name, 0600)
5531
5532	static ssize_t slab_size_show(struct kmem_cache s, char* *buf)
5533	{
5534	return sysfs_emit(buf, "%u\n", s->size);
5535	}
5536	SLAB_ATTR_RO(slab_size);
5537
5538	static ssize_t align_show(struct kmem_cache s, char* *buf)
5539	{
5540	return sysfs_emit(buf, "%u\n", s->align);
5541	}
5542	SLAB_ATTR_RO(align);
5543
5544	static ssize_t object_size_show(struct kmem_cache s, char* *buf)
5545	{
5546	return sysfs_emit(buf, "%u\n", s->object_size);
5547	}
5548	SLAB_ATTR_RO(object_size);
5549
5550	static ssize_t objs_per_slab_show(struct kmem_cache s, char* *buf)
5551	{
5552	return sysfs_emit(buf, "%u\n", oo_objects(s->oo));
5553	}
5554	SLAB_ATTR_RO(objs_per_slab);
5555
5556	static ssize_t order_show(struct kmem_cache s, char* *buf)
5557	{
5558	return sysfs_emit(buf, "%u\n", oo_order(s->oo));
5559	}
5560	SLAB_ATTR_RO(order);
5561
5562	static ssize_t min_partial_show(struct kmem_cache s, char* *buf)
5563	{
5564	return sysfs_emit(buf, "%lu\n", s->min_partial);
5565	}
5566
5567	static ssize_t min_partial_store(struct kmem_cache s, const* char *buf,
5568	size_t length)
5569	{
5570	unsigned long min;
5571	int err;
5572
5573	err = kstrtoul(buf, `10`, &min);
5574	if (err)
5575	return err;
5576
5577	s->min_partial = min;
5578	return length;
5579	}
5580	SLAB_ATTR(min_partial);
5581
5582	static ssize_t cpu_partial_show(struct kmem_cache s, char* *buf)
5583	{
5584	unsigned int nr_partial = `0`;
5585	#ifdef CONFIG_SLUB_CPU_PARTIAL
5586	nr_partial = s->cpu_partial;
5587	#endif
5588
5589	return sysfs_emit(buf, "%u\n", nr_partial);
5590	}
5591
5592	static ssize_t cpu_partial_store(struct kmem_cache s, const* char *buf,
5593	size_t length)
5594	{
5595	unsigned int objects;
5596	int err;
5597
5598	err = kstrtouint(buf, `10`, &objects);
5599	if (err)
5600	return err;
5601	if (objects && !kmem_cache_has_cpu_partial(s))
5602	return -EINVAL;
5603
5604	slub_set_cpu_partial(s, objects);
5605	flush_all(s);
5606	return length;
5607	}
5608	SLAB_ATTR(cpu_partial);
5609
5610	static ssize_t ctor_show(struct kmem_cache s, char* *buf)
5611	{
5612	if (!s->ctor)
5613	return `0`;
5614	return sysfs_emit(buf, "%pS\n", s->ctor);
5615	}
5616	SLAB_ATTR_RO(ctor);
5617
5618	static ssize_t aliases_show(struct kmem_cache s, char* *buf)
5619	{
5620	return sysfs_emit(buf, "%d\n", s->refcount < `0` ? `0` : s->refcount - `1`);
5621	}
5622	SLAB_ATTR_RO(aliases);
5623
5624	static ssize_t partial_show(struct kmem_cache s, char* *buf)
5625	{
5626	return show_slab_objects(s, buf, SO_PARTIAL);
5627	}
5628	SLAB_ATTR_RO(partial);
5629
5630	static ssize_t cpu_slabs_show(struct kmem_cache s, char* *buf)
5631	{
5632	return show_slab_objects(s, buf, SO_CPU);
5633	}
5634	SLAB_ATTR_RO(cpu_slabs);
5635
5636	static ssize_t objects_partial_show(struct kmem_cache s, char* *buf)
5637	{
5638	return show_slab_objects(s, buf, SO_PARTIAL\|SO_OBJECTS);
5639	}
5640	SLAB_ATTR_RO(objects_partial);
5641
5642	static ssize_t slabs_cpu_partial_show(struct kmem_cache s, char* *buf)
5643	{
5644	int objects = `0`;
5645	int slabs = `0`;
5646	int cpu __maybe_unused;
5647	int len = `0`;
5648
5649	#ifdef CONFIG_SLUB_CPU_PARTIAL
5650	for_each_online_cpu(cpu) {
5651	struct slab *slab;
5652
5653	slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5654
5655	if (slab)
5656	slabs += slab->slabs;
5657	}
5658	#endif
5659
5660	/ Approximate half-full slabs, see slub_set_cpu_partial() /
5661	objects = (slabs * oo_objects(s->oo)) / `2`;
5662	len += sysfs_emit_at(buf, len, "%d(%d)", objects, slabs);
5663
5664	#ifdef CONFIG_SLUB_CPU_PARTIAL
5665	for_each_online_cpu(cpu) {
5666	struct slab *slab;
5667
5668	slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5669	if (slab) {
5670	slabs = READ_ONCE(slab->slabs);
5671	objects = (slabs * oo_objects(s->oo)) / `2`;
5672	len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
5673	cpu, objects, slabs);
5674	}
5675	}
5676	#endif
5677	len += sysfs_emit_at(buf, len, "\n");
5678
5679	return len;
5680	}
5681	SLAB_ATTR_RO(slabs_cpu_partial);
5682
5683	static ssize_t reclaim_account_show(struct kmem_cache s, char* *buf)
5684	{
5685	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
5686	}
5687	SLAB_ATTR_RO(reclaim_account);
5688
5689	static ssize_t hwcache_align_show(struct kmem_cache s, char* *buf)
5690	{
5691	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
5692	}
5693	SLAB_ATTR_RO(hwcache_align);
5694
5695	#ifdef CONFIG_ZONE_DMA
5696	static ssize_t cache_dma_show(struct kmem_cache s, char* *buf)
5697	{
5698	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5699	}
5700	SLAB_ATTR_RO(cache_dma);
5701	#endif
5702
5703	#ifdef CONFIG_HARDENED_USERCOPY
5704	static ssize_t usersize_show(struct kmem_cache s, char* *buf)
5705	{
5706	return sysfs_emit(buf, "%u\n", s->usersize);
5707	}
5708	SLAB_ATTR_RO(usersize);
5709	#endif
5710
5711	static ssize_t destroy_by_rcu_show(struct kmem_cache s, char* *buf)
5712	{
5713	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
5714	}
5715	SLAB_ATTR_RO(destroy_by_rcu);
5716
5717	#ifdef CONFIG_SLUB_DEBUG
5718	static ssize_t slabs_show(struct kmem_cache s, char* *buf)
5719	{
5720	return show_slab_objects(s, buf, SO_ALL);
5721	}
5722	SLAB_ATTR_RO(slabs);
5723
5724	static ssize_t total_objects_show(struct kmem_cache s, char* *buf)
5725	{
5726	return show_slab_objects(s, buf, SO_ALL\|SO_TOTAL);
5727	}
5728	SLAB_ATTR_RO(total_objects);
5729
5730	static ssize_t objects_show(struct kmem_cache s, char* *buf)
5731	{
5732	return show_slab_objects(s, buf, SO_ALL\|SO_OBJECTS);
5733	}
5734	SLAB_ATTR_RO(objects);
5735
5736	static ssize_t sanity_checks_show(struct kmem_cache s, char* *buf)
5737	{
5738	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
5739	}
5740	SLAB_ATTR_RO(sanity_checks);
5741
5742	static ssize_t trace_show(struct kmem_cache s, char* *buf)
5743	{
5744	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5745	}
5746	SLAB_ATTR_RO(trace);
5747
5748	static ssize_t red_zone_show(struct kmem_cache s, char* *buf)
5749	{
5750	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
5751	}
5752
5753	SLAB_ATTR_RO(red_zone);
5754
5755	static ssize_t poison_show(struct kmem_cache s, char* *buf)
5756	{
5757	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_POISON));
5758	}
5759
5760	SLAB_ATTR_RO(poison);
5761
5762	static ssize_t store_user_show(struct kmem_cache s, char* *buf)
5763	{
5764	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
5765	}
5766
5767	SLAB_ATTR_RO(store_user);
5768
5769	static ssize_t validate_show(struct kmem_cache s, char* *buf)
5770	{
5771	return `0`;
5772	}
5773
5774	static ssize_t validate_store(struct kmem_cache *s,
5775	const char *buf, size_t length)
5776	{
5777	int ret = -EINVAL;
5778
5779	if (buf[`0`] == `'1'` && kmem_cache_debug(s)) {
5780	ret = validate_slab_cache(s);
5781	if (ret >= `0`)
5782	ret = length;
5783	}
5784	return ret;
5785	}
5786	SLAB_ATTR(validate);
5787
5788	#endif /* CONFIG_SLUB_DEBUG */
5789
5790	#ifdef CONFIG_FAILSLAB
5791	static ssize_t failslab_show(struct kmem_cache s, char* *buf)
5792	{
5793	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
5794	}
5795
5796	static ssize_t failslab_store(struct kmem_cache s, const* char *buf,
5797	size_t length)
5798	{
5799	if (s->refcount > `1`)
5800	return -EINVAL;
5801
5802	if (buf[`0`] == `'1'`)
5803	WRITE_ONCE(s->flags, s->flags \| SLAB_FAILSLAB);
5804	else
5805	WRITE_ONCE(s->flags, s->flags & ~SLAB_FAILSLAB);
5806
5807	return length;
5808	}
5809	SLAB_ATTR(failslab);
5810	#endif
5811
5812	static ssize_t shrink_show(struct kmem_cache s, char* *buf)
5813	{
5814	return `0`;
5815	}
5816
5817	static ssize_t shrink_store(struct kmem_cache *s,
5818	const char *buf, size_t length)
5819	{
5820	if (buf[`0`] == `'1'`)
5821	kmem_cache_shrink(s);
5822	else
5823	return -EINVAL;
5824	return length;
5825	}
5826	SLAB_ATTR(shrink);
5827
5828	#ifdef CONFIG_NUMA
5829	static ssize_t remote_node_defrag_ratio_show(struct kmem_cache s, char* *buf)
5830	{
5831	return sysfs_emit(buf, "%u\n", s->remote_node_defrag_ratio / `10`);
5832	}
5833
5834	static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5835	const char *buf, size_t length)
5836	{
5837	unsigned int ratio;
5838	int err;
5839
5840	err = kstrtouint(buf, `10`, &ratio);
5841	if (err)
5842	return err;
5843	if (ratio > `100`)
5844	return -ERANGE;
5845
5846	s->remote_node_defrag_ratio = ratio * `10`;
5847
5848	return length;
5849	}
5850	SLAB_ATTR(remote_node_defrag_ratio);
5851	#endif
5852
5853	#ifdef CONFIG_SLUB_STATS
5854	static int show_stat(struct kmem_cache s, char* buf, enum* stat_item si)
5855	{
5856	unsigned long sum = `0`;
5857	int cpu;
5858	int len = `0`;
5859	int data = kmalloc_array(nr_cpu_ids, sizeof(int*), GFP_KERNEL);
5860
5861	if (!data)
5862	return -ENOMEM;
5863
5864	for_each_online_cpu(cpu) {
5865	unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5866
5867	data[cpu] = x;
5868	sum += x;
5869	}
5870
5871	len += sysfs_emit_at(buf, len, "%lu", sum);
5872
5873	#ifdef CONFIG_SMP
5874	for_each_online_cpu(cpu) {
5875	if (data[cpu])
5876	len += sysfs_emit_at(buf, len, " C%d=%u",
5877	cpu, data[cpu]);
5878	}
5879	#endif
5880	kfree(data);
5881	len += sysfs_emit_at(buf, len, "\n");
5882
5883	return len;
5884	}
5885
5886	static void clear_stat(struct kmem_cache s, enum* stat_item si)
5887	{
5888	int cpu;
5889
5890	for_each_online_cpu(cpu)
5891	per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = `0`;
5892	}
5893
5894	#define STAT_ATTR(si, text) \
5895	static ssize_t text##_show(struct kmem_cache s, char buf) \
5896	{ \
5897	return show_stat(s, buf, si); \
5898	} \
5899	static ssize_t text##_store(struct kmem_cache *s, \
5900	const char *buf, size_t length) \
5901	{ \
5902	if (buf[0] != '0') \
5903	return -EINVAL; \
5904	clear_stat(s, si); \
5905	return length; \
5906	} \
5907	SLAB_ATTR(text); \
5908
5909	STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5910	STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5911	STAT_ATTR(FREE_FASTPATH, free_fastpath);
5912	STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5913	STAT_ATTR(FREE_FROZEN, free_frozen);
5914	STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5915	STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5916	STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5917	STAT_ATTR(ALLOC_SLAB, alloc_slab);
5918	STAT_ATTR(ALLOC_REFILL, alloc_refill);
5919	STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5920	STAT_ATTR(FREE_SLAB, free_slab);
5921	STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5922	STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5923	STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5924	STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5925	STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5926	STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5927	STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5928	STAT_ATTR(ORDER_FALLBACK, order_fallback);
5929	STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5930	STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5931	STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5932	STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5933	STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
5934	STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5935	#endif /* CONFIG_SLUB_STATS */
5936
5937	#ifdef CONFIG_KFENCE
5938	static ssize_t skip_kfence_show(struct kmem_cache s, char* *buf)
5939	{
5940	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_SKIP_KFENCE));
5941	}
5942
5943	static ssize_t skip_kfence_store(struct kmem_cache *s,
5944	const char *buf, size_t length)
5945	{
5946	int ret = length;
5947
5948	if (buf[`0`] == `'0'`)
5949	s->flags &= ~SLAB_SKIP_KFENCE;
5950	else if (buf[`0`] == `'1'`)
5951	s->flags \|= SLAB_SKIP_KFENCE;
5952	else
5953	ret = -EINVAL;
5954
5955	return ret;
5956	}
5957	SLAB_ATTR(skip_kfence);
5958	#endif
5959
5960	static struct attribute *slab_attrs[] = {
5961	&slab_size_attr.attr,
5962	&object_size_attr.attr,
5963	&objs_per_slab_attr.attr,
5964	&order_attr.attr,
5965	&min_partial_attr.attr,
5966	&cpu_partial_attr.attr,
5967	&objects_partial_attr.attr,
5968	&partial_attr.attr,
5969	&cpu_slabs_attr.attr,
5970	&ctor_attr.attr,
5971	&aliases_attr.attr,
5972	&align_attr.attr,
5973	&hwcache_align_attr.attr,
5974	&reclaim_account_attr.attr,
5975	&destroy_by_rcu_attr.attr,
5976	&shrink_attr.attr,
5977	&slabs_cpu_partial_attr.attr,
5978	#ifdef CONFIG_SLUB_DEBUG
5979	&total_objects_attr.attr,
5980	&objects_attr.attr,
5981	&slabs_attr.attr,
5982	&sanity_checks_attr.attr,
5983	&trace_attr.attr,
5984	&red_zone_attr.attr,
5985	&poison_attr.attr,
5986	&store_user_attr.attr,
5987	&validate_attr.attr,
5988	#endif
5989	#ifdef CONFIG_ZONE_DMA
5990	&cache_dma_attr.attr,
5991	#endif
5992	#ifdef CONFIG_NUMA
5993	&remote_node_defrag_ratio_attr.attr,
5994	#endif
5995	#ifdef CONFIG_SLUB_STATS
5996	&alloc_fastpath_attr.attr,
5997	&alloc_slowpath_attr.attr,
5998	&free_fastpath_attr.attr,
5999	&free_slowpath_attr.attr,
6000	&free_frozen_attr.attr,
6001	&free_add_partial_attr.attr,
6002	&free_remove_partial_attr.attr,
6003	&alloc_from_partial_attr.attr,
6004	&alloc_slab_attr.attr,
6005	&alloc_refill_attr.attr,
6006	&alloc_node_mismatch_attr.attr,
6007	&free_slab_attr.attr,
6008	&cpuslab_flush_attr.attr,
6009	&deactivate_full_attr.attr,
6010	&deactivate_empty_attr.attr,
6011	&deactivate_to_head_attr.attr,
6012	&deactivate_to_tail_attr.attr,
6013	&deactivate_remote_frees_attr.attr,
6014	&deactivate_bypass_attr.attr,
6015	&order_fallback_attr.attr,
6016	&cmpxchg_double_fail_attr.attr,
6017	&cmpxchg_double_cpu_fail_attr.attr,
6018	&cpu_partial_alloc_attr.attr,
6019	&cpu_partial_free_attr.attr,
6020	&cpu_partial_node_attr.attr,
6021	&cpu_partial_drain_attr.attr,
6022	#endif
6023	#ifdef CONFIG_FAILSLAB
6024	&failslab_attr.attr,
6025	#endif
6026	#ifdef CONFIG_HARDENED_USERCOPY
6027	&usersize_attr.attr,
6028	#endif
6029	#ifdef CONFIG_KFENCE
6030	&skip_kfence_attr.attr,
6031	#endif
6032
6033	NULL
6034	};
6035
6036	static const struct attribute_group slab_attr_group = {
6037	.attrs = slab_attrs,
6038	};
6039
6040	static ssize_t slab_attr_show(struct kobject *kobj,
6041	struct attribute *attr,
6042	char *buf)
6043	{
6044	struct slab_attribute *attribute;
6045	struct kmem_cache *s;
6046
6047	attribute = to_slab_attr(attr);
6048	s = to_slab(kobj);
6049
6050	if (!attribute->show)
6051	return -EIO;
6052
6053	return attribute->show(s, buf);
6054	}
6055
6056	static ssize_t slab_attr_store(struct kobject *kobj,
6057	struct attribute *attr,
6058	const char *buf, size_t len)
6059	{
6060	struct slab_attribute *attribute;
6061	struct kmem_cache *s;
6062
6063	attribute = to_slab_attr(attr);
6064	s = to_slab(kobj);
6065
6066	if (!attribute->store)
6067	return -EIO;
6068
6069	return attribute->store(s, buf, len);
6070	}
6071
6072	static void kmem_cache_release(struct kobject *k)
6073	{
6074	slab_kmem_cache_release(to_slab(k));
6075	}
6076
6077	static const struct sysfs_ops slab_sysfs_ops = {
6078	.show = slab_attr_show,
6079	.store = slab_attr_store,
6080	};
6081
6082	static const struct kobj_type slab_ktype = {
6083	.sysfs_ops = &slab_sysfs_ops,
6084	.release = kmem_cache_release,
6085	};
6086
6087	static struct kset *slab_kset;
6088
6089	static inline struct kset cache_kset(struct* kmem_cache *s)
6090	{
6091	return slab_kset;
6092	}
6093
6094	#define ID_STR_LENGTH 32
6095
6096	/ Create a unique string id for a slab cache:*
6097	*
6098	* Format :[flags-]size
6099	*/
6100	static char create_unique_id(struct* kmem_cache *s)
6101	{
6102	char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
6103	char *p = name;
6104
6105	if (!name)
6106	return ERR_PTR(-ENOMEM);
6107
6108	*p++ = `':'`;
6109	/*
6110	* First flags affecting slabcache operations. We will only
6111	* get here for aliasable slabs so we do not need to support
6112	* too many flags. The flags here must cover all flags that
6113	* are matched during merging to guarantee that the id is
6114	* unique.
6115	*/
6116	if (s->flags & SLAB_CACHE_DMA)
6117	*p++ = `'d'`;
6118	if (s->flags & SLAB_CACHE_DMA32)
6119	*p++ = `'D'`;
6120	if (s->flags & SLAB_RECLAIM_ACCOUNT)
6121	*p++ = `'a'`;
6122	if (s->flags & SLAB_CONSISTENCY_CHECKS)
6123	*p++ = `'F'`;
6124	if (s->flags & SLAB_ACCOUNT)
6125	*p++ = `'A'`;
6126	if (p != name + `1`)
6127	*p++ = `'-'`;
6128	p += snprintf(p, ID_STR_LENGTH - (p - name), "%07u", s->size);
6129
6130	if (WARN_ON(p > name + ID_STR_LENGTH - `1`)) {
6131	kfree(name);
6132	return ERR_PTR(-EINVAL);
6133	}
6134	kmsan_unpoison_memory(name, p - name);
6135	return name;
6136	}
6137
6138	static int sysfs_slab_add(struct kmem_cache *s)
6139	{
6140	int err;
6141	const char *name;
6142	struct kset *kset = cache_kset(s);
6143	int unmergeable = slab_unmergeable(s);
6144
6145	if (!unmergeable && disable_higher_order_debug &&
6146	(slub_debug & DEBUG_METADATA_FLAGS))
6147	unmergeable = `1`;
6148
6149	if (unmergeable) {
6150	/*
6151	* Slabcache can never be merged so we can use the name proper.
6152	* This is typically the case for debug situations. In that
6153	* case we can catch duplicate names easily.
6154	*/
6155	sysfs_remove_link(&slab_kset->kobj, s->name);
6156	name = s->name;
6157	} else {
6158	/*
6159	* Create a unique name for the slab as a target
6160	* for the symlinks.
6161	*/
6162	name = create_unique_id(s);
6163	if (IS_ERR(name))
6164	return PTR_ERR(name);
6165	}
6166
6167	s->kobj.kset = kset;
6168	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
6169	if (err)
6170	goto out;
6171
6172	err = sysfs_create_group(&s->kobj, &slab_attr_group);
6173	if (err)
6174	goto out_del_kobj;
6175
6176	if (!unmergeable) {
6177	/ Setup first alias /
6178	sysfs_slab_alias(s, s->name);
6179	}
6180	out:
6181	if (!unmergeable)
6182	kfree(name);
6183	return err;
6184	out_del_kobj:
6185	kobject_del(&s->kobj);
6186	goto out;
6187	}
6188
6189	void sysfs_slab_unlink(struct kmem_cache *s)
6190	{
6191	if (slab_state >= FULL)
6192	kobject_del(&s->kobj);
6193	}
6194
6195	void sysfs_slab_release(struct kmem_cache *s)
6196	{
6197	if (slab_state >= FULL)
6198	kobject_put(&s->kobj);
6199	}
6200
6201	/*
6202	* Need to buffer aliases during bootup until sysfs becomes
6203	* available lest we lose that information.
6204	*/
6205	struct saved_alias {
6206	struct kmem_cache *s;
6207	const char *name;
6208	struct saved_alias *next;
6209	};
6210
6211	static struct saved_alias *alias_list;
6212
6213	static int sysfs_slab_alias(struct kmem_cache s, const* char *name)
6214	{
6215	struct saved_alias *al;
6216
6217	if (slab_state == FULL) {
6218	/*
6219	* If we have a leftover link then remove it.
6220	*/
6221	sysfs_remove_link(&slab_kset->kobj, name);
6222	return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
6223	}
6224
6225	al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
6226	if (!al)
6227	return -ENOMEM;
6228
6229	al->s = s;
6230	al->name = name;
6231	al->next = alias_list;
6232	alias_list = al;
6233	kmsan_unpoison_memory(al, sizeof(*al));
6234	return `0`;
6235	}
6236
6237	static int __init slab_sysfs_init(void)
6238	{
6239	struct kmem_cache *s;
6240	int err;
6241
6242	mutex_lock(&slab_mutex);
6243
6244	slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
6245	if (!slab_kset) {
6246	mutex_unlock(&slab_mutex);
6247	pr_err("Cannot register slab subsystem.\n");
6248	return -ENOMEM;
6249	}
6250
6251	slab_state = FULL;
6252
6253	list_for_each_entry(s, &slab_caches, list) {
6254	err = sysfs_slab_add(s);
6255	if (err)
6256	pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
6257	s->name);
6258	}
6259
6260	while (alias_list) {
6261	struct saved_alias *al = alias_list;
6262
6263	alias_list = alias_list->next;
6264	err = sysfs_slab_alias(al->s, al->name);
6265	if (err)
6266	pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
6267	al->name);
6268	kfree(al);
6269	}
6270
6271	mutex_unlock(&slab_mutex);
6272	return `0`;
6273	}
6274	late_initcall(slab_sysfs_init);
6275	#endif /* SLAB_SUPPORTS_SYSFS */
6276
6277	#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
6278	static int slab_debugfs_show(struct seq_file seq, void* *v)
6279	{
6280	struct loc_track *t = seq->private;
6281	struct location *l;
6282	unsigned long idx;
6283
6284	idx = (unsigned long) t->idx;
6285	if (idx < t->count) {
6286	l = &t->loc[idx];
6287
6288	seq_printf(seq, "%7ld ", l->count);
6289
6290	if (l->addr)
6291	seq_printf(seq, "%pS", (void *)l->addr);
6292	else
6293	seq_puts(seq, "<not-available>");
6294
6295	if (l->waste)
6296	seq_printf(seq, " waste=%lu/%lu",
6297	l->count * l->waste, l->waste);
6298
6299	if (l->sum_time != l->min_time) {
6300	seq_printf(seq, " age=%ld/%llu/%ld",
6301	l->min_time, div_u64(l->sum_time, l->count),
6302	l->max_time);
6303	} else
6304	seq_printf(seq, " age=%ld", l->min_time);
6305
6306	if (l->min_pid != l->max_pid)
6307	seq_printf(seq, " pid=%ld-%ld", l->min_pid, l->max_pid);
6308	else
6309	seq_printf(seq, " pid=%ld",
6310	l->min_pid);
6311
6312	if (num_online_cpus() > `1` && !cpumask_empty(to_cpumask(l->cpus)))
6313	seq_printf(seq, " cpus=%*pbl",
6314	cpumask_pr_args(to_cpumask(l->cpus)));
6315
6316	if (nr_online_nodes > `1` && !nodes_empty(l->nodes))
6317	seq_printf(seq, " nodes=%*pbl",
6318	nodemask_pr_args(&l->nodes));
6319
6320	#ifdef CONFIG_STACKDEPOT
6321	{
6322	depot_stack_handle_t handle;
6323	unsigned long *entries;
6324	unsigned int nr_entries, j;
6325
6326	handle = READ_ONCE(l->handle);
6327	if (handle) {
6328	nr_entries = stack_depot_fetch(handle, &entries);
6329	seq_puts(seq, "\n");
6330	for (j = `0`; j < nr_entries; j++)
6331	seq_printf(seq, " %pS\n", (void *)entries[j]);
6332	}
6333	}
6334	#endif
6335	seq_puts(seq, "\n");
6336	}
6337
6338	if (!idx && !t->count)
6339	seq_puts(seq, "No data\n");
6340
6341	return `0`;
6342	}
6343
6344	static void slab_debugfs_stop(struct seq_file seq, void* *v)
6345	{
6346	}
6347
6348	static void slab_debugfs_next(struct* seq_file seq, void* v, loff_t ppos)
6349	{
6350	struct loc_track *t = seq->private;
6351
6352	t->idx = ++(*ppos);
6353	if (*ppos <= t->count)
6354	return ppos;
6355
6356	return NULL;
6357	}
6358
6359	static int cmp_loc_by_count(const void a, const* void b, const* void *data)
6360	{
6361	struct location loc1 = (struct* location *)a;
6362	struct location loc2 = (struct* location *)b;
6363
6364	if (loc1->count > loc2->count)
6365	return -`1`;
6366	else
6367	return `1`;
6368	}
6369
6370	static void slab_debugfs_start(struct* seq_file seq, loff_t ppos)
6371	{
6372	struct loc_track *t = seq->private;
6373
6374	t->idx = *ppos;
6375	return ppos;
6376	}
6377
6378	static const struct seq_operations slab_debugfs_sops = {
6379	.start = slab_debugfs_start,
6380	.next = slab_debugfs_next,
6381	.stop = slab_debugfs_stop,
6382	.show = slab_debugfs_show,
6383	};
6384
6385	static int slab_debug_trace_open(struct inode inode, struct* file *filep)
6386	{
6387
6388	struct kmem_cache_node *n;
6389	enum track_item alloc;
6390	int node;
6391	struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops,
6392	sizeof(struct loc_track));
6393	struct kmem_cache *s = file_inode(filep)->i_private;
6394	unsigned long *obj_map;
6395
6396	if (!t)
6397	return -ENOMEM;
6398
6399	obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
6400	if (!obj_map) {
6401	seq_release_private(inode, filep);
6402	return -ENOMEM;
6403	}
6404
6405	if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == `0`)
6406	alloc = TRACK_ALLOC;
6407	else
6408	alloc = TRACK_FREE;
6409
6410	if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) {
6411	bitmap_free(obj_map);
6412	seq_release_private(inode, filep);
6413	return -ENOMEM;
6414	}
6415
6416	for_each_kmem_cache_node(s, node, n) {
6417	unsigned long flags;
6418	struct slab *slab;
6419
6420	if (!node_nr_slabs(n))
6421	continue;
6422
6423	spin_lock_irqsave(&n->list_lock, flags);
6424	list_for_each_entry(slab, &n->partial, slab_list)
6425	process_slab(t, s, slab, alloc, obj_map);
6426	list_for_each_entry(slab, &n->full, slab_list)
6427	process_slab(t, s, slab, alloc, obj_map);
6428	spin_unlock_irqrestore(&n->list_lock, flags);
6429	}
6430
6431	/ Sort locations by count /
6432	sort_r(t->loc, t->count, sizeof(struct location),
6433	cmp_loc_by_count, NULL, NULL);
6434
6435	bitmap_free(obj_map);
6436	return `0`;
6437	}
6438
6439	static int slab_debug_trace_release(struct inode inode, struct* file *file)
6440	{
6441	struct seq_file *seq = file->private_data;
6442	struct loc_track *t = seq->private;
6443
6444	free_loc_track(t);
6445	return seq_release_private(inode, file);
6446	}
6447
6448	static const struct file_operations slab_debugfs_fops = {
6449	.open = slab_debug_trace_open,
6450	.read = seq_read,
6451	.llseek = seq_lseek,
6452	.release = slab_debug_trace_release,
6453	};
6454
6455	static void debugfs_slab_add(struct kmem_cache *s)
6456	{
6457	struct dentry *slab_cache_dir;
6458
6459	if (unlikely(!slab_debugfs_root))
6460	return;
6461
6462	slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root);
6463
6464	debugfs_create_file("alloc_traces", `0400`,
6465	slab_cache_dir, s, &slab_debugfs_fops);
6466
6467	debugfs_create_file("free_traces", `0400`,
6468	slab_cache_dir, s, &slab_debugfs_fops);
6469	}
6470
6471	void debugfs_slab_release(struct kmem_cache *s)
6472	{
6473	debugfs_lookup_and_remove(s->name, slab_debugfs_root);
6474	}
6475
6476	static int __init slab_debugfs_init(void)
6477	{
6478	struct kmem_cache *s;
6479
6480	slab_debugfs_root = debugfs_create_dir("slab", NULL);
6481
6482	list_for_each_entry(s, &slab_caches, list)
6483	if (s->flags & SLAB_STORE_USER)
6484	debugfs_slab_add(s);
6485
6486	return `0`;
6487
6488	}
6489	__initcall(slab_debugfs_init);
6490	#endif
6491	/*
6492	* The /proc/slabinfo ABI
6493	*/
6494	#ifdef CONFIG_SLUB_DEBUG
6495	void get_slabinfo(struct kmem_cache s, struct* slabinfo *sinfo)
6496	{
6497	unsigned long nr_slabs = `0`;
6498	unsigned long nr_objs = `0`;
6499	unsigned long nr_free = `0`;
6500	int node;
6501	struct kmem_cache_node *n;
6502
6503	for_each_kmem_cache_node(s, node, n) {
6504	nr_slabs += node_nr_slabs(n);
6505	nr_objs += node_nr_objs(n);
6506	nr_free += count_partial(n, count_free);
6507	}
6508
6509	sinfo->active_objs = nr_objs - nr_free;
6510	sinfo->num_objs = nr_objs;
6511	sinfo->active_slabs = nr_slabs;
6512	sinfo->num_slabs = nr_slabs;
6513	sinfo->objects_per_slab = oo_objects(s->oo);
6514	sinfo->cache_order = oo_order(s->oo);
6515	}
6516
6517	void slabinfo_show_stats(struct seq_file m, struct* kmem_cache *s)
6518	{
6519	}
6520
6521	ssize_t slabinfo_write(struct file file, const* char __user *buffer,
6522	size_t count, loff_t *ppos)
6523	{
6524	return -EIO;
6525	}
6526	#endif /* CONFIG_SLUB_DEBUG */
6527

source code of linux/mm/slub.c