1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Slab allocator functions that are independent of the allocator strategy |
4 | * |
5 | * (C) 2012 Christoph Lameter <cl@linux.com> |
6 | */ |
7 | #include <linux/slab.h> |
8 | |
9 | #include <linux/mm.h> |
10 | #include <linux/poison.h> |
11 | #include <linux/interrupt.h> |
12 | #include <linux/memory.h> |
13 | #include <linux/cache.h> |
14 | #include <linux/compiler.h> |
15 | #include <linux/kfence.h> |
16 | #include <linux/module.h> |
17 | #include <linux/cpu.h> |
18 | #include <linux/uaccess.h> |
19 | #include <linux/seq_file.h> |
20 | #include <linux/dma-mapping.h> |
21 | #include <linux/swiotlb.h> |
22 | #include <linux/proc_fs.h> |
23 | #include <linux/debugfs.h> |
24 | #include <linux/kasan.h> |
25 | #include <asm/cacheflush.h> |
26 | #include <asm/tlbflush.h> |
27 | #include <asm/page.h> |
28 | #include <linux/memcontrol.h> |
29 | #include <linux/stackdepot.h> |
30 | |
31 | #include "internal.h" |
32 | #include "slab.h" |
33 | |
34 | #define CREATE_TRACE_POINTS |
35 | #include <trace/events/kmem.h> |
36 | |
37 | enum slab_state slab_state; |
38 | LIST_HEAD(slab_caches); |
39 | DEFINE_MUTEX(slab_mutex); |
40 | struct kmem_cache *kmem_cache; |
41 | |
42 | static LIST_HEAD(slab_caches_to_rcu_destroy); |
43 | static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work); |
44 | static DECLARE_WORK(slab_caches_to_rcu_destroy_work, |
45 | slab_caches_to_rcu_destroy_workfn); |
46 | |
47 | /* |
48 | * Set of flags that will prevent slab merging |
49 | */ |
50 | #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ |
51 | SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \ |
52 | SLAB_FAILSLAB | SLAB_NO_MERGE | kasan_never_merge()) |
53 | |
54 | #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ |
55 | SLAB_CACHE_DMA32 | SLAB_ACCOUNT) |
56 | |
57 | /* |
58 | * Merge control. If this is set then no merging of slab caches will occur. |
59 | */ |
60 | static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); |
61 | |
62 | static int __init setup_slab_nomerge(char *str) |
63 | { |
64 | slab_nomerge = true; |
65 | return 1; |
66 | } |
67 | |
68 | static int __init setup_slab_merge(char *str) |
69 | { |
70 | slab_nomerge = false; |
71 | return 1; |
72 | } |
73 | |
74 | #ifdef CONFIG_SLUB |
75 | __setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0); |
76 | __setup_param("slub_merge", slub_merge, setup_slab_merge, 0); |
77 | #endif |
78 | |
79 | __setup("slab_nomerge", setup_slab_nomerge); |
80 | __setup("slab_merge", setup_slab_merge); |
81 | |
82 | /* |
83 | * Determine the size of a slab object |
84 | */ |
85 | unsigned int kmem_cache_size(struct kmem_cache *s) |
86 | { |
87 | return s->object_size; |
88 | } |
89 | EXPORT_SYMBOL(kmem_cache_size); |
90 | |
91 | #ifdef CONFIG_DEBUG_VM |
92 | static int kmem_cache_sanity_check(const char *name, unsigned int size) |
93 | { |
94 | if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) { |
95 | pr_err("kmem_cache_create(%s) integrity check failed\n", name); |
96 | return -EINVAL; |
97 | } |
98 | |
99 | WARN_ON(strchr(name, ' ')); /* It confuses parsers */ |
100 | return 0; |
101 | } |
102 | #else |
103 | static inline int kmem_cache_sanity_check(const char *name, unsigned int size) |
104 | { |
105 | return 0; |
106 | } |
107 | #endif |
108 | |
109 | /* |
110 | * Figure out what the alignment of the objects will be given a set of |
111 | * flags, a user specified alignment and the size of the objects. |
112 | */ |
113 | static unsigned int calculate_alignment(slab_flags_t flags, |
114 | unsigned int align, unsigned int size) |
115 | { |
116 | /* |
117 | * If the user wants hardware cache aligned objects then follow that |
118 | * suggestion if the object is sufficiently large. |
119 | * |
120 | * The hardware cache alignment cannot override the specified |
121 | * alignment, though. If that is greater, use it. |
122 | */ |
123 | if (flags & SLAB_HWCACHE_ALIGN) { |
124 | unsigned int ralign; |
125 | |
126 | ralign = cache_line_size(); |
127 | while (size <= ralign / 2) |
128 | ralign /= 2; |
129 | align = max(align, ralign); |
130 | } |
131 | |
132 | align = max(align, arch_slab_minalign()); |
133 | |
134 | return ALIGN(align, sizeof(void *)); |
135 | } |
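
/*
 * Worked example (illustrative only, assuming a 64-byte cache line): for a
 * 20-byte object created with SLAB_HWCACHE_ALIGN and no explicit alignment,
 * ralign starts at 64 and is halved while size <= ralign / 2, ending at 32.
 * The result is then raised to arch_slab_minalign() and rounded up to a
 * multiple of sizeof(void *), so small objects are not padded out to a full
 * cache line.
 */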
136 | |
137 | /* |
138 | * Determine whether a slab cache may be merged with others |
139 | */ |
140 | int slab_unmergeable(struct kmem_cache *s) |
141 | { |
142 | if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE)) |
143 | return 1; |
144 | |
145 | if (s->ctor) |
146 | return 1; |
147 | |
148 | #ifdef CONFIG_HARDENED_USERCOPY |
149 | if (s->usersize) |
150 | return 1; |
151 | #endif |
152 | |
153 | /* |
154 | * We may have set a slab to be unmergeable during bootstrap. |
155 | */ |
156 | if (s->refcount < 0) |
157 | return 1; |
158 | |
159 | return 0; |
160 | } |
161 | |
162 | struct kmem_cache *find_mergeable(unsigned int size, unsigned int align, |
163 | slab_flags_t flags, const char *name, void (*ctor)(void *)) |
164 | { |
165 | struct kmem_cache *s; |
166 | |
167 | if (slab_nomerge) |
168 | return NULL; |
169 | |
170 | if (ctor) |
171 | return NULL; |
172 | |
173 | size = ALIGN(size, sizeof(void *)); |
174 | align = calculate_alignment(flags, align, size); |
175 | size = ALIGN(size, align); |
176 | flags = kmem_cache_flags(size, flags, name); |
177 | |
178 | if (flags & SLAB_NEVER_MERGE) |
179 | return NULL; |
180 | |
181 | list_for_each_entry_reverse(s, &slab_caches, list) { |
182 | if (slab_unmergeable(s)) |
183 | continue; |
184 | |
185 | if (size > s->size) |
186 | continue; |
187 | |
188 | if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME)) |
189 | continue; |
190 | /* |
191 | * Check if alignment is compatible. |
192 | * Courtesy of Adrian Drzewiecki |
193 | */ |
194 | if ((s->size & ~(align - 1)) != s->size) |
195 | continue; |
196 | |
197 | if (s->size - size >= sizeof(void *)) |
198 | continue; |
199 | |
200 | if (IS_ENABLED(CONFIG_SLAB) && align && |
201 | (align > s->align || s->align % align)) |
202 | continue; |
203 | |
204 | return s; |
205 | } |
206 | return NULL; |
207 | } |
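
/*
 * Example of the merge criteria above (hypothetical sizes, illustrative
 * only): a request for 60-byte objects with default flags, no constructor
 * and pointer-size alignment is first rounded up to 64 bytes and can then
 * reuse an existing mergeable 64-byte cache whose SLAB_MERGE_SAME flags
 * match, since less than sizeof(void *) bytes of the existing object size
 * would be wasted.
 */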
208 | |
209 | static struct kmem_cache *create_cache(const char *name, |
210 | unsigned int object_size, unsigned int align, |
211 | slab_flags_t flags, unsigned int useroffset, |
212 | unsigned int usersize, void (*ctor)(void *), |
213 | struct kmem_cache *root_cache) |
214 | { |
215 | struct kmem_cache *s; |
216 | int err; |
217 | |
218 | if (WARN_ON(useroffset + usersize > object_size)) |
219 | useroffset = usersize = 0; |
220 | |
221 | err = -ENOMEM; |
222 | s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); |
223 | if (!s) |
224 | goto out; |
225 | |
226 | s->name = name; |
227 | s->size = s->object_size = object_size; |
228 | s->align = align; |
229 | s->ctor = ctor; |
230 | #ifdef CONFIG_HARDENED_USERCOPY |
231 | s->useroffset = useroffset; |
232 | s->usersize = usersize; |
233 | #endif |
234 | |
235 | err = __kmem_cache_create(s, flags); |
236 | if (err) |
237 | goto out_free_cache; |
238 | |
239 | s->refcount = 1; |
240 | list_add(&s->list, &slab_caches); |
241 | return s; |
242 | |
243 | out_free_cache: |
244 | kmem_cache_free(kmem_cache, s); |
245 | out: |
246 | return ERR_PTR(err); |
247 | } |
248 | |
249 | /** |
250 | * kmem_cache_create_usercopy - Create a cache with a region suitable |
251 | * for copying to userspace |
252 | * @name: A string which is used in /proc/slabinfo to identify this cache. |
253 | * @size: The size of objects to be created in this cache. |
254 | * @align: The required alignment for the objects. |
255 | * @flags: SLAB flags |
256 | * @useroffset: Usercopy region offset |
257 | * @usersize: Usercopy region size |
258 | * @ctor: A constructor for the objects. |
259 | * |
260 | * Cannot be called within an interrupt, but can be interrupted. |
261 | * The @ctor is run when new pages are allocated by the cache. |
262 | * |
263 | * The flags are |
264 | * |
265 | * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) |
266 | * to catch references to uninitialised memory. |
267 | * |
268 | * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check |
269 | * for buffer overruns. |
270 | * |
271 | * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware |
272 | * cacheline. This can be beneficial if you're counting cycles as closely |
273 | * as davem. |
274 | * |
275 | * Return: a pointer to the cache on success, NULL on failure. |
276 | */ |
277 | struct kmem_cache * |
278 | kmem_cache_create_usercopy(const char *name, |
279 | unsigned int size, unsigned int align, |
280 | slab_flags_t flags, |
281 | unsigned int useroffset, unsigned int usersize, |
282 | void (*ctor)(void *)) |
283 | { |
284 | struct kmem_cache *s = NULL; |
285 | const char *cache_name; |
286 | int err; |
287 | |
288 | #ifdef CONFIG_SLUB_DEBUG |
289 | /* |
290 | * If no slub_debug was enabled globally, the static key is not yet |
291 | * enabled by setup_slub_debug(). Enable it if the cache is being |
292 | * created with any of the debugging flags passed explicitly. |
293 | * It's also possible that this is the first cache created with |
294 | * SLAB_STORE_USER and we should init stack_depot for it. |
295 | */ |
296 | if (flags & SLAB_DEBUG_FLAGS) |
297 | static_branch_enable(&slub_debug_enabled); |
298 | if (flags & SLAB_STORE_USER) |
299 | stack_depot_init(); |
300 | #endif |
301 | |
302 | mutex_lock(&slab_mutex); |
303 | |
304 | err = kmem_cache_sanity_check(name, size); |
305 | if (err) { |
306 | goto out_unlock; |
307 | } |
308 | |
309 | /* Refuse requests with allocator specific flags */ |
310 | if (flags & ~SLAB_FLAGS_PERMITTED) { |
311 | err = -EINVAL; |
312 | goto out_unlock; |
313 | } |
314 | |
315 | /* |
316 | * Some allocators will constrain the set of valid flags to a subset |
317 | * of all flags. We expect them to define CACHE_CREATE_MASK in this |
318 | * case, and we'll just provide them with a sanitized version of the |
319 | * passed flags. |
320 | */ |
321 | flags &= CACHE_CREATE_MASK; |
322 | |
323 | /* Fail closed on bad usersize or useroffset values. */ |
324 | if (!IS_ENABLED(CONFIG_HARDENED_USERCOPY) || |
325 | WARN_ON(!usersize && useroffset) || |
326 | WARN_ON(size < usersize || size - usersize < useroffset)) |
327 | usersize = useroffset = 0; |
328 | |
329 | if (!usersize) |
330 | s = __kmem_cache_alias(name, size, align, flags, ctor); |
331 | if (s) |
332 | goto out_unlock; |
333 | |
334 | cache_name = kstrdup_const(name, GFP_KERNEL); |
335 | if (!cache_name) { |
336 | err = -ENOMEM; |
337 | goto out_unlock; |
338 | } |
339 | |
340 | s = create_cache(cache_name, size, |
341 | calculate_alignment(flags, align, size), |
342 | flags, useroffset, usersize, ctor, NULL); |
343 | if (IS_ERR(s)) { |
344 | err = PTR_ERR(s); |
345 | kfree_const(cache_name); |
346 | } |
347 | |
348 | out_unlock: |
349 | mutex_unlock(&slab_mutex); |
350 | |
351 | if (err) { |
352 | if (flags & SLAB_PANIC) |
353 | panic("%s: Failed to create slab '%s'. Error %d\n", |
354 | __func__, name, err); |
355 | else { |
356 | pr_warn("%s(%s) failed with error %d\n", |
357 | __func__, name, err); |
358 | dump_stack(); |
359 | } |
360 | return NULL; |
361 | } |
362 | return s; |
363 | } |
364 | EXPORT_SYMBOL(kmem_cache_create_usercopy); |
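
/*
 * A minimal usage sketch (hypothetical structure and names, illustrative
 * only): whitelist just the region of the object that is copied to or from
 * user space, so hardened usercopy checks can reject copies that touch the
 * rest of the object.
 *
 *	struct session {
 *		spinlock_t lock;
 *		char payload[128];
 *	};
 *
 *	static struct kmem_cache *session_cache;
 *
 *	session_cache = kmem_cache_create_usercopy("session",
 *			sizeof(struct session), 0, SLAB_ACCOUNT,
 *			offsetof(struct session, payload),
 *			sizeof_field(struct session, payload), NULL);
 */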
365 | |
366 | /** |
367 | * kmem_cache_create - Create a cache. |
368 | * @name: A string which is used in /proc/slabinfo to identify this cache. |
369 | * @size: The size of objects to be created in this cache. |
370 | * @align: The required alignment for the objects. |
371 | * @flags: SLAB flags |
372 | * @ctor: A constructor for the objects. |
373 | * |
374 | * Cannot be called within an interrupt, but can be interrupted. |
375 | * The @ctor is run when new pages are allocated by the cache. |
376 | * |
377 | * The flags are |
378 | * |
379 | * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) |
380 | * to catch references to uninitialised memory. |
381 | * |
382 | * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check |
383 | * for buffer overruns. |
384 | * |
385 | * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware |
386 | * cacheline. This can be beneficial if you're counting cycles as closely |
387 | * as davem. |
388 | * |
389 | * Return: a pointer to the cache on success, NULL on failure. |
390 | */ |
391 | struct kmem_cache * |
392 | kmem_cache_create(const char *name, unsigned int size, unsigned int align, |
393 | slab_flags_t flags, void (*ctor)(void *)) |
394 | { |
395 | return kmem_cache_create_usercopy(name, size, align, flags, 0, 0, |
396 | ctor); |
397 | } |
398 | EXPORT_SYMBOL(kmem_cache_create); |
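
/*
 * A minimal usage sketch (hypothetical names, illustrative only):
 *
 *	struct foo { int a; struct list_head node; };
 *	static struct kmem_cache *foo_cache;
 *
 *	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				      SLAB_HWCACHE_ALIGN, NULL);
 *	if (!foo_cache)
 *		return -ENOMEM;
 *
 *	obj = kmem_cache_zalloc(foo_cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cache, obj);
 *	kmem_cache_destroy(foo_cache);
 */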
399 | |
400 | #ifdef SLAB_SUPPORTS_SYSFS |
401 | /* |
402 | * For a given kmem_cache, kmem_cache_destroy() should only be called |
403 | * once or there will be a use-after-free problem. The actual deletion |
404 | * and release of the kobject does not need slab_mutex or cpu_hotplug_lock |
405 | * protection. So they are now done without holding those locks. |
406 | * |
407 | * Note that there will be a slight delay in the deletion of sysfs files |
408 | * if kmem_cache_release() is called indirectly from a work function. |
409 | */ |
410 | static void kmem_cache_release(struct kmem_cache *s) |
411 | { |
412 | sysfs_slab_unlink(s); |
413 | sysfs_slab_release(s); |
414 | } |
415 | #else |
416 | static void kmem_cache_release(struct kmem_cache *s) |
417 | { |
418 | slab_kmem_cache_release(s); |
419 | } |
420 | #endif |
421 | |
422 | static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) |
423 | { |
424 | LIST_HEAD(to_destroy); |
425 | struct kmem_cache *s, *s2; |
426 | |
427 | /* |
428 | * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the |
429 | * @slab_caches_to_rcu_destroy list. The slab pages are freed |
430 | * through RCU and the associated kmem_cache is dereferenced |
431 | * while freeing the pages, so the kmem_caches should be freed only |
432 | * after the pending RCU operations are finished. As rcu_barrier() |
433 | * is a pretty slow operation, we batch all pending destructions |
434 | * asynchronously. |
435 | */ |
436 | mutex_lock(&slab_mutex); |
437 | list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy); |
438 | mutex_unlock(&slab_mutex); |
439 | |
440 | if (list_empty(&to_destroy)) |
441 | return; |
442 | |
443 | rcu_barrier(); |
444 | |
445 | list_for_each_entry_safe(s, s2, &to_destroy, list) { |
446 | debugfs_slab_release(s); |
447 | kfence_shutdown_cache(s); |
448 | kmem_cache_release(s); |
449 | } |
450 | } |
451 | |
452 | static int shutdown_cache(struct kmem_cache *s) |
453 | { |
454 | /* free asan quarantined objects */ |
455 | kasan_cache_shutdown(s); |
456 | |
457 | if (__kmem_cache_shutdown(s) != 0) |
458 | return -EBUSY; |
459 | |
460 | list_del(&s->list); |
461 | |
462 | if (s->flags & SLAB_TYPESAFE_BY_RCU) { |
463 | list_add_tail(&s->list, &slab_caches_to_rcu_destroy); |
464 | schedule_work(&slab_caches_to_rcu_destroy_work); |
465 | } else { |
466 | kfence_shutdown_cache(s); |
467 | debugfs_slab_release(s); |
468 | } |
469 | |
470 | return 0; |
471 | } |
472 | |
473 | void slab_kmem_cache_release(struct kmem_cache *s) |
474 | { |
475 | __kmem_cache_release(s); |
476 | kfree_const(s->name); |
477 | kmem_cache_free(kmem_cache, s); |
478 | } |
479 | |
480 | void kmem_cache_destroy(struct kmem_cache *s) |
481 | { |
482 | int err = -EBUSY; |
483 | bool rcu_set; |
484 | |
485 | if (unlikely(!s) || !kasan_check_byte(s)) |
486 | return; |
487 | |
488 | cpus_read_lock(); |
489 | mutex_lock(&slab_mutex); |
490 | |
491 | rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU; |
492 | |
493 | s->refcount--; |
494 | if (s->refcount) |
495 | goto out_unlock; |
496 | |
497 | err = shutdown_cache(s); |
498 | WARN(err, "%s %s: Slab cache still has objects when called from %pS", |
499 | __func__, s->name, (void *)_RET_IP_); |
500 | out_unlock: |
501 | mutex_unlock(&slab_mutex); |
502 | cpus_read_unlock(); |
503 | if (!err && !rcu_set) |
504 | kmem_cache_release(s); |
505 | } |
506 | EXPORT_SYMBOL(kmem_cache_destroy); |
507 | |
508 | /** |
509 | * kmem_cache_shrink - Shrink a cache. |
510 | * @cachep: The cache to shrink. |
511 | * |
512 | * Releases as many slabs as possible for a cache. |
513 | * To help debugging, a zero exit status indicates all slabs were released. |
514 | * |
515 | * Return: %0 if all slabs were released, non-zero otherwise |
516 | */ |
517 | int kmem_cache_shrink(struct kmem_cache *cachep) |
518 | { |
519 | kasan_cache_shrink(cachep); |
520 | |
521 | return __kmem_cache_shrink(cachep); |
522 | } |
523 | EXPORT_SYMBOL(kmem_cache_shrink); |
524 | |
525 | bool slab_is_available(void) |
526 | { |
527 | return slab_state >= UP; |
528 | } |
529 | |
530 | #ifdef CONFIG_PRINTK |
531 | static void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) |
532 | { |
533 | if (__kfence_obj_info(kpp, object, slab)) |
534 | return; |
535 | __kmem_obj_info(kpp, object, slab); |
536 | } |
537 | |
538 | /** |
539 | * kmem_dump_obj - Print available slab provenance information |
540 | * @object: slab object for which to find provenance information. |
541 | * |
542 | * This function uses pr_cont(), so that the caller is expected to have |
543 | * printed out whatever preamble is appropriate. The provenance information |
544 | * depends on the type of object and on how much debugging is enabled. |
545 | * For a slab-cache object, the fact that it is a slab object is printed, |
546 | * and, if available, the slab name, return address, and stack trace from |
547 | * the allocation and last free path of that object. |
548 | * |
549 | * Return: %true if the pointer is to a not-yet-freed object from |
550 | * kmalloc() or kmem_cache_alloc(), either %true or %false if the pointer |
551 | * is to an already-freed object, and %false otherwise. |
552 | */ |
553 | bool kmem_dump_obj(void *object) |
554 | { |
555 | char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc"; |
556 | int i; |
557 | struct slab *slab; |
558 | unsigned long ptroffset; |
559 | struct kmem_obj_info kp = { }; |
560 | |
561 | /* Some arches consider ZERO_SIZE_PTR to be a valid address. */ |
562 | if (object < (void *)PAGE_SIZE || !virt_addr_valid(object)) |
563 | return false; |
564 | slab = virt_to_slab(object); |
565 | if (!slab) |
566 | return false; |
567 | |
568 | kmem_obj_info(&kp, object, slab); |
569 | if (kp.kp_slab_cache) |
570 | pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name); |
571 | else |
572 | pr_cont(" slab%s", cp); |
573 | if (is_kfence_address(object)) |
574 | pr_cont(" (kfence)"); |
575 | if (kp.kp_objp) |
576 | pr_cont(" start %px", kp.kp_objp); |
577 | if (kp.kp_data_offset) |
578 | pr_cont(" data offset %lu", kp.kp_data_offset); |
579 | if (kp.kp_objp) { |
580 | ptroffset = ((char *)object - (char *)kp.kp_objp) - kp.kp_data_offset; |
581 | pr_cont(" pointer offset %lu", ptroffset); |
582 | } |
583 | if (kp.kp_slab_cache && kp.kp_slab_cache->object_size) |
584 | pr_cont(" size %u", kp.kp_slab_cache->object_size); |
585 | if (kp.kp_ret) |
586 | pr_cont(" allocated at %pS\n", kp.kp_ret); |
587 | else |
588 | pr_cont("\n"); |
589 | for (i = 0; i < ARRAY_SIZE(kp.kp_stack); i++) { |
590 | if (!kp.kp_stack[i]) |
591 | break; |
592 | pr_info(" %pS\n", kp.kp_stack[i]); |
593 | } |
594 | |
595 | if (kp.kp_free_stack[0]) |
596 | pr_cont(" Free path:\n"); |
597 | |
598 | for (i = 0; i < ARRAY_SIZE(kp.kp_free_stack); i++) { |
599 | if (!kp.kp_free_stack[i]) |
600 | break; |
601 | pr_info(" %pS\n", kp.kp_free_stack[i]); |
602 | } |
603 | |
604 | return true; |
605 | } |
606 | EXPORT_SYMBOL_GPL(kmem_dump_obj); |
607 | #endif |
608 | |
609 | /* Create a cache during boot when no slab services are available yet */ |
610 | void __init create_boot_cache(struct kmem_cache *s, const char *name, |
611 | unsigned int size, slab_flags_t flags, |
612 | unsigned int useroffset, unsigned int usersize) |
613 | { |
614 | int err; |
615 | unsigned int align = ARCH_KMALLOC_MINALIGN; |
616 | |
617 | s->name = name; |
618 | s->size = s->object_size = size; |
619 | |
620 | /* |
621 | * For power of two sizes, guarantee natural alignment for kmalloc |
622 | * caches, regardless of SL*B debugging options. |
623 | */ |
624 | if (is_power_of_2(size)) |
625 | align = max(align, size); |
626 | s->align = calculate_alignment(flags, align, size); |
627 | |
628 | #ifdef CONFIG_HARDENED_USERCOPY |
629 | s->useroffset = useroffset; |
630 | s->usersize = usersize; |
631 | #endif |
632 | |
633 | err = __kmem_cache_create(s, flags); |
634 | |
635 | if (err) |
636 | panic("Creation of kmalloc slab %s size=%u failed. Reason %d\n", |
637 | name, size, err); |
638 | |
639 | s->refcount = -1; /* Exempt from merging for now */ |
640 | } |
641 | |
642 | static struct kmem_cache *__init create_kmalloc_cache(const char *name, |
643 | unsigned int size, |
644 | slab_flags_t flags) |
645 | { |
646 | struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); |
647 | |
648 | if (!s) |
649 | panic("Out of memory when creating slab %s\n", name); |
650 | |
651 | create_boot_cache(s, name, size, flags | SLAB_KMALLOC, 0, size); |
652 | list_add(&s->list, &slab_caches); |
653 | s->refcount = 1; |
654 | return s; |
655 | } |
656 | |
657 | struct kmem_cache * |
658 | kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init = |
659 | { /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ }; |
660 | EXPORT_SYMBOL(kmalloc_caches); |
661 | |
662 | #ifdef CONFIG_RANDOM_KMALLOC_CACHES |
663 | unsigned long random_kmalloc_seed __ro_after_init; |
664 | EXPORT_SYMBOL(random_kmalloc_seed); |
665 | #endif |
666 | |
667 | /* |
668 | * Conversion table for small slab sizes / 8 to the index in the |
669 | * kmalloc array. This is necessary for slabs < 192 since we have non power |
670 | * of two cache sizes there. The size of larger slabs can be determined using |
671 | * fls. |
672 | */ |
673 | static u8 size_index[24] __ro_after_init = { |
674 | 3, /* 8 */ |
675 | 4, /* 16 */ |
676 | 5, /* 24 */ |
677 | 5, /* 32 */ |
678 | 6, /* 40 */ |
679 | 6, /* 48 */ |
680 | 6, /* 56 */ |
681 | 6, /* 64 */ |
682 | 1, /* 72 */ |
683 | 1, /* 80 */ |
684 | 1, /* 88 */ |
685 | 1, /* 96 */ |
686 | 7, /* 104 */ |
687 | 7, /* 112 */ |
688 | 7, /* 120 */ |
689 | 7, /* 128 */ |
690 | 2, /* 136 */ |
691 | 2, /* 144 */ |
692 | 2, /* 152 */ |
693 | 2, /* 160 */ |
694 | 2, /* 168 */ |
695 | 2, /* 176 */ |
696 | 2, /* 184 */ |
697 | 2 /* 192 */ |
698 | }; |
699 | |
700 | static inline unsigned int size_index_elem(unsigned int bytes) |
701 | { |
702 | return (bytes - 1) / 8; |
703 | } |
704 | |
705 | /* |
706 | * Find the kmem_cache structure that serves a given size of |
707 | * allocation |
708 | */ |
709 | struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller) |
710 | { |
711 | unsigned int index; |
712 | |
713 | if (size <= 192) { |
714 | if (!size) |
715 | return ZERO_SIZE_PTR; |
716 | |
717 | index = size_index[size_index_elem(size)]; |
718 | } else { |
719 | if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE)) |
720 | return NULL; |
721 | index = fls(size - 1); |
722 | } |
723 | |
724 | return kmalloc_caches[kmalloc_type(flags, caller)][index]; |
725 | } |
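
/*
 * Worked example (with the default size_index table above): a 100-byte
 * request gives size_index_elem(100) == 12 and size_index[12] == 7, i.e.
 * the kmalloc-128 cache; a 1000-byte request takes the fls() path, where
 * fls(999) == 10 selects the kmalloc-1024 cache.
 */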
726 | |
727 | size_t kmalloc_size_roundup(size_t size) |
728 | { |
729 | if (size && size <= KMALLOC_MAX_CACHE_SIZE) { |
730 | /* |
731 | * The flags don't matter since size_index is common to all. |
732 | * Neither does the caller for just getting ->object_size. |
733 | */ |
734 | return kmalloc_slab(size, GFP_KERNEL, 0)->object_size; |
735 | } |
736 | |
737 | /* Above the smaller buckets, size is a multiple of page size. */ |
738 | if (size && size <= KMALLOC_MAX_SIZE) |
739 | return PAGE_SIZE << get_order(size); |
740 | |
741 | /* |
742 | * Return 'size' for 0 - kmalloc() returns ZERO_SIZE_PTR |
743 | * and very large size - kmalloc() may fail. |
744 | */ |
745 | return size; |
746 | |
747 | } |
748 | EXPORT_SYMBOL(kmalloc_size_roundup); |
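
/*
 * A minimal usage sketch (hypothetical buffer and names, illustrative only):
 * when a buffer may grow later, allocating the full bucket size up front
 * (e.g. 100 -> 128, 1000 -> 1024) avoids a later krealloc() that would land
 * in the same bucket anyway.
 *
 *	size_t want = kmalloc_size_roundup(count);
 *	buf = kmalloc(want, GFP_KERNEL);
 *	if (buf)
 *		capacity = want;
 */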
749 | |
750 | #ifdef CONFIG_ZONE_DMA |
751 | #define KMALLOC_DMA_NAME(sz) .name[KMALLOC_DMA] = "dma-kmalloc-" #sz, |
752 | #else |
753 | #define KMALLOC_DMA_NAME(sz) |
754 | #endif |
755 | |
756 | #ifdef CONFIG_MEMCG_KMEM |
757 | #define KMALLOC_CGROUP_NAME(sz) .name[KMALLOC_CGROUP] = "kmalloc-cg-" #sz, |
758 | #else |
759 | #define KMALLOC_CGROUP_NAME(sz) |
760 | #endif |
761 | |
762 | #ifndef CONFIG_SLUB_TINY |
763 | #define KMALLOC_RCL_NAME(sz) .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #sz, |
764 | #else |
765 | #define KMALLOC_RCL_NAME(sz) |
766 | #endif |
767 | |
768 | #ifdef CONFIG_RANDOM_KMALLOC_CACHES |
769 | #define __KMALLOC_RANDOM_CONCAT(a, b) a ## b |
770 | #define KMALLOC_RANDOM_NAME(N, sz) __KMALLOC_RANDOM_CONCAT(KMA_RAND_, N)(sz) |
771 | #define KMA_RAND_1(sz) .name[KMALLOC_RANDOM_START + 1] = "kmalloc-rnd-01-" #sz, |
772 | #define KMA_RAND_2(sz) KMA_RAND_1(sz) .name[KMALLOC_RANDOM_START + 2] = "kmalloc-rnd-02-" #sz, |
773 | #define KMA_RAND_3(sz) KMA_RAND_2(sz) .name[KMALLOC_RANDOM_START + 3] = "kmalloc-rnd-03-" #sz, |
774 | #define KMA_RAND_4(sz) KMA_RAND_3(sz) .name[KMALLOC_RANDOM_START + 4] = "kmalloc-rnd-04-" #sz, |
775 | #define KMA_RAND_5(sz) KMA_RAND_4(sz) .name[KMALLOC_RANDOM_START + 5] = "kmalloc-rnd-05-" #sz, |
776 | #define KMA_RAND_6(sz) KMA_RAND_5(sz) .name[KMALLOC_RANDOM_START + 6] = "kmalloc-rnd-06-" #sz, |
777 | #define KMA_RAND_7(sz) KMA_RAND_6(sz) .name[KMALLOC_RANDOM_START + 7] = "kmalloc-rnd-07-" #sz, |
778 | #define KMA_RAND_8(sz) KMA_RAND_7(sz) .name[KMALLOC_RANDOM_START + 8] = "kmalloc-rnd-08-" #sz, |
779 | #define KMA_RAND_9(sz) KMA_RAND_8(sz) .name[KMALLOC_RANDOM_START + 9] = "kmalloc-rnd-09-" #sz, |
780 | #define KMA_RAND_10(sz) KMA_RAND_9(sz) .name[KMALLOC_RANDOM_START + 10] = "kmalloc-rnd-10-" #sz, |
781 | #define KMA_RAND_11(sz) KMA_RAND_10(sz) .name[KMALLOC_RANDOM_START + 11] = "kmalloc-rnd-11-" #sz, |
782 | #define KMA_RAND_12(sz) KMA_RAND_11(sz) .name[KMALLOC_RANDOM_START + 12] = "kmalloc-rnd-12-" #sz, |
783 | #define KMA_RAND_13(sz) KMA_RAND_12(sz) .name[KMALLOC_RANDOM_START + 13] = "kmalloc-rnd-13-" #sz, |
784 | #define KMA_RAND_14(sz) KMA_RAND_13(sz) .name[KMALLOC_RANDOM_START + 14] = "kmalloc-rnd-14-" #sz, |
785 | #define KMA_RAND_15(sz) KMA_RAND_14(sz) .name[KMALLOC_RANDOM_START + 15] = "kmalloc-rnd-15-" #sz, |
786 | #else // CONFIG_RANDOM_KMALLOC_CACHES |
787 | #define KMALLOC_RANDOM_NAME(N, sz) |
788 | #endif |
789 | |
790 | #define INIT_KMALLOC_INFO(__size, __short_size) \ |
791 | { \ |
792 | .name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \ |
793 | KMALLOC_RCL_NAME(__short_size) \ |
794 | KMALLOC_CGROUP_NAME(__short_size) \ |
795 | KMALLOC_DMA_NAME(__short_size) \ |
796 | KMALLOC_RANDOM_NAME(RANDOM_KMALLOC_CACHES_NR, __short_size) \ |
797 | .size = __size, \ |
798 | } |
799 | |
800 | /* |
801 | * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time. |
802 | * kmalloc_index() supports up to 2^21=2MB, so the final entry of the table is |
803 | * kmalloc-2M. |
804 | */ |
805 | const struct kmalloc_info_struct kmalloc_info[] __initconst = { |
806 | INIT_KMALLOC_INFO(0, 0), |
807 | INIT_KMALLOC_INFO(96, 96), |
808 | INIT_KMALLOC_INFO(192, 192), |
809 | INIT_KMALLOC_INFO(8, 8), |
810 | INIT_KMALLOC_INFO(16, 16), |
811 | INIT_KMALLOC_INFO(32, 32), |
812 | INIT_KMALLOC_INFO(64, 64), |
813 | INIT_KMALLOC_INFO(128, 128), |
814 | INIT_KMALLOC_INFO(256, 256), |
815 | INIT_KMALLOC_INFO(512, 512), |
816 | INIT_KMALLOC_INFO(1024, 1k), |
817 | INIT_KMALLOC_INFO(2048, 2k), |
818 | INIT_KMALLOC_INFO(4096, 4k), |
819 | INIT_KMALLOC_INFO(8192, 8k), |
820 | INIT_KMALLOC_INFO(16384, 16k), |
821 | INIT_KMALLOC_INFO(32768, 32k), |
822 | INIT_KMALLOC_INFO(65536, 64k), |
823 | INIT_KMALLOC_INFO(131072, 128k), |
824 | INIT_KMALLOC_INFO(262144, 256k), |
825 | INIT_KMALLOC_INFO(524288, 512k), |
826 | INIT_KMALLOC_INFO(1048576, 1M), |
827 | INIT_KMALLOC_INFO(2097152, 2M) |
828 | }; |
829 | |
830 | /* |
831 | * Patch up the size_index table if we have strange large alignment |
832 | * requirements for the kmalloc array. This is only the case for |
833 | * MIPS it seems. The standard arches will not generate any code here. |
834 | * |
835 | * Largest permitted alignment is 256 bytes due to the way we |
836 | * handle the index determination for the smaller caches. |
837 | * |
838 | * Make sure that nothing crazy happens if someone starts tinkering |
839 | * around with ARCH_KMALLOC_MINALIGN |
840 | */ |
841 | void __init setup_kmalloc_cache_index_table(void) |
842 | { |
843 | unsigned int i; |
844 | |
845 | BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || |
846 | !is_power_of_2(KMALLOC_MIN_SIZE)); |
847 | |
848 | for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { |
849 | unsigned int elem = size_index_elem(i); |
850 | |
851 | if (elem >= ARRAY_SIZE(size_index)) |
852 | break; |
853 | size_index[elem] = KMALLOC_SHIFT_LOW; |
854 | } |
855 | |
856 | if (KMALLOC_MIN_SIZE >= 64) { |
857 | /* |
858 | * The 96 byte sized cache is not used if the alignment |
859 | * is 64 bytes. |
860 | */ |
861 | for (i = 64 + 8; i <= 96; i += 8) |
862 | size_index[size_index_elem(i)] = 7; |
863 | |
864 | } |
865 | |
866 | if (KMALLOC_MIN_SIZE >= 128) { |
867 | /* |
868 | * The 192 byte sized cache is not used if the alignment |
869 | * is 128 bytes. Redirect kmalloc to use the 256 byte cache |
870 | * instead. |
871 | */ |
872 | for (i = 128 + 8; i <= 192; i += 8) |
873 | size_index[size_index_elem(i)] = 8; |
874 | } |
875 | } |
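
/*
 * Worked example: with KMALLOC_MIN_SIZE == 64 (KMALLOC_SHIFT_LOW == 6), the
 * first loop points the 8..56 byte entries at index 6 (kmalloc-64), and the
 * second loop points the 72..96 byte entries at index 7 (kmalloc-128), since
 * a 96-byte cache cannot exist with 64-byte minimum alignment.
 */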
876 | |
877 | static unsigned int __kmalloc_minalign(void) |
878 | { |
879 | unsigned int minalign = dma_get_cache_alignment(); |
880 | |
881 | if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && |
882 | is_swiotlb_allocated()) |
883 | minalign = ARCH_KMALLOC_MINALIGN; |
884 | |
885 | return max(minalign, arch_slab_minalign()); |
886 | } |
887 | |
888 | void __init |
889 | new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags) |
890 | { |
891 | unsigned int minalign = __kmalloc_minalign(); |
892 | unsigned int aligned_size = kmalloc_info[idx].size; |
893 | int aligned_idx = idx; |
894 | |
895 | if ((KMALLOC_RECLAIM != KMALLOC_NORMAL) && (type == KMALLOC_RECLAIM)) { |
896 | flags |= SLAB_RECLAIM_ACCOUNT; |
897 | } else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) { |
898 | if (mem_cgroup_kmem_disabled()) { |
899 | kmalloc_caches[type][idx] = kmalloc_caches[KMALLOC_NORMAL][idx]; |
900 | return; |
901 | } |
902 | flags |= SLAB_ACCOUNT; |
903 | } else if (IS_ENABLED(CONFIG_ZONE_DMA) && (type == KMALLOC_DMA)) { |
904 | flags |= SLAB_CACHE_DMA; |
905 | } |
906 | |
907 | #ifdef CONFIG_RANDOM_KMALLOC_CACHES |
908 | if (type >= KMALLOC_RANDOM_START && type <= KMALLOC_RANDOM_END) |
909 | flags |= SLAB_NO_MERGE; |
910 | #endif |
911 | |
912 | /* |
913 | * If CONFIG_MEMCG_KMEM is enabled, disable cache merging for |
914 | * KMALLOC_NORMAL caches. |
915 | */ |
916 | if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_NORMAL)) |
917 | flags |= SLAB_NO_MERGE; |
918 | |
919 | if (minalign > ARCH_KMALLOC_MINALIGN) { |
920 | aligned_size = ALIGN(aligned_size, minalign); |
921 | aligned_idx = __kmalloc_index(aligned_size, false); |
922 | } |
923 | |
924 | if (!kmalloc_caches[type][aligned_idx]) |
925 | kmalloc_caches[type][aligned_idx] = create_kmalloc_cache( |
926 | kmalloc_info[aligned_idx].name[type], |
927 | aligned_size, flags); |
928 | if (idx != aligned_idx) |
929 | kmalloc_caches[type][idx] = kmalloc_caches[type][aligned_idx]; |
930 | } |
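
/*
 * Worked example (illustrative only, assuming dma_get_cache_alignment()
 * returns 128 on a non-coherent system without the SWIOTLB bounce buffer):
 * for the kmalloc-96 slot, aligned_size is rounded up to 128 and aligned_idx
 * becomes the kmalloc-128 slot, so both entries end up sharing one 128-byte
 * cache.
 */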
931 | |
932 | /* |
933 | * Create the kmalloc array. Some of the regular kmalloc arrays |
934 | * may already have been created because they were needed to |
935 | * enable allocations for slab creation. |
936 | */ |
937 | void __init create_kmalloc_caches(slab_flags_t flags) |
938 | { |
939 | int i; |
940 | enum kmalloc_cache_type type; |
941 | |
942 | /* |
943 | * Including KMALLOC_CGROUP if CONFIG_MEMCG_KMEM is defined |
944 | */ |
945 | for (type = KMALLOC_NORMAL; type < NR_KMALLOC_TYPES; type++) { |
946 | for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { |
947 | if (!kmalloc_caches[type][i]) |
948 | new_kmalloc_cache(i, type, flags); |
949 | |
950 | /* |
951 | * Caches that are not of a power-of-two size. |
952 | * These have to be created immediately after the |
953 | * earlier power-of-two caches. |
954 | */ |
955 | if (KMALLOC_MIN_SIZE <= 32 && i == 6 && |
956 | !kmalloc_caches[type][1]) |
957 | new_kmalloc_cache(1, type, flags); |
958 | if (KMALLOC_MIN_SIZE <= 64 && i == 7 && |
959 | !kmalloc_caches[type][2]) |
960 | new_kmalloc_cache(2, type, flags); |
961 | } |
962 | } |
963 | #ifdef CONFIG_RANDOM_KMALLOC_CACHES |
964 | random_kmalloc_seed = get_random_u64(); |
965 | #endif |
966 | |
967 | /* Kmalloc array is now usable */ |
968 | slab_state = UP; |
969 | } |
970 | |
971 | void free_large_kmalloc(struct folio *folio, void *object) |
972 | { |
973 | unsigned int order = folio_order(folio); |
974 | |
975 | if (WARN_ON_ONCE(order == 0)) |
976 | pr_warn_once("object pointer: 0x%p\n", object); |
977 | |
978 | kmemleak_free(object); |
979 | kasan_kfree_large(object); |
980 | kmsan_kfree_large(object); |
981 | |
982 | mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B, |
983 | -(PAGE_SIZE << order)); |
984 | __free_pages(folio_page(folio, 0), order); |
985 | } |
986 | |
987 | static void *__kmalloc_large_node(size_t size, gfp_t flags, int node); |
988 | static __always_inline |
989 | void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) |
990 | { |
991 | struct kmem_cache *s; |
992 | void *ret; |
993 | |
994 | if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { |
995 | ret = __kmalloc_large_node(size, flags, node); |
996 | trace_kmalloc(caller, ret, size, |
997 | PAGE_SIZE << get_order(size), flags, node); |
998 | return ret; |
999 | } |
1000 | |
1001 | s = kmalloc_slab(size, flags, caller); |
1002 | |
1003 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
1004 | return s; |
1005 | |
1006 | ret = __kmem_cache_alloc_node(s, flags, node, size, caller); |
1007 | ret = kasan_kmalloc(s, ret, size, flags); |
1008 | trace_kmalloc(caller, ret, size, s->size, flags, node); |
1009 | return ret; |
1010 | } |
1011 | |
1012 | void *__kmalloc_node(size_t size, gfp_t flags, int node) |
1013 | { |
1014 | return __do_kmalloc_node(size, flags, node, _RET_IP_); |
1015 | } |
1016 | EXPORT_SYMBOL(__kmalloc_node); |
1017 | |
1018 | void *__kmalloc(size_t size, gfp_t flags) |
1019 | { |
1020 | return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_); |
1021 | } |
1022 | EXPORT_SYMBOL(__kmalloc); |
1023 | |
1024 | void *__kmalloc_node_track_caller(size_t size, gfp_t flags, |
1025 | int node, unsigned long caller) |
1026 | { |
1027 | return __do_kmalloc_node(size, flags, node, caller); |
1028 | } |
1029 | EXPORT_SYMBOL(__kmalloc_node_track_caller); |
1030 | |
1031 | /** |
1032 | * kfree - free previously allocated memory |
1033 | * @object: pointer returned by kmalloc() or kmem_cache_alloc() |
1034 | * |
1035 | * If @object is NULL, no operation is performed. |
1036 | */ |
1037 | void kfree(const void *object) |
1038 | { |
1039 | struct folio *folio; |
1040 | struct slab *slab; |
1041 | struct kmem_cache *s; |
1042 | |
1043 | trace_kfree(_RET_IP_, object); |
1044 | |
1045 | if (unlikely(ZERO_OR_NULL_PTR(object))) |
1046 | return; |
1047 | |
1048 | folio = virt_to_folio(object); |
1049 | if (unlikely(!folio_test_slab(folio))) { |
1050 | free_large_kmalloc(folio, (void *)object); |
1051 | return; |
1052 | } |
1053 | |
1054 | slab = folio_slab(folio); |
1055 | s = slab->slab_cache; |
1056 | __kmem_cache_free(s, (void *)object, _RET_IP_); |
1057 | } |
1058 | EXPORT_SYMBOL(kfree); |
1059 | |
1060 | /** |
1061 | * __ksize -- Report full size of underlying allocation |
1062 | * @object: pointer to the object |
1063 | * |
1064 | * This should only be used internally to query the true size of allocations. |
1065 | * It is not meant to be a way to discover the usable size of an allocation |
1066 | * after the fact. Instead, use kmalloc_size_roundup(). Using memory beyond |
1067 | * the originally requested allocation size may trigger KASAN, UBSAN_BOUNDS, |
1068 | * and/or FORTIFY_SOURCE. |
1069 | * |
1070 | * Return: size of the actual memory used by @object in bytes |
1071 | */ |
1072 | size_t __ksize(const void *object) |
1073 | { |
1074 | struct folio *folio; |
1075 | |
1076 | if (unlikely(object == ZERO_SIZE_PTR)) |
1077 | return 0; |
1078 | |
1079 | folio = virt_to_folio(object); |
1080 | |
1081 | if (unlikely(!folio_test_slab(folio))) { |
1082 | if (WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE)) |
1083 | return 0; |
1084 | if (WARN_ON(object != folio_address(folio))) |
1085 | return 0; |
1086 | return folio_size(folio); |
1087 | } |
1088 | |
1089 | #ifdef CONFIG_SLUB_DEBUG |
1090 | skip_orig_size_check(folio_slab(folio)->slab_cache, object); |
1091 | #endif |
1092 | |
1093 | return slab_ksize(folio_slab(folio)->slab_cache); |
1094 | } |
1095 | |
1096 | void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) |
1097 | { |
1098 | void *ret = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE, |
1099 | size, _RET_IP_); |
1100 | |
1101 | trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE); |
1102 | |
1103 | ret = kasan_kmalloc(s, ret, size, gfpflags); |
1104 | return ret; |
1105 | } |
1106 | EXPORT_SYMBOL(kmalloc_trace); |
1107 | |
1108 | void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, |
1109 | int node, size_t size) |
1110 | { |
1111 | void *ret = __kmem_cache_alloc_node(s, gfpflags, node, size, _RET_IP_); |
1112 | |
1113 | trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node); |
1114 | |
1115 | ret = kasan_kmalloc(s, ret, size, gfpflags); |
1116 | return ret; |
1117 | } |
1118 | EXPORT_SYMBOL(kmalloc_node_trace); |
1119 | |
1120 | gfp_t kmalloc_fix_flags(gfp_t flags) |
1121 | { |
1122 | gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK; |
1123 | |
1124 | flags &= ~GFP_SLAB_BUG_MASK; |
1125 | pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n", |
1126 | invalid_mask, &invalid_mask, flags, &flags); |
1127 | dump_stack(); |
1128 | |
1129 | return flags; |
1130 | } |
1131 | |
1132 | /* |
1133 | * To avoid unnecessary overhead, we pass through large allocation requests |
1134 | * directly to the page allocator. We use __GFP_COMP, because we will need to |
1135 | * know the allocation order to free the pages properly in kfree. |
1136 | */ |
1137 | |
1138 | static void *__kmalloc_large_node(size_t size, gfp_t flags, int node) |
1139 | { |
1140 | struct page *page; |
1141 | void *ptr = NULL; |
1142 | unsigned int order = get_order(size); |
1143 | |
1144 | if (unlikely(flags & GFP_SLAB_BUG_MASK)) |
1145 | flags = kmalloc_fix_flags(flags); |
1146 | |
1147 | flags |= __GFP_COMP; |
1148 | page = alloc_pages_node(node, flags, order); |
1149 | if (page) { |
1150 | ptr = page_address(page); |
1151 | mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, |
1152 | PAGE_SIZE << order); |
1153 | } |
1154 | |
1155 | ptr = kasan_kmalloc_large(ptr, size, flags); |
1156 | /* As ptr might get tagged, call kmemleak hook after KASAN. */ |
1157 | kmemleak_alloc(ptr, size, 1, flags); |
1158 | kmsan_kmalloc_large(ptr, size, flags); |
1159 | |
1160 | return ptr; |
1161 | } |
1162 | |
1163 | void *kmalloc_large(size_t size, gfp_t flags) |
1164 | { |
1165 | void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE); |
1166 | |
1167 | trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size), |
1168 | flags, NUMA_NO_NODE); |
1169 | return ret; |
1170 | } |
1171 | EXPORT_SYMBOL(kmalloc_large); |
1172 | |
1173 | void *kmalloc_large_node(size_t size, gfp_t flags, int node) |
1174 | { |
1175 | void *ret = __kmalloc_large_node(size, flags, node); |
1176 | |
1177 | trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size), |
1178 | flags, node); |
1179 | return ret; |
1180 | } |
1181 | EXPORT_SYMBOL(kmalloc_large_node); |
1182 | |
1183 | #ifdef CONFIG_SLAB_FREELIST_RANDOM |
1184 | /* Randomize a generic freelist */ |
1185 | static void freelist_randomize(unsigned int *list, |
1186 | unsigned int count) |
1187 | { |
1188 | unsigned int rand; |
1189 | unsigned int i; |
1190 | |
1191 | for (i = 0; i < count; i++) |
1192 | list[i] = i; |
1193 | |
1194 | /* Fisher-Yates shuffle */ |
1195 | for (i = count - 1; i > 0; i--) { |
1196 | rand = get_random_u32_below(i + 1); |
1197 | swap(list[i], list[rand]); |
1198 | } |
1199 | } |
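
/*
 * For example, with count == 4 the list starts as {0, 1, 2, 3} and the
 * Fisher-Yates pass leaves each of the 24 permutations equally likely,
 * given a uniform get_random_u32_below().
 */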
1200 | |
1201 | /* Create a random sequence per cache */ |
1202 | int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count, |
1203 | gfp_t gfp) |
1204 | { |
1205 | |
1206 | if (count < 2 || cachep->random_seq) |
1207 | return 0; |
1208 | |
1209 | cachep->random_seq = kcalloc(count, sizeof(unsigned int), gfp); |
1210 | if (!cachep->random_seq) |
1211 | return -ENOMEM; |
1212 | |
1213 | freelist_randomize(cachep->random_seq, count); |
1214 | return 0; |
1215 | } |
1216 | |
1217 | /* Destroy the per-cache random freelist sequence */ |
1218 | void cache_random_seq_destroy(struct kmem_cache *cachep) |
1219 | { |
1220 | kfree(cachep->random_seq); |
1221 | cachep->random_seq = NULL; |
1222 | } |
1223 | #endif /* CONFIG_SLAB_FREELIST_RANDOM */ |
1224 | |
1225 | #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG) |
1226 | #ifdef CONFIG_SLAB |
1227 | #define SLABINFO_RIGHTS (0600) |
1228 | #else |
1229 | #define SLABINFO_RIGHTS (0400) |
1230 | #endif |
1231 | |
1232 | static void print_slabinfo_header(struct seq_file *m) |
1233 | { |
1234 | /* |
1235 | * Output format version, so at least we can change it |
1236 | * without _too_ many complaints. |
1237 | */ |
1238 | #ifdef CONFIG_DEBUG_SLAB |
1239 | seq_puts(m, "slabinfo - version: 2.1 (statistics)\n"); |
1240 | #else |
1241 | seq_puts(m, "slabinfo - version: 2.1\n"); |
1242 | #endif |
1243 | seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>"); |
1244 | seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); |
1245 | seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); |
1246 | #ifdef CONFIG_DEBUG_SLAB |
1247 | seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>"); |
1248 | seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); |
1249 | #endif |
1250 | seq_putc(m, '\n'); |
1251 | } |
1252 | |
1253 | static void *slab_start(struct seq_file *m, loff_t *pos) |
1254 | { |
1255 | mutex_lock(&slab_mutex); |
1256 | return seq_list_start(&slab_caches, *pos); |
1257 | } |
1258 | |
1259 | static void *slab_next(struct seq_file *m, void *p, loff_t *pos) |
1260 | { |
1261 | return seq_list_next(p, &slab_caches, pos); |
1262 | } |
1263 | |
1264 | static void slab_stop(struct seq_file *m, void *p) |
1265 | { |
1266 | mutex_unlock(&slab_mutex); |
1267 | } |
1268 | |
1269 | static void cache_show(struct kmem_cache *s, struct seq_file *m) |
1270 | { |
1271 | struct slabinfo sinfo; |
1272 | |
1273 | memset(&sinfo, 0, sizeof(sinfo)); |
1274 | get_slabinfo(s, &sinfo); |
1275 | |
1276 | seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", |
1277 | s->name, sinfo.active_objs, sinfo.num_objs, s->size, |
1278 | sinfo.objects_per_slab, (1 << sinfo.cache_order)); |
1279 | |
1280 | seq_printf(m, " : tunables %4u %4u %4u", |
1281 | sinfo.limit, sinfo.batchcount, sinfo.shared); |
1282 | seq_printf(m, " : slabdata %6lu %6lu %6lu", |
1283 | sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail); |
1284 | slabinfo_show_stats(m, s); |
1285 | seq_putc(m, '\n'); |
1286 | } |
1287 | |
1288 | static int slab_show(struct seq_file *m, void *p) |
1289 | { |
1290 | struct kmem_cache *s = list_entry(p, struct kmem_cache, list); |
1291 | |
1292 | if (p == slab_caches.next) |
1293 | print_slabinfo_header(m); |
1294 | cache_show(s, m); |
1295 | return 0; |
1296 | } |
1297 | |
1298 | void dump_unreclaimable_slab(void) |
1299 | { |
1300 | struct kmem_cache *s; |
1301 | struct slabinfo sinfo; |
1302 | |
1303 | /* |
1304 | * Acquiring slab_mutex here is risky since we do not want to sleep in |
1305 | * the OOM path, but traversing the list without holding the mutex |
1306 | * risks a crash. |
1307 | * Use mutex_trylock() to protect the traversal and dump nothing if |
1308 | * the mutex cannot be acquired. |
1309 | */ |
1310 | if (!mutex_trylock(&slab_mutex)) { |
1311 | pr_warn("excessive unreclaimable slab but cannot dump stats\n"); |
1312 | return; |
1313 | } |
1314 | |
1315 | pr_info("Unreclaimable slab info:\n"); |
1316 | pr_info("Name Used Total\n"); |
1317 | |
1318 | list_for_each_entry(s, &slab_caches, list) { |
1319 | if (s->flags & SLAB_RECLAIM_ACCOUNT) |
1320 | continue; |
1321 | |
1322 | get_slabinfo(s, &sinfo); |
1323 | |
1324 | if (sinfo.num_objs > 0) |
1325 | pr_info("%-17s %10luKB %10luKB\n", s->name, |
1326 | (sinfo.active_objs * s->size) / 1024, |
1327 | (sinfo.num_objs * s->size) / 1024); |
1328 | } |
1329 | mutex_unlock(&slab_mutex); |
1330 | } |
1331 | |
1332 | /* |
1333 | * slabinfo_op - iterator that generates /proc/slabinfo |
1334 | * |
1335 | * Output layout: |
1336 | * cache-name |
1337 | * num-active-objs |
1338 | * total-objs |
1339 | * object size |
1340 | * num-active-slabs |
1341 | * total-slabs |
1342 | * num-pages-per-slab |
1343 | * + further values on SMP and with statistics enabled |
1344 | */ |
1345 | static const struct seq_operations slabinfo_op = { |
1346 | .start = slab_start, |
1347 | .next = slab_next, |
1348 | .stop = slab_stop, |
1349 | .show = slab_show, |
1350 | }; |
1351 | |
1352 | static int slabinfo_open(struct inode *inode, struct file *file) |
1353 | { |
1354 | return seq_open(file, &slabinfo_op); |
1355 | } |
1356 | |
1357 | static const struct proc_ops slabinfo_proc_ops = { |
1358 | .proc_flags = PROC_ENTRY_PERMANENT, |
1359 | .proc_open = slabinfo_open, |
1360 | .proc_read = seq_read, |
1361 | .proc_write = slabinfo_write, |
1362 | .proc_lseek = seq_lseek, |
1363 | .proc_release = seq_release, |
1364 | }; |
1365 | |
1366 | static int __init slab_proc_init(void) |
1367 | { |
1368 | proc_create("slabinfo", SLABINFO_RIGHTS, NULL, &slabinfo_proc_ops); |
1369 | return 0; |
1370 | } |
1371 | module_init(slab_proc_init); |
1372 | |
1373 | #endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */ |
1374 | |
1375 | static __always_inline __realloc_size(2) void * |
1376 | __do_krealloc(const void *p, size_t new_size, gfp_t flags) |
1377 | { |
1378 | void *ret; |
1379 | size_t ks; |
1380 | |
1381 | /* Check for double-free before calling ksize. */ |
1382 | if (likely(!ZERO_OR_NULL_PTR(p))) { |
1383 | if (!kasan_check_byte(p)) |
1384 | return NULL; |
1385 | ks = ksize(p); |
1386 | } else |
1387 | ks = 0; |
1388 | |
1389 | /* If the object still fits, repoison it precisely. */ |
1390 | if (ks >= new_size) { |
1391 | p = kasan_krealloc((void *)p, new_size, flags); |
1392 | return (void *)p; |
1393 | } |
1394 | |
1395 | ret = kmalloc_track_caller(new_size, flags); |
1396 | if (ret && p) { |
1397 | /* Disable KASAN checks as the object's redzone is accessed. */ |
1398 | kasan_disable_current(); |
1399 | memcpy(ret, kasan_reset_tag(p), ks); |
1400 | kasan_enable_current(); |
1401 | } |
1402 | |
1403 | return ret; |
1404 | } |
1405 | |
1406 | /** |
1407 | * krealloc - reallocate memory. The contents will remain unchanged. |
1408 | * @p: object to reallocate memory for. |
1409 | * @new_size: how many bytes of memory are required. |
1410 | * @flags: the type of memory to allocate. |
1411 | * |
1412 | * The contents of the object pointed to are preserved up to the |
1413 | * lesser of the new and old sizes (__GFP_ZERO flag is effectively ignored). |
1414 | * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size |
1415 | * is 0 and @p is not a %NULL pointer, the object pointed to is freed. |
1416 | * |
1417 | * Return: pointer to the allocated memory or %NULL in case of error |
1418 | */ |
1419 | void *krealloc(const void *p, size_t new_size, gfp_t flags) |
1420 | { |
1421 | void *ret; |
1422 | |
1423 | if (unlikely(!new_size)) { |
1424 | kfree(p); |
1425 | return ZERO_SIZE_PTR; |
1426 | } |
1427 | |
1428 | ret = __do_krealloc(p, new_size, flags); |
1429 | if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret)) |
1430 | kfree(p); |
1431 | |
1432 | return ret; |
1433 | } |
1434 | EXPORT_SYMBOL(krealloc); |
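
/*
 * A minimal usage sketch (hypothetical names, illustrative only): keep the
 * old pointer until krealloc() succeeds, since the original buffer is left
 * untouched and remains valid on failure.
 *
 *	new = krealloc(buf, new_len, GFP_KERNEL);
 *	if (!new)
 *		return -ENOMEM;
 *	buf = new;
 */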
1435 | |
1436 | /** |
1437 | * kfree_sensitive - Clear sensitive information in memory before freeing |
1438 | * @p: object to free memory of |
1439 | * |
1440 | * The memory of the object @p points to is zeroed before it is freed. |
1441 | * If @p is %NULL, kfree_sensitive() does nothing. |
1442 | * |
1443 | * Note: this function zeroes the whole allocated buffer which can be a good |
1444 | * deal bigger than the requested buffer size passed to kmalloc(). So be |
1445 | * careful when using this function in performance sensitive code. |
1446 | */ |
1447 | void kfree_sensitive(const void *p) |
1448 | { |
1449 | size_t ks; |
1450 | void *mem = (void *)p; |
1451 | |
1452 | ks = ksize(mem); |
1453 | if (ks) { |
1454 | kasan_unpoison_range(mem, ks); |
1455 | memzero_explicit(mem, ks); |
1456 | } |
1457 | kfree(mem); |
1458 | } |
1459 | EXPORT_SYMBOL(kfree_sensitive); |
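
/*
 * A minimal usage sketch (hypothetical key buffer, illustrative only): use
 * this instead of kfree() for allocations holding secrets.
 *
 *	key = kmalloc(key_len, GFP_KERNEL);
 *	...
 *	kfree_sensitive(key);
 */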
1460 | |
1461 | size_t ksize(const void *objp) |
1462 | { |
1463 | /* |
1464 | * We need to first check that the pointer to the object is valid. |
1465 | * The KASAN report printed from ksize() is more useful than one printed |
1466 | * later, when the behaviour could be undefined due to |
1467 | * a potential use-after-free or double-free. |
1468 | * |
1469 | * We use kasan_check_byte(), which is supported for the hardware |
1470 | * tag-based KASAN mode, unlike kasan_check_read/write(). |
1471 | * |
1472 | * If the pointed to memory is invalid, we return 0 to avoid users of |
1473 | * ksize() writing to and potentially corrupting the memory region. |
1474 | * |
1475 | * We want to perform the check before __ksize(), to avoid potentially |
1476 | * crashing in __ksize() due to accessing invalid metadata. |
1477 | */ |
1478 | if (unlikely(ZERO_OR_NULL_PTR(objp)) || !kasan_check_byte(objp)) |
1479 | return 0; |
1480 | |
1481 | return kfence_ksize(objp) ?: __ksize(objp); |
1482 | } |
1483 | EXPORT_SYMBOL(ksize); |
1484 | |
1485 | /* Tracepoints definitions. */ |
1486 | EXPORT_TRACEPOINT_SYMBOL(kmalloc); |
1487 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); |
1488 | EXPORT_TRACEPOINT_SYMBOL(kfree); |
1489 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); |
1490 | |
1491 | int should_failslab(struct kmem_cache *s, gfp_t gfpflags) |
1492 | { |
1493 | if (__should_failslab(s, gfpflags)) |
1494 | return -ENOMEM; |
1495 | return 0; |
1496 | } |
1497 | ALLOW_ERROR_INJECTION(should_failslab, ERRNO); |
1498 | |