/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE

#include <linux/limits.h>
#include <linux/oom.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
#include <sys/mman.h>

#include "../kselftest.h"
#include "cgroup_util.h"

static bool has_localevents;
static bool has_recursiveprot;

/*
 * This test creates two nested cgroups with and without enabling
 * the memory controller.
 */
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup_free;

	if (cg_create(parent))
		goto cleanup_free;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup_parent;

	if (cg_create(child))
		goto cleanup_parent;

	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup_child;

	/* Create two nested cgroups without enabling memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup_free2;

	if (cg_create(parent2))
		goto cleanup_free2;

	if (cg_create(child2))
		goto cleanup_parent2;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup_all;

	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup_all;

	ret = KSFT_PASS;

cleanup_all:
	cg_destroy(child2);
cleanup_parent2:
	cg_destroy(parent2);
cleanup_free2:
	free(parent2);
	free(child2);
cleanup_child:
	cg_destroy(child);
cleanup_parent:
	cg_destroy(parent);
cleanup_free:
	free(parent);
	free(child);

	return ret;
}

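/*
 * Helper run via cg_run() in a child process attached to @cgroup:
 * allocate 50M of anonymous memory and touch one byte per page so every
 * page is actually faulted in, then verify that memory.current and the
 * "anon" counter in memory.stat are both close to the allocation size.
 */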
static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	char *buf, *ptr;
	long anon, current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	if (!values_close(size, current, 3))
		goto cleanup;

	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
	if (anon < 0)
		goto cleanup;

	if (!values_close(anon, current, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, file;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	file = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file < 0)
		goto cleanup;

	if (!values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test creates a memory cgroup, allocates
 * some anonymous memory and some pagecache,
 * and checks memory.current and some memory.stat values.
 */
static int test_memcg_current(const char *root)
{
	int ret = KSFT_FAIL;
	long current;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

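/*
 * The *_noexit() helpers below allocate memory and then keep the child
 * process alive (polling getppid()) until the parent test exits, so the
 * allocated memory stays charged to the cgroup while the test inspects
 * its counters.
 */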
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int ppid = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

	while (getppid() == ppid)
		sleep(1);

	return 0;
}

static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	int ppid = getppid();
	size_t size = (unsigned long)arg;
	char *buf, *ptr;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	while (getppid() == ppid)
		sleep(1);

	free(buf);
	return 0;
}

/*
 * Wait until processes are killed asynchronously by the OOM killer.
 * If we exceed a timeout, fail.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int limit;

	for (limit = 10; limit > 0; limit--) {
		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
			return 0;

		usleep(100000);
	}
	return -1;
}

static bool reclaim_until(const char *memcg, long goal);

/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 0,    memory.max = 200M
 * A/B     memory.min = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 0,    memory.current = 50M
 * A/B/F   memory.min = 500M, memory.current = 0
 *
 * (or memory.low if we test soft protection)
 *
 * Usages are pagecache and the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates significant
 * memory pressure in A.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 29M
 * A/B/D  memory.current ~= 21M
 * A/B/E  memory.current ~= 0
 * A/B/F  memory.current  = 0
 * (for the origin of the numbers, see the model in memcg_protection.m.)
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available, and checks that:
 * a) memory.min protects pagecache even in this case,
 * b) memory.low allows reclaiming page cache with low events.
 *
 * Then we try to reclaim from A/B/C using memory.reclaim until its
 * usage reaches 10M.
 * This makes sure that:
 * (a) We ignore the protection of the reclaim target memcg.
 * (b) The previously calculated emin value (~29M) should be dismissed.
 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	long current;
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[1], attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

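	/*
	 * Give the allocator children time to settle: the three cgroups
	 * running alloc_pagecache_50M_noexit() should together hold
	 * about 150M of pagecache under parent[1].
	 */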
	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(29), 10))
		goto cleanup;

	if (!values_close(c[1], MB(21), 10))
		goto cleanup;

	if (c[3] != 0)
		goto cleanup;

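	/*
	 * Allocate beyond the unprotected memory remaining in parent[0]:
	 * with memory.min the allocation is expected to fail (protected
	 * pagecache cannot be reclaimed), while with memory.low it should
	 * succeed by reclaiming the softly-protected pagecache.
	 */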
	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents allocating anon memory\n");
		goto cleanup;
	}

	current = min ? MB(50) : MB(30);
	if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
		goto cleanup;

	if (!reclaim_until(children[0], MB(10)))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		if (i > no_low_events_index && low)
			goto cleanup;
	}

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}

static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}

static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, high, max;
	int fd;

	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (!values_close(current, MB(30), 5))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
		goto cleanup;

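	/*
	 * With memory.high set to 30M, the 50M pagecache allocation is
	 * expected to be reclaimed back down, so the helper that expects
	 * ~50M to stay charged must fail here.
	 */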
	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

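/*
 * mmap() the whole range and mlock() it so that all pages are faulted
 * in during a single kernel entry; this is what gives memory.high a
 * chance to throttle the allocation synchronously.
 */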
static int alloc_anon_mlock(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	void *buf;

	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
		   -1, 0);
	if (buf == MAP_FAILED)
		return -1;

	mlock(buf, size);
	munmap(buf, size);
	return 0;
}

/*
 * This test checks that memory.high is able to throttle a big single-shot
 * allocation, i.e. a large allocation within one kernel entry.
 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * Reclaim from @memcg until usage reaches @goal by writing to
 * memory.reclaim.
 *
 * This function will return false if the usage is already below the
 * goal.
 *
 * This function assumes that writing to memory.reclaim is the only
 * source of change in memory.current (no concurrent allocations or
 * reclaim).
 *
 * This function makes sure memory.reclaim is sane. It will return
 * false if memory.reclaim's error codes do not make sense, even if
 * the usage goal was satisfied.
 */
static bool reclaim_until(const char *memcg, long goal)
{
	char buf[64];
	int retries, err;
	long current, to_reclaim;
	bool reclaimed = false;

	for (retries = 5; retries > 0; retries--) {
		current = cg_read_long(memcg, "memory.current");

		if (current < goal || values_close(current, goal, 3))
			break;
		/* Did memory.reclaim return 0 incorrectly? */
		else if (reclaimed)
			return false;

		to_reclaim = current - goal;
		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err)
			reclaimed = true;
		else if (err != -EAGAIN)
			return false;
	}
	return reclaimed;
}

/*
 * This test checks that memory.reclaim reclaims the given
 * amount of memory (from both anon and file, if possible).
 */
static int test_memcg_reclaim(const char *root)
{
	int ret = KSFT_FAIL, fd = -1, retries;
	char *memcg;
	long current, expected_usage;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);

	/*
	 * If swap is enabled, try to reclaim from both anon and file, else try
	 * to reclaim from file only.
	 */
	if (is_swap_enabled()) {
		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
		expected_usage = MB(100);
	} else
		expected_usage = MB(50);

	/*
	 * Wait until current usage reaches the expected usage (or we run out of
	 * retries).
	 */
	retries = 5;
	while (!values_close(cg_read_long(memcg, "memory.current"),
			     expected_usage, 10)) {
		if (retries--) {
			sleep(1);
			continue;
		} else {
			fprintf(stderr,
				"failed to allocate %ld for memcg reclaim test\n",
				expected_usage);
			goto cleanup;
		}
	}

	/*
	 * Reclaim until current reaches 30M, this makes sure we hit both anon
	 * and file if swap is enabled.
	 */
	if (!reclaim_until(memcg, MB(30)))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	cg_destroy(memcg);
	free(memcg);
	if (fd >= 0)
		close(fd);

	return ret;
}

static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
	long mem_max = (long)arg;
	size_t size = MB(50);
	char *buf, *ptr;
	long mem_current, swap_current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	mem_current = cg_read_long(cgroup, "memory.current");
	if (!mem_current || !values_close(mem_current, mem_max, 3))
		goto cleanup;

	swap_current = cg_read_long(cgroup, "memory.swap.current");
	if (!swap_current ||
	    !values_close(mem_current + swap_current, size, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out.
 */
static int test_memcg_swap_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max;

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM. Then it checks for oom and oom_kill events in
 * memory.events.
 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

struct tcp_server_args {
	unsigned short port;
	int ctl[2];
};

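/*
 * Server half of the socket accounting test. It reports status to the
 * parent over the ctl pipe: the bind() errno on failure, or 0 once it
 * is listening. After accepting a connection it streams 1M buffers to
 * the client until the peer resets the connection.
 */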
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}

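/*
 * Client half: memory.current is sampled before connecting so that the
 * pre-existing charge can be subtracted when comparing against the
 * "sock" counter in memory.stat.
 */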
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;
	long allocated;

	allocated = cg_read_long(cgroup, "memory.current");
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		/* exclude the memory not related to socket connection */
		if (values_close(current - allocated, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}

/*
 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between
 * 1000 and 61000. Once it gets a client connection, it starts writing
 * to its socket.
 * The TCP client interleaves reads from the socket with checks that
 * memory.current and the "sock" counter in memory.stat stay similar.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		waitpid(pid, NULL, 0);
	}

	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the leaf were killed. It also checks that oom events
 * were propagated to the parent level.
 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long parent_oom_events;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	parent_oom_events = cg_read_key_long(
			parent, "memory.events", "oom_kill ");
	/*
	 * If memory_localevents is not enabled (the default), the parent should
	 * count OOM events in its children groups. Otherwise, it should not
	 * have observed any events.
	 */
	if (has_localevents && parent_oom_events != 0)
		goto cleanup;
	else if (!has_localevents && parent_oom_events <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the parent and leaf were killed.
 */
static int test_memcg_oom_group_parent_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "memory.max", "80M"))
		goto cleanup;

	if (cg_write(parent, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(parent, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;
	if (cg_test_proc_killed(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes were killed except those set with OOM_SCORE_ADJ_MIN.
 */
static int test_memcg_oom_group_score_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	int safe_pid;

	memcg = cg_name(root, "memcg_test_0");

	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.oom.group", "1"))
		goto cleanup;

	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
		goto cleanup;

	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
		goto cleanup;

	if (kill(safe_pid, SIGKILL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}

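/* T() pairs each test function with its stringified name for reporting. */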
#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T

int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, proc_status, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

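	/*
	 * proc_mount_contains() presumably returns 1 if the cgroup2 mount
	 * option is present, 0 if not, and a negative value on error; only
	 * the error case is fatal here.
	 */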
	proc_status = proc_mount_contains("memory_recursiveprot");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_recursiveprot = proc_status;

	proc_status = proc_mount_contains("memory_localevents");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_localevents = proc_status;

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}