1// SPDX-License-Identifier: GPL-2.0
2#define _GNU_SOURCE
3
4#include <linux/limits.h>
5#include <unistd.h>
6#include <stdio.h>
7#include <signal.h>
8#include <sys/sysinfo.h>
9#include <string.h>
10#include <sys/wait.h>
11#include <sys/mman.h>
12
13#include "../kselftest.h"
14#include "cgroup_util.h"
15
/*
 * Read a single unsigned decimal integer from the file at @path.
 *
 * @path:  file to read.
 * @value: where the parsed number is stored on success.
 *
 * Returns 0 on success, -1 if the file cannot be opened or does not start
 * with a number.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* %zu is the conversion that matches a size_t destination */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
29
/*
 * Write @value to /proc/sys/vm/min_free_kbytes.
 *
 * Returns a negative value on failure (file could not be opened, formatting
 * failed, or the buffered data could not be flushed on close); otherwise the
 * number of characters written, as reported by fprintf().
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	ret = fprintf(file, "%zu\n", value);
	/*
	 * stdio buffers the output, so the data only reaches the file when
	 * the stream is flushed; a failed fclose() means the write was lost.
	 */
	if (fclose(file) != 0)
		ret = -1;
	return ret;
}
42
/* Fetch the current vm.min_free_kbytes setting into @value via read_int(). */
static int read_min_free_kb(size_t *value)
{
	static const char sysctl_path[] = "/proc/sys/vm/min_free_kbytes";

	return read_int(sysctl_path, value);
}
47
/* Read zswap's debugfs stored_pages counter into @value via read_int(). */
static int get_zswap_stored_pages(size_t *value)
{
	static const char debugfs_path[] = "/sys/kernel/debug/zswap/stored_pages";

	return read_int(debugfs_path, value);
}
52
/*
 * Look up the "zswpwb" key in memory.stat of cgroup @cg.
 * The long returned by cg_read_key_long() is narrowed to int.
 */
static int get_cg_wb_count(const char *cg)
{
	long written_back = cg_read_key_long(cg, "memory.stat", "zswpwb");

	return written_back;
}
57
/*
 * Look up the "zswpout " key in memory.stat of @cgroup.
 * Callers in this file treat a negative result as a read failure.
 */
static long get_zswpout(const char *cgroup)
{
	long swapped_out = cg_read_key_long(cgroup, "memory.stat", "zswpout ");

	return swapped_out;
}
62
/*
 * Allocate a buffer, dirty it, then read it back and verify the contents.
 *
 * @cgroup: unused here; part of the callback signature.
 * @arg:    the allocation size in bytes, smuggled through the pointer value.
 *
 * Returns 0 if every probed byte reads back as written, -1 on allocation
 * failure or if any probed byte differs.
 */
static int allocate_and_read_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);
	int ret = 0;

	if (!mem)
		return -1;
	/*
	 * A 4095-byte stride is just under 4 KiB, so (assuming 4 KiB pages)
	 * at least one byte of every page is written. The index is a size_t
	 * so it cannot overflow before reaching @size.
	 */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';

	/* Go through the allocated memory to (z)swap in and out pages */
	for (size_t i = 0; i < size; i += 4095) {
		if (mem[i] != 'a')
			ret = -1;
	}

	free(mem);
	return ret;
}
83
/*
 * Allocate a buffer and dirty it, then release it.
 *
 * @cgroup: unused here; part of the callback signature.
 * @arg:    the allocation size in bytes, smuggled through the pointer value.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/*
	 * A 4095-byte stride is just under 4 KiB, so (assuming 4 KiB pages)
	 * at least one byte of every page is written. The index is a size_t
	 * so it cannot overflow before reaching @size.
	 */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}
96
/*
 * Create the cgroup @name under @root and write "1M" to its memory.max.
 *
 * Returns the cgroup path obtained from cg_name() (the caller frees it), or
 * NULL on any failure. If the cgroup was created but the limit could not be
 * written, the cgroup is destroyed again before returning.
 */
static char *setup_test_group_1M(const char *root, const char *name)
{
	char *path = cg_name(root, name);

	if (!path)
		return NULL;

	if (cg_create(path) == 0) {
		if (cg_write(path, "memory.max", "1M") == 0)
			return path;
		cg_destroy(path);
	}

	free(path);
	return NULL;
}
114
115/*
116 * Sanity test to check that pages are written into zswap.
117 */
118static int test_zswap_usage(const char *root)
119{
120 long zswpout_before, zswpout_after;
121 int ret = KSFT_FAIL;
122 char *test_group;
123
124 test_group = cg_name(root, name: "no_shrink_test");
125 if (!test_group)
126 goto out;
127 if (cg_create(cgroup: test_group))
128 goto out;
129 if (cg_write(cgroup: test_group, control: "memory.max", buf: "1M"))
130 goto out;
131
132 zswpout_before = get_zswpout(cgroup: test_group);
133 if (zswpout_before < 0) {
134 ksft_print_msg(msg: "Failed to get zswpout\n");
135 goto out;
136 }
137
138 /* Allocate more than memory.max to push memory into zswap */
139 if (cg_run(cgroup: test_group, fn: allocate_bytes, arg: (void *)MB(4)))
140 goto out;
141
142 /* Verify that pages come into zswap */
143 zswpout_after = get_zswpout(cgroup: test_group);
144 if (zswpout_after <= zswpout_before) {
145 ksft_print_msg(msg: "zswpout does not increase after test program\n");
146 goto out;
147 }
148 ret = KSFT_PASS;
149
150out:
151 cg_destroy(cgroup: test_group);
152 free(test_group);
153 return ret;
154}
155
156/*
157 * Check that when memory.zswap.max = 0, no pages can go to the zswap pool for
158 * the cgroup.
159 */
160static int test_swapin_nozswap(const char *root)
161{
162 int ret = KSFT_FAIL;
163 char *test_group;
164 long swap_peak, zswpout;
165
166 test_group = cg_name(root, name: "no_zswap_test");
167 if (!test_group)
168 goto out;
169 if (cg_create(cgroup: test_group))
170 goto out;
171 if (cg_write(cgroup: test_group, control: "memory.max", buf: "8M"))
172 goto out;
173 if (cg_write(cgroup: test_group, control: "memory.zswap.max", buf: "0"))
174 goto out;
175
176 /* Allocate and read more than memory.max to trigger swapin */
177 if (cg_run(cgroup: test_group, fn: allocate_and_read_bytes, arg: (void *)MB(32)))
178 goto out;
179
180 /* Verify that pages are swapped out, but no zswap happened */
181 swap_peak = cg_read_long(cgroup: test_group, control: "memory.swap.peak");
182 if (swap_peak < 0) {
183 ksft_print_msg(msg: "failed to get cgroup's swap_peak\n");
184 goto out;
185 }
186
187 if (swap_peak < MB(24)) {
188 ksft_print_msg(msg: "at least 24MB of memory should be swapped out\n");
189 goto out;
190 }
191
192 zswpout = get_zswpout(cgroup: test_group);
193 if (zswpout < 0) {
194 ksft_print_msg(msg: "failed to get zswpout\n");
195 goto out;
196 }
197
198 if (zswpout > 0) {
199 ksft_print_msg(msg: "zswapout > 0 when memory.zswap.max = 0\n");
200 goto out;
201 }
202
203 ret = KSFT_PASS;
204
205out:
206 cg_destroy(cgroup: test_group);
207 free(test_group);
208 return ret;
209}
210
211/* Simple test to verify the (z)swapin code paths */
212static int test_zswapin(const char *root)
213{
214 int ret = KSFT_FAIL;
215 char *test_group;
216 long zswpin;
217
218 test_group = cg_name(root, name: "zswapin_test");
219 if (!test_group)
220 goto out;
221 if (cg_create(cgroup: test_group))
222 goto out;
223 if (cg_write(cgroup: test_group, control: "memory.max", buf: "8M"))
224 goto out;
225 if (cg_write(cgroup: test_group, control: "memory.zswap.max", buf: "max"))
226 goto out;
227
228 /* Allocate and read more than memory.max to trigger (z)swap in */
229 if (cg_run(cgroup: test_group, fn: allocate_and_read_bytes, arg: (void *)MB(32)))
230 goto out;
231
232 zswpin = cg_read_key_long(cgroup: test_group, control: "memory.stat", key: "zswpin ");
233 if (zswpin < 0) {
234 ksft_print_msg(msg: "failed to get zswpin\n");
235 goto out;
236 }
237
238 if (zswpin < MB(24) / PAGE_SIZE) {
239 ksft_print_msg(msg: "at least 24MB should be brought back from zswap\n");
240 goto out;
241 }
242
243 ret = KSFT_PASS;
244
245out:
246 cg_destroy(cgroup: test_group);
247 free(test_group);
248 return ret;
249}
250
251/*
252 * When trying to store a memcg page in zswap, if the memcg hits its memory
253 * limit in zswap, writeback should affect only the zswapped pages of that
254 * memcg.
255 */
256static int test_no_invasive_cgroup_shrink(const char *root)
257{
258 int ret = KSFT_FAIL;
259 size_t control_allocation_size = MB(10);
260 char *control_allocation, *wb_group = NULL, *control_group = NULL;
261
262 wb_group = setup_test_group_1M(root, name: "per_memcg_wb_test1");
263 if (!wb_group)
264 return KSFT_FAIL;
265 if (cg_write(cgroup: wb_group, control: "memory.zswap.max", buf: "10K"))
266 goto out;
267 control_group = setup_test_group_1M(root, name: "per_memcg_wb_test2");
268 if (!control_group)
269 goto out;
270
271 /* Push some test_group2 memory into zswap */
272 if (cg_enter_current(cgroup: control_group))
273 goto out;
274 control_allocation = malloc(control_allocation_size);
275 for (int i = 0; i < control_allocation_size; i += 4095)
276 control_allocation[i] = 'a';
277 if (cg_read_key_long(cgroup: control_group, control: "memory.stat", key: "zswapped") < 1)
278 goto out;
279
280 /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
281 if (cg_run(cgroup: wb_group, fn: allocate_bytes, arg: (void *)MB(10)))
282 goto out;
283
284 /* Verify that only zswapped memory from gwb_group has been written back */
285 if (get_cg_wb_count(cg: wb_group) > 0 && get_cg_wb_count(cg: control_group) == 0)
286 ret = KSFT_PASS;
287out:
288 cg_enter_current(cgroup: root);
289 if (control_group) {
290 cg_destroy(cgroup: control_group);
291 free(control_group);
292 }
293 cg_destroy(cgroup: wb_group);
294 free(wb_group);
295 if (control_allocation)
296 free(control_allocation);
297 return ret;
298}
299
/*
 * Arguments exchanged between test_no_kmem_bypass() and its child.
 * The parent places this structure in a MAP_SHARED mapping so that writes
 * made by the child are visible to the parent.
 */
struct no_kmem_bypass_child_args {
	/* Number of bytes the child should allocate and touch */
	size_t target_alloc_bytes;
	/* Set by the child once its allocation attempt has finished; polled by the parent */
	size_t child_allocated;
};
304
305static int no_kmem_bypass_child(const char *cgroup, void *arg)
306{
307 struct no_kmem_bypass_child_args *values = arg;
308 void *allocation;
309
310 allocation = malloc(values->target_alloc_bytes);
311 if (!allocation) {
312 values->child_allocated = true;
313 return -1;
314 }
315 for (long i = 0; i < values->target_alloc_bytes; i += 4095)
316 ((char *)allocation)[i] = 'a';
317 values->child_allocated = true;
318 pause();
319 free(allocation);
320 return 0;
321}
322
323/*
324 * When pages owned by a memcg are pushed to zswap by kswapd, they should be
325 * charged to that cgroup. This wasn't the case before commit
326 * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
327 *
328 * The test first allocates memory in a memcg, then raises min_free_kbytes to
329 * a very high value so that the allocation falls below low wm, then makes
330 * another allocation to trigger kswapd that should push the memcg-owned pages
331 * to zswap and verifies that the zswap pages are correctly charged.
332 *
333 * To be run on a VM with at most 4G of memory.
334 */
335static int test_no_kmem_bypass(const char *root)
336{
337 size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
338 struct no_kmem_bypass_child_args *values;
339 size_t trigger_allocation_size;
340 int wait_child_iteration = 0;
341 long stored_pages_threshold;
342 struct sysinfo sys_info;
343 int ret = KSFT_FAIL;
344 int child_status;
345 char *test_group;
346 pid_t child_pid;
347
348 /* Read sys info and compute test values accordingly */
349 if (sysinfo(&sys_info) != 0)
350 return KSFT_FAIL;
351 if (sys_info.totalram > 5000000000)
352 return KSFT_SKIP;
353 values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
354 PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
355 if (values == MAP_FAILED)
356 return KSFT_FAIL;
357 if (read_min_free_kb(value: &min_free_kb_original))
358 return KSFT_FAIL;
359 min_free_kb_high = sys_info.totalram / 2000;
360 min_free_kb_low = sys_info.totalram / 500000;
361 values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
362 sys_info.totalram * 5 / 100;
363 stored_pages_threshold = sys_info.totalram / 5 / 4096;
364 trigger_allocation_size = sys_info.totalram / 20;
365
366 /* Set up test memcg */
367 if (cg_write(cgroup: root, control: "cgroup.subtree_control", buf: "+memory"))
368 goto out;
369 test_group = cg_name(root, name: "kmem_bypass_test");
370 if (!test_group)
371 goto out;
372
373 /* Spawn memcg child and wait for it to allocate */
374 set_min_free_kb(min_free_kb_low);
375 if (cg_create(cgroup: test_group))
376 goto out;
377 values->child_allocated = false;
378 child_pid = cg_run_nowait(cgroup: test_group, fn: no_kmem_bypass_child, arg: values);
379 if (child_pid < 0)
380 goto out;
381 while (!values->child_allocated && wait_child_iteration++ < 10000)
382 usleep(1000);
383
384 /* Try to wakeup kswapd and let it push child memory to zswap */
385 set_min_free_kb(min_free_kb_high);
386 for (int i = 0; i < 20; i++) {
387 size_t stored_pages;
388 char *trigger_allocation = malloc(trigger_allocation_size);
389
390 if (!trigger_allocation)
391 break;
392 for (int i = 0; i < trigger_allocation_size; i += 4095)
393 trigger_allocation[i] = 'b';
394 usleep(100000);
395 free(trigger_allocation);
396 if (get_zswap_stored_pages(value: &stored_pages))
397 break;
398 if (stored_pages < 0)
399 break;
400 /* If memory was pushed to zswap, verify it belongs to memcg */
401 if (stored_pages > stored_pages_threshold) {
402 int zswapped = cg_read_key_long(cgroup: test_group, control: "memory.stat", key: "zswapped ");
403 int delta = stored_pages * 4096 - zswapped;
404 int result_ok = delta < stored_pages * 4096 / 4;
405
406 ret = result_ok ? KSFT_PASS : KSFT_FAIL;
407 break;
408 }
409 }
410
411 kill(child_pid, SIGTERM);
412 waitpid(child_pid, &child_status, 0);
413out:
414 set_min_free_kb(min_free_kb_original);
415 cg_destroy(cgroup: test_group);
416 free(test_group);
417 return ret;
418}
419
420#define T(x) { x, #x }
421struct zswap_test {
422 int (*fn)(const char *root);
423 const char *name;
424} tests[] = {
425 T(test_zswap_usage),
426 T(test_swapin_nozswap),
427 T(test_zswapin),
428 T(test_no_kmem_bypass),
429 T(test_no_invasive_cgroup_shrink),
430};
431#undef T
432
/* Report whether /sys/module/zswap exists. */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;
	return true;
}
437
438int main(int argc, char **argv)
439{
440 char root[PATH_MAX];
441 int i, ret = EXIT_SUCCESS;
442
443 if (cg_find_unified_root(root, len: sizeof(root)))
444 ksft_exit_skip(msg: "cgroup v2 isn't mounted\n");
445
446 if (!zswap_configured())
447 ksft_exit_skip(msg: "zswap isn't configured\n");
448
449 /*
450 * Check that memory controller is available:
451 * memory is listed in cgroup.controllers
452 */
453 if (cg_read_strstr(cgroup: root, control: "cgroup.controllers", needle: "memory"))
454 ksft_exit_skip(msg: "memory controller isn't available\n");
455
456 if (cg_read_strstr(cgroup: root, control: "cgroup.subtree_control", needle: "memory"))
457 if (cg_write(cgroup: root, control: "cgroup.subtree_control", buf: "+memory"))
458 ksft_exit_skip(msg: "Failed to set memory controller\n");
459
460 for (i = 0; i < ARRAY_SIZE(tests); i++) {
461 switch (tests[i].fn(root)) {
462 case KSFT_PASS:
463 ksft_test_result_pass(msg: "%s\n", tests[i].name);
464 break;
465 case KSFT_SKIP:
466 ksft_test_result_skip(msg: "%s\n", tests[i].name);
467 break;
468 default:
469 ret = EXIT_FAILURE;
470 ksft_test_result_fail(msg: "%s\n", tests[i].name);
471 break;
472 }
473 }
474
475 return ret;
476}
477

/* Source: linux/tools/testing/selftests/cgroup/test_zswap.c */