1 | /* |
2 | * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com> |
3 | * |
4 | * Permission to use, copy, modify, and distribute this software for any |
5 | * purpose with or without fee is hereby granted, provided that the above |
6 | * copyright notice and this permission notice appear in all copies. |
7 | * |
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
15 | */ |
16 | /* |
17 | * Fork and exec tiny 1 page executable which precisely controls its VM. |
18 | * Test /proc/$PID/maps |
19 | * Test /proc/$PID/smaps |
20 | * Test /proc/$PID/smaps_rollup |
21 | * Test /proc/$PID/statm |
22 | * |
23 | * FIXME require CONFIG_TMPFS which can be disabled |
24 | * FIXME test other values from "smaps" |
25 | * FIXME support other archs |
26 | */ |
27 | #undef NDEBUG |
28 | #include <assert.h> |
29 | #include <errno.h> |
30 | #include <sched.h> |
31 | #include <signal.h> |
32 | #include <stdbool.h> |
33 | #include <stdint.h> |
34 | #include <stdio.h> |
35 | #include <string.h> |
36 | #include <stdlib.h> |
37 | #include <sys/mount.h> |
38 | #include <sys/types.h> |
39 | #include <sys/stat.h> |
40 | #include <sys/wait.h> |
41 | #include <fcntl.h> |
42 | #include <unistd.h> |
43 | #include <sys/syscall.h> |
44 | #include <sys/uio.h> |
45 | #include <linux/kdev_t.h> |
46 | #include <sys/time.h> |
47 | #include <sys/resource.h> |
48 | |
49 | #include "../kselftest.h" |
50 | |
/*
 * Raw execveat(2) wrapper.
 *
 * Forwards all five arguments unchanged to syscall(SYS_execveat, ...) and
 * hands back whatever syscall() returned.
 */
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
{
	const long rv = syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);

	return rv;
}
55 | |
56 | static void make_private_tmp(void) |
57 | { |
58 | if (unshare(CLONE_NEWNS) == -1) { |
59 | if (errno == ENOSYS || errno == EPERM) { |
60 | exit(4); |
61 | } |
62 | exit(1); |
63 | } |
64 | if (mount(NULL, "/" , NULL, MS_PRIVATE|MS_REC, NULL) == -1) { |
65 | exit(1); |
66 | } |
67 | if (mount(NULL, "/tmp" , "tmpfs" , 0, NULL) == -1) { |
68 | exit(1); |
69 | } |
70 | } |
71 | |
/* PID of the forked test child; stays -1 until main() has forked it. */
static pid_t pid = -1;

/*
 * atexit() handler: send SIGTERM to the test child, if there is one.
 * Does nothing while pid is not a positive PID (no child yet, or we are
 * the child ourselves).
 */
static void ate(void)
{
	if (pid <= 0) {
		return;
	}
	kill(pid, SIGTERM);
}
79 | |
/*
 * ELF64 file header (64 bytes).
 * make_exe() fills one in and writes it at offset 0 of the generated file.
 */
struct elf64_hdr {
	uint8_t		e_ident[16];	/* magic, class, data encoding, version, OS ABI, padding */
	uint16_t	e_type;		/* object file type */
	uint16_t	e_machine;	/* target architecture */
	uint32_t	e_version;	/* object file version */
	uint64_t	e_entry;	/* entry point virtual address */
	uint64_t	e_phoff;	/* file offset of the program header table */
	uint64_t	e_shoff;	/* file offset of the section header table */
	uint32_t	e_flags;	/* processor-specific flags */
	uint16_t	e_ehsize;	/* size of this header */
	uint16_t	e_phentsize;	/* size of one program header entry */
	uint16_t	e_phnum;	/* number of program header entries */
	uint16_t	e_shentsize;	/* size of one section header entry */
	uint16_t	e_shnum;	/* number of section header entries */
	uint16_t	e_shstrndx;	/* section name string table index */
};
96 | |
/*
 * ELF64 program header (56 bytes).
 * make_exe() writes exactly one of these right after the file header.
 */
struct elf64_phdr {
	uint32_t	p_type;		/* segment type */
	uint32_t	p_flags;	/* segment permission flags */
	uint64_t	p_offset;	/* file offset of the segment */
	uint64_t	p_vaddr;	/* virtual address of the segment */
	uint64_t	p_paddr;	/* physical address (left 0 by make_exe()) */
	uint64_t	p_filesz;	/* bytes of the segment present in the file */
	uint64_t	p_memsz;	/* bytes of the segment in memory */
	uint64_t	p_align;	/* segment alignment */
};
107 | |
108 | #ifdef __x86_64__ |
/* Size of the single page the test executable is expected to occupy. */
#define PAGE_SIZE	4096
/* Virtual address the generated ELF image is loaded at. */
#define VADDR		(1UL << 32)
/* Column at which the name field is placed in the expected /proc/$PID/maps line. */
#define MAPS_OFFSET	73

/*
 * Helpers for the hand-assembled payload.  Each macro expands to a
 * comma-separated list of byte values suitable for a uint8_t initializer.
 */

/* The low 32 bits of x as four little-endian bytes. */
#define imm32_le(x) \
	((x) & 0xff), (((x) >> 8) & 0xff), (((x) >> 16) & 0xff), (((x) >> 24) & 0xff)

/* All 64 bits of x as eight little-endian bytes. */
#define imm64_le(x) \
	imm32_le(x), \
	(((x) >> 32) & 0xff), (((x) >> 40) & 0xff), (((x) >> 48) & 0xff), (((x) >> 56) & 0xff)

/* The two-byte "syscall" instruction. */
#define syscall 0x0f, 0x05

/* mov rdi, imm64 */
#define mov_rdi(x) \
	0x48, 0xbf, imm64_le(x)

/* mov rsi, imm64 */
#define mov_rsi(x) \
	0x48, 0xbe, imm64_le(x)

/* mov eax, imm32 */
#define mov_eax(x) \
	0xb8, imm32_le(x)
126 | |
127 | static const uint8_t payload[] = { |
128 | /* Casually unmap stack, vDSO and everything else. */ |
129 | /* munmap */ |
130 | mov_rdi(VADDR + 4096), |
131 | mov_rsi((1ULL << 47) - 4096 - VADDR - 4096), |
132 | mov_eax(11), |
133 | syscall, |
134 | |
135 | /* Ping parent. */ |
136 | /* write(0, &c, 1); */ |
137 | 0x31, 0xff, /* xor edi, edi */ |
138 | 0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00, /* lea rsi, [rip] */ |
139 | 0xba, 0x01, 0x00, 0x00, 0x00, /* mov edx, 1 */ |
140 | mov_eax(1), |
141 | syscall, |
142 | |
143 | /* 1: pause(); */ |
144 | mov_eax(34), |
145 | syscall, |
146 | |
147 | 0xeb, 0xf7, /* jmp 1b */ |
148 | }; |
149 | |
150 | static int make_exe(const uint8_t *payload, size_t len) |
151 | { |
152 | struct elf64_hdr h; |
153 | struct elf64_phdr ph; |
154 | |
155 | struct iovec iov[3] = { |
156 | {&h, sizeof(struct elf64_hdr)}, |
157 | {&ph, sizeof(struct elf64_phdr)}, |
158 | {(void *)payload, len}, |
159 | }; |
160 | int fd, fd1; |
161 | char buf[64]; |
162 | |
163 | memset(&h, 0, sizeof(h)); |
164 | h.e_ident[0] = 0x7f; |
165 | h.e_ident[1] = 'E'; |
166 | h.e_ident[2] = 'L'; |
167 | h.e_ident[3] = 'F'; |
168 | h.e_ident[4] = 2; |
169 | h.e_ident[5] = 1; |
170 | h.e_ident[6] = 1; |
171 | h.e_ident[7] = 0; |
172 | h.e_type = 2; |
173 | h.e_machine = 0x3e; |
174 | h.e_version = 1; |
175 | h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr); |
176 | h.e_phoff = sizeof(struct elf64_hdr); |
177 | h.e_shoff = 0; |
178 | h.e_flags = 0; |
179 | h.e_ehsize = sizeof(struct elf64_hdr); |
180 | h.e_phentsize = sizeof(struct elf64_phdr); |
181 | h.e_phnum = 1; |
182 | h.e_shentsize = 0; |
183 | h.e_shnum = 0; |
184 | h.e_shstrndx = 0; |
185 | |
186 | memset(&ph, 0, sizeof(ph)); |
187 | ph.p_type = 1; |
188 | ph.p_flags = (1<<2)|1; |
189 | ph.p_offset = 0; |
190 | ph.p_vaddr = VADDR; |
191 | ph.p_paddr = 0; |
192 | ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; |
193 | ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; |
194 | ph.p_align = 4096; |
195 | |
196 | fd = openat(AT_FDCWD, "/tmp" , O_WRONLY|O_EXCL|O_TMPFILE, 0700); |
197 | if (fd == -1) { |
198 | exit(1); |
199 | } |
200 | |
201 | if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) { |
202 | exit(1); |
203 | } |
204 | |
205 | /* Avoid ETXTBSY on exec. */ |
206 | snprintf(buf, sizeof(buf), "/proc/self/fd/%u" , fd); |
207 | fd1 = open(buf, O_RDONLY|O_CLOEXEC); |
208 | close(fd); |
209 | |
210 | return fd1; |
211 | } |
212 | #endif |
213 | |
/*
 * Detected vsyscall mode, set by vsyscall():
 *
 * 0: vsyscall VMA doesn't exist	vsyscall=none
 * 1: vsyscall VMA is --xp		vsyscall=xonly
 * 2: vsyscall VMA is r-xp		vsyscall=emulate
 */
static volatile int g_vsyscall;

/* Expected [vsyscall] line for the detected mode; points at one of the strings below. */
static const char *str_vsyscall;

/*
 * The expected lines are compared byte-for-byte against /proc/$PID/maps
 * output, so the name field has to start at column 73 (MAPS_OFFSET): the
 * 55-character "address perms offset dev inode" prefix is followed by
 * 18 spaces.
 */
static const char str_vsyscall_0[] = "";
static const char str_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n";
static const char str_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n";
227 | |
228 | #ifdef __x86_64__ |
229 | static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) |
230 | { |
231 | _exit(g_vsyscall); |
232 | } |
233 | |
234 | /* |
235 | * vsyscall page can't be unmapped, probe it directly. |
236 | */ |
237 | static void vsyscall(void) |
238 | { |
239 | pid_t pid; |
240 | int wstatus; |
241 | |
242 | pid = fork(); |
243 | if (pid < 0) { |
244 | fprintf(stderr, "fork, errno %d\n" , errno); |
245 | exit(1); |
246 | } |
247 | if (pid == 0) { |
248 | struct rlimit rlim = {0, 0}; |
249 | (void)setrlimit(RLIMIT_CORE, &rlim); |
250 | |
251 | /* Hide "segfault at ffffffffff600000" messages. */ |
252 | struct sigaction act; |
253 | memset(&act, 0, sizeof(struct sigaction)); |
254 | act.sa_flags = SA_SIGINFO; |
255 | act.sa_sigaction = sigaction_SIGSEGV; |
256 | (void)sigaction(SIGSEGV, &act, NULL); |
257 | |
258 | g_vsyscall = 0; |
259 | /* gettimeofday(NULL, NULL); */ |
260 | uint64_t rax = 0xffffffffff600000; |
261 | asm volatile ( |
262 | "call *%[rax]" |
263 | : [rax] "+a" (rax) |
264 | : "D" (NULL), "S" (NULL) |
265 | : "rcx" , "r11" |
266 | ); |
267 | |
268 | g_vsyscall = 1; |
269 | *(volatile int *)0xffffffffff600000UL; |
270 | |
271 | g_vsyscall = 2; |
272 | exit(g_vsyscall); |
273 | } |
274 | waitpid(pid, &wstatus, 0); |
275 | if (WIFEXITED(wstatus)) { |
276 | g_vsyscall = WEXITSTATUS(wstatus); |
277 | } else { |
278 | fprintf(stderr, "error: wstatus %08x\n" , wstatus); |
279 | exit(1); |
280 | } |
281 | } |
282 | |
283 | int main(void) |
284 | { |
285 | int pipefd[2]; |
286 | int exec_fd; |
287 | |
288 | vsyscall(); |
289 | switch (g_vsyscall) { |
290 | case 0: |
291 | str_vsyscall = str_vsyscall_0; |
292 | break; |
293 | case 1: |
294 | str_vsyscall = str_vsyscall_1; |
295 | break; |
296 | case 2: |
297 | str_vsyscall = str_vsyscall_2; |
298 | break; |
299 | default: |
300 | abort(); |
301 | } |
302 | |
303 | atexit(ate); |
304 | |
305 | make_private_tmp(); |
306 | |
307 | /* Reserve fd 0 for 1-byte pipe ping from child. */ |
308 | close(0); |
309 | if (open("/" , O_RDONLY|O_DIRECTORY|O_PATH) != 0) { |
310 | return 1; |
311 | } |
312 | |
313 | exec_fd = make_exe(payload, sizeof(payload)); |
314 | |
315 | if (pipe(pipefd) == -1) { |
316 | return 1; |
317 | } |
318 | if (dup2(pipefd[1], 0) != 0) { |
319 | return 1; |
320 | } |
321 | |
322 | pid = fork(); |
323 | if (pid == -1) { |
324 | return 1; |
325 | } |
326 | if (pid == 0) { |
327 | sys_execveat(exec_fd, "" , NULL, NULL, AT_EMPTY_PATH); |
328 | return 1; |
329 | } |
330 | |
331 | char _; |
332 | if (read(pipefd[0], &_, 1) != 1) { |
333 | return 1; |
334 | } |
335 | |
336 | struct stat st; |
337 | if (fstat(exec_fd, &st) == -1) { |
338 | return 1; |
339 | } |
340 | |
341 | /* Generate "head -n1 /proc/$PID/maps" */ |
342 | char buf0[256]; |
343 | memset(buf0, ' ', sizeof(buf0)); |
344 | int len = snprintf(buf0, sizeof(buf0), |
345 | "%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu" , |
346 | VADDR, VADDR + PAGE_SIZE, |
347 | MAJOR(st.st_dev), MINOR(st.st_dev), |
348 | (unsigned long long)st.st_ino); |
349 | buf0[len] = ' '; |
350 | snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET, |
351 | "/tmp/#%llu (deleted)\n" , (unsigned long long)st.st_ino); |
352 | |
353 | /* Test /proc/$PID/maps */ |
354 | { |
355 | const size_t len = strlen(buf0) + strlen(str_vsyscall); |
356 | char buf[256]; |
357 | ssize_t rv; |
358 | int fd; |
359 | |
360 | snprintf(buf, sizeof(buf), "/proc/%u/maps" , pid); |
361 | fd = open(buf, O_RDONLY); |
362 | if (fd == -1) { |
363 | return 1; |
364 | } |
365 | rv = read(fd, buf, sizeof(buf)); |
366 | assert(rv == len); |
367 | assert(memcmp(buf, buf0, strlen(buf0)) == 0); |
368 | if (g_vsyscall > 0) { |
369 | assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0); |
370 | } |
371 | } |
372 | |
373 | /* Test /proc/$PID/smaps */ |
374 | { |
375 | char buf[4096]; |
376 | ssize_t rv; |
377 | int fd; |
378 | |
379 | snprintf(buf, sizeof(buf), "/proc/%u/smaps" , pid); |
380 | fd = open(buf, O_RDONLY); |
381 | if (fd == -1) { |
382 | return 1; |
383 | } |
384 | rv = read(fd, buf, sizeof(buf)); |
385 | assert(0 <= rv && rv <= sizeof(buf)); |
386 | |
387 | assert(rv >= strlen(buf0)); |
388 | assert(memcmp(buf, buf0, strlen(buf0)) == 0); |
389 | |
390 | #define "Rss: 4 kB\n" |
391 | #define "Rss: 0 kB\n" |
392 | #define PSS1 "Pss: 4 kB\n" |
393 | #define PSS2 "Pss: 0 kB\n" |
394 | assert(memmem(buf, rv, RSS1, strlen(RSS1)) || |
395 | memmem(buf, rv, RSS2, strlen(RSS2))); |
396 | assert(memmem(buf, rv, PSS1, strlen(PSS1)) || |
397 | memmem(buf, rv, PSS2, strlen(PSS2))); |
398 | |
399 | static const char *S[] = { |
400 | "Size: 4 kB\n" , |
401 | "KernelPageSize: 4 kB\n" , |
402 | "MMUPageSize: 4 kB\n" , |
403 | "Anonymous: 0 kB\n" , |
404 | "AnonHugePages: 0 kB\n" , |
405 | "Shared_Hugetlb: 0 kB\n" , |
406 | "Private_Hugetlb: 0 kB\n" , |
407 | "Locked: 0 kB\n" , |
408 | }; |
409 | int i; |
410 | |
411 | for (i = 0; i < ARRAY_SIZE(S); i++) { |
412 | assert(memmem(buf, rv, S[i], strlen(S[i]))); |
413 | } |
414 | |
415 | if (g_vsyscall > 0) { |
416 | assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall))); |
417 | } |
418 | } |
419 | |
420 | /* Test /proc/$PID/smaps_rollup */ |
421 | { |
422 | char bufr[256]; |
423 | memset(bufr, ' ', sizeof(bufr)); |
424 | len = snprintf(bufr, sizeof(bufr), |
425 | "%08lx-%08lx ---p 00000000 00:00 0" , |
426 | VADDR, VADDR + PAGE_SIZE); |
427 | bufr[len] = ' '; |
428 | snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET, |
429 | "[rollup]\n" ); |
430 | |
431 | char buf[1024]; |
432 | ssize_t rv; |
433 | int fd; |
434 | |
435 | snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup" , pid); |
436 | fd = open(buf, O_RDONLY); |
437 | if (fd == -1) { |
438 | return 1; |
439 | } |
440 | rv = read(fd, buf, sizeof(buf)); |
441 | assert(0 <= rv && rv <= sizeof(buf)); |
442 | |
443 | assert(rv >= strlen(bufr)); |
444 | assert(memcmp(buf, bufr, strlen(bufr)) == 0); |
445 | |
446 | assert(memmem(buf, rv, RSS1, strlen(RSS1)) || |
447 | memmem(buf, rv, RSS2, strlen(RSS2))); |
448 | assert(memmem(buf, rv, PSS1, strlen(PSS1)) || |
449 | memmem(buf, rv, PSS2, strlen(PSS2))); |
450 | |
451 | static const char *S[] = { |
452 | "Anonymous: 0 kB\n" , |
453 | "AnonHugePages: 0 kB\n" , |
454 | "Shared_Hugetlb: 0 kB\n" , |
455 | "Private_Hugetlb: 0 kB\n" , |
456 | "Locked: 0 kB\n" , |
457 | }; |
458 | int i; |
459 | |
460 | for (i = 0; i < ARRAY_SIZE(S); i++) { |
461 | assert(memmem(buf, rv, S[i], strlen(S[i]))); |
462 | } |
463 | } |
464 | |
465 | /* Test /proc/$PID/statm */ |
466 | { |
467 | char buf[64]; |
468 | ssize_t rv; |
469 | int fd; |
470 | |
471 | snprintf(buf, sizeof(buf), "/proc/%u/statm" , pid); |
472 | fd = open(buf, O_RDONLY); |
473 | if (fd == -1) { |
474 | return 1; |
475 | } |
476 | rv = read(fd, buf, sizeof(buf)); |
477 | assert(rv == 7 * 2); |
478 | |
479 | assert(buf[0] == '1'); /* ->total_vm */ |
480 | assert(buf[1] == ' '); |
481 | assert(buf[2] == '0' || buf[2] == '1'); /* rss */ |
482 | assert(buf[3] == ' '); |
483 | assert(buf[4] == '0' || buf[2] == '1'); /* file rss */ |
484 | assert(buf[5] == ' '); |
485 | assert(buf[6] == '1'); /* ELF executable segments */ |
486 | assert(buf[7] == ' '); |
487 | assert(buf[8] == '0'); |
488 | assert(buf[9] == ' '); |
489 | assert(buf[10] == '0'); /* ->data_vm + ->stack_vm */ |
490 | assert(buf[11] == ' '); |
491 | assert(buf[12] == '0'); |
492 | assert(buf[13] == '\n'); |
493 | } |
494 | |
495 | return 0; |
496 | } |
497 | #else |
/*
 * Fallback entry point for builds where __x86_64__ is not defined: nothing
 * is tested and the process reports exit status 4, the same status
 * make_private_tmp() uses when unshare() is unavailable (presumably the
 * kselftest "skip" code).
 */
int main(void)
{
	enum { EXIT_STATUS_UNSUPPORTED = 4 };

	return EXIT_STATUS_UNSUPPORTED;
}
502 | #endif |
503 | |