1 | #if defined __amd64__ || defined __i386__ |
2 | /* |
3 | * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com> |
4 | * |
5 | * Permission to use, copy, modify, and distribute this software for any |
6 | * purpose with or without fee is hereby granted, provided that the above |
7 | * copyright notice and this permission notice appear in all copies. |
8 | * |
9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
16 | */ |
17 | /* |
18 | * Create a process without mappings by unmapping everything at once and |
19 | * holding it with ptrace(2). See what happens to |
20 | * |
21 | * /proc/${pid}/maps |
22 | * /proc/${pid}/numa_maps |
23 | * /proc/${pid}/smaps |
24 | * /proc/${pid}/smaps_rollup |
25 | */ |
26 | #undef _GNU_SOURCE |
27 | #define _GNU_SOURCE |
28 | |
29 | #undef NDEBUG |
30 | #include <assert.h> |
31 | #include <errno.h> |
32 | #include <stdint.h> |
33 | #include <stdio.h> |
34 | #include <stdlib.h> |
35 | #include <string.h> |
36 | #include <fcntl.h> |
37 | #include <sys/mman.h> |
38 | #include <sys/ptrace.h> |
39 | #include <sys/resource.h> |
40 | #include <sys/syscall.h> |
41 | #include <sys/types.h> |
42 | #include <sys/wait.h> |
43 | #include <unistd.h> |
44 | |
45 | #ifdef __amd64__ |
46 | #define TEST_VSYSCALL |
47 | #endif |
48 | |
49 | #if defined __amd64__ |
50 | #ifndef SYS_pkey_alloc |
51 | #define SYS_pkey_alloc 330 |
52 | #endif |
53 | #ifndef SYS_pkey_free |
54 | #define SYS_pkey_free 331 |
55 | #endif |
56 | #elif defined __i386__ |
57 | #ifndef SYS_pkey_alloc |
58 | #define SYS_pkey_alloc 381 |
59 | #endif |
60 | #ifndef SYS_pkey_free |
61 | #define SYS_pkey_free 382 |
62 | #endif |
63 | #else |
64 | #error "SYS_pkey_alloc" |
65 | #endif |
66 | |
67 | static int g_protection_key_support; |
68 | |
69 | static int protection_key_support(void) |
70 | { |
71 | long rv = syscall(SYS_pkey_alloc, 0, 0); |
72 | if (rv > 0) { |
73 | syscall(SYS_pkey_free, (int)rv); |
74 | return 1; |
75 | } else if (rv == -1 && errno == ENOSYS) { |
76 | return 0; |
77 | } else if (rv == -1 && errno == EINVAL) { |
78 | // ospke=n |
79 | return 0; |
80 | } else { |
81 | fprintf(stderr, "%s: error: rv %ld, errno %d\n" , __func__, rv, errno); |
82 | exit(EXIT_FAILURE); |
83 | } |
84 | } |
85 | |
86 | /* |
87 | * 0: vsyscall VMA doesn't exist vsyscall=none |
88 | * 1: vsyscall VMA is --xp vsyscall=xonly |
89 | * 2: vsyscall VMA is r-xp vsyscall=emulate |
90 | */ |
91 | static volatile int g_vsyscall; |
92 | static const char *g_proc_pid_maps_vsyscall; |
93 | static const char *g_proc_pid_smaps_vsyscall; |
94 | |
95 | static const char proc_pid_maps_vsyscall_0[] = "" ; |
96 | static const char proc_pid_maps_vsyscall_1[] = |
97 | "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n" ; |
98 | static const char proc_pid_maps_vsyscall_2[] = |
99 | "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" ; |
100 | |
101 | static const char proc_pid_smaps_vsyscall_0[] = "" ; |
102 | |
103 | static const char proc_pid_smaps_vsyscall_1[] = |
104 | "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n" |
105 | "Size: 4 kB\n" |
106 | "KernelPageSize: 4 kB\n" |
107 | "MMUPageSize: 4 kB\n" |
108 | "Rss: 0 kB\n" |
109 | "Pss: 0 kB\n" |
110 | "Pss_Dirty: 0 kB\n" |
111 | "Shared_Clean: 0 kB\n" |
112 | "Shared_Dirty: 0 kB\n" |
113 | "Private_Clean: 0 kB\n" |
114 | "Private_Dirty: 0 kB\n" |
115 | "Referenced: 0 kB\n" |
116 | "Anonymous: 0 kB\n" |
117 | "KSM: 0 kB\n" |
118 | "LazyFree: 0 kB\n" |
119 | "AnonHugePages: 0 kB\n" |
120 | "ShmemPmdMapped: 0 kB\n" |
121 | "FilePmdMapped: 0 kB\n" |
122 | "Shared_Hugetlb: 0 kB\n" |
123 | "Private_Hugetlb: 0 kB\n" |
124 | "Swap: 0 kB\n" |
125 | "SwapPss: 0 kB\n" |
126 | "Locked: 0 kB\n" |
127 | "THPeligible: 0\n" |
128 | ; |
129 | |
130 | static const char proc_pid_smaps_vsyscall_2[] = |
131 | "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" |
132 | "Size: 4 kB\n" |
133 | "KernelPageSize: 4 kB\n" |
134 | "MMUPageSize: 4 kB\n" |
135 | "Rss: 0 kB\n" |
136 | "Pss: 0 kB\n" |
137 | "Pss_Dirty: 0 kB\n" |
138 | "Shared_Clean: 0 kB\n" |
139 | "Shared_Dirty: 0 kB\n" |
140 | "Private_Clean: 0 kB\n" |
141 | "Private_Dirty: 0 kB\n" |
142 | "Referenced: 0 kB\n" |
143 | "Anonymous: 0 kB\n" |
144 | "KSM: 0 kB\n" |
145 | "LazyFree: 0 kB\n" |
146 | "AnonHugePages: 0 kB\n" |
147 | "ShmemPmdMapped: 0 kB\n" |
148 | "FilePmdMapped: 0 kB\n" |
149 | "Shared_Hugetlb: 0 kB\n" |
150 | "Private_Hugetlb: 0 kB\n" |
151 | "Swap: 0 kB\n" |
152 | "SwapPss: 0 kB\n" |
153 | "Locked: 0 kB\n" |
154 | "THPeligible: 0\n" |
155 | ; |
156 | |
157 | static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) |
158 | { |
159 | _exit(EXIT_FAILURE); |
160 | } |
161 | |
162 | #ifdef TEST_VSYSCALL |
163 | static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___) |
164 | { |
165 | _exit(g_vsyscall); |
166 | } |
167 | |
168 | /* |
169 | * vsyscall page can't be unmapped, probe it directly. |
170 | */ |
171 | static void vsyscall(void) |
172 | { |
173 | pid_t pid; |
174 | int wstatus; |
175 | |
176 | pid = fork(); |
177 | if (pid < 0) { |
178 | fprintf(stderr, "fork, errno %d\n" , errno); |
179 | exit(1); |
180 | } |
181 | if (pid == 0) { |
182 | setrlimit(RLIMIT_CORE, &(struct rlimit){}); |
183 | |
184 | /* Hide "segfault at ffffffffff600000" messages. */ |
185 | struct sigaction act = {}; |
186 | act.sa_flags = SA_SIGINFO; |
187 | act.sa_sigaction = sigaction_SIGSEGV_vsyscall; |
188 | sigaction(SIGSEGV, &act, NULL); |
189 | |
190 | g_vsyscall = 0; |
191 | /* gettimeofday(NULL, NULL); */ |
192 | uint64_t rax = 0xffffffffff600000; |
193 | asm volatile ( |
194 | "call *%[rax]" |
195 | : [rax] "+a" (rax) |
196 | : "D" (NULL), "S" (NULL) |
197 | : "rcx" , "r11" |
198 | ); |
199 | |
200 | g_vsyscall = 1; |
201 | *(volatile int *)0xffffffffff600000UL; |
202 | |
203 | g_vsyscall = 2; |
204 | exit(g_vsyscall); |
205 | } |
206 | waitpid(pid, &wstatus, 0); |
207 | if (WIFEXITED(wstatus)) { |
208 | g_vsyscall = WEXITSTATUS(wstatus); |
209 | } else { |
210 | fprintf(stderr, "error: vsyscall wstatus %08x\n" , wstatus); |
211 | exit(1); |
212 | } |
213 | } |
214 | #endif |
215 | |
216 | static int test_proc_pid_maps(pid_t pid) |
217 | { |
218 | char buf[4096]; |
219 | snprintf(buf, sizeof(buf), "/proc/%u/maps" , pid); |
220 | int fd = open(buf, O_RDONLY); |
221 | if (fd == -1) { |
222 | perror("open /proc/${pid}/maps" ); |
223 | return EXIT_FAILURE; |
224 | } else { |
225 | ssize_t rv = read(fd, buf, sizeof(buf)); |
226 | close(fd); |
227 | if (g_vsyscall == 0) { |
228 | assert(rv == 0); |
229 | } else { |
230 | size_t len = strlen(g_proc_pid_maps_vsyscall); |
231 | assert(rv == len); |
232 | assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0); |
233 | } |
234 | return EXIT_SUCCESS; |
235 | } |
236 | } |
237 | |
238 | static int test_proc_pid_numa_maps(pid_t pid) |
239 | { |
240 | char buf[4096]; |
241 | snprintf(buf, sizeof(buf), "/proc/%u/numa_maps" , pid); |
242 | int fd = open(buf, O_RDONLY); |
243 | if (fd == -1) { |
244 | if (errno == ENOENT) { |
245 | /* |
246 | * /proc/${pid}/numa_maps is under CONFIG_NUMA, |
247 | * it doesn't necessarily exist. |
248 | */ |
249 | return EXIT_SUCCESS; |
250 | } |
251 | perror("open /proc/${pid}/numa_maps" ); |
252 | return EXIT_FAILURE; |
253 | } else { |
254 | ssize_t rv = read(fd, buf, sizeof(buf)); |
255 | close(fd); |
256 | assert(rv == 0); |
257 | return EXIT_SUCCESS; |
258 | } |
259 | } |
260 | |
261 | static int test_proc_pid_smaps(pid_t pid) |
262 | { |
263 | char buf[4096]; |
264 | snprintf(buf, sizeof(buf), "/proc/%u/smaps" , pid); |
265 | int fd = open(buf, O_RDONLY); |
266 | if (fd == -1) { |
267 | if (errno == ENOENT) { |
268 | /* |
269 | * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR, |
270 | * it doesn't necessarily exist. |
271 | */ |
272 | return EXIT_SUCCESS; |
273 | } |
274 | perror("open /proc/${pid}/smaps" ); |
275 | return EXIT_FAILURE; |
276 | } |
277 | ssize_t rv = read(fd, buf, sizeof(buf)); |
278 | close(fd); |
279 | |
280 | assert(0 <= rv); |
281 | assert(rv <= sizeof(buf)); |
282 | |
283 | if (g_vsyscall == 0) { |
284 | assert(rv == 0); |
285 | } else { |
286 | size_t len = strlen(g_proc_pid_smaps_vsyscall); |
287 | assert(rv > len); |
288 | assert(memcmp(buf, g_proc_pid_smaps_vsyscall, len) == 0); |
289 | |
290 | if (g_protection_key_support) { |
291 | #define PROTECTION_KEY "ProtectionKey: 0\n" |
292 | assert(memmem(buf, rv, PROTECTION_KEY, strlen(PROTECTION_KEY))); |
293 | } |
294 | } |
295 | |
296 | return EXIT_SUCCESS; |
297 | } |
298 | |
299 | static const char g_smaps_rollup[] = |
300 | "00000000-00000000 ---p 00000000 00:00 0 [rollup]\n" |
301 | "Rss: 0 kB\n" |
302 | "Pss: 0 kB\n" |
303 | "Pss_Dirty: 0 kB\n" |
304 | "Pss_Anon: 0 kB\n" |
305 | "Pss_File: 0 kB\n" |
306 | "Pss_Shmem: 0 kB\n" |
307 | "Shared_Clean: 0 kB\n" |
308 | "Shared_Dirty: 0 kB\n" |
309 | "Private_Clean: 0 kB\n" |
310 | "Private_Dirty: 0 kB\n" |
311 | "Referenced: 0 kB\n" |
312 | "Anonymous: 0 kB\n" |
313 | "KSM: 0 kB\n" |
314 | "LazyFree: 0 kB\n" |
315 | "AnonHugePages: 0 kB\n" |
316 | "ShmemPmdMapped: 0 kB\n" |
317 | "FilePmdMapped: 0 kB\n" |
318 | "Shared_Hugetlb: 0 kB\n" |
319 | "Private_Hugetlb: 0 kB\n" |
320 | "Swap: 0 kB\n" |
321 | "SwapPss: 0 kB\n" |
322 | "Locked: 0 kB\n" |
323 | ; |
324 | |
325 | static int test_proc_pid_smaps_rollup(pid_t pid) |
326 | { |
327 | char buf[4096]; |
328 | snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup" , pid); |
329 | int fd = open(buf, O_RDONLY); |
330 | if (fd == -1) { |
331 | if (errno == ENOENT) { |
332 | /* |
333 | * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR, |
334 | * it doesn't necessarily exist. |
335 | */ |
336 | return EXIT_SUCCESS; |
337 | } |
338 | perror("open /proc/${pid}/smaps_rollup" ); |
339 | return EXIT_FAILURE; |
340 | } else { |
341 | ssize_t rv = read(fd, buf, sizeof(buf)); |
342 | close(fd); |
343 | assert(rv == sizeof(g_smaps_rollup) - 1); |
344 | assert(memcmp(buf, g_smaps_rollup, sizeof(g_smaps_rollup) - 1) == 0); |
345 | return EXIT_SUCCESS; |
346 | } |
347 | } |
348 | |
349 | static const char *parse_u64(const char *p, const char *const end, uint64_t *rv) |
350 | { |
351 | *rv = 0; |
352 | for (; p != end; p += 1) { |
353 | if ('0' <= *p && *p <= '9') { |
354 | assert(!__builtin_mul_overflow(*rv, 10, rv)); |
355 | assert(!__builtin_add_overflow(*rv, *p - '0', rv)); |
356 | } else { |
357 | break; |
358 | } |
359 | } |
360 | assert(p != end); |
361 | return p; |
362 | } |
363 | |
364 | /* |
365 | * There seems to be 2 types of valid output: |
366 | * "0 A A B 0 0 0\n" for dynamic exeuctables, |
367 | * "0 0 0 B 0 0 0\n" for static executables. |
368 | */ |
369 | static int test_proc_pid_statm(pid_t pid) |
370 | { |
371 | char buf[4096]; |
372 | snprintf(buf, sizeof(buf), "/proc/%u/statm" , pid); |
373 | int fd = open(buf, O_RDONLY); |
374 | if (fd == -1) { |
375 | perror("open /proc/${pid}/statm" ); |
376 | return EXIT_FAILURE; |
377 | } |
378 | |
379 | ssize_t rv = read(fd, buf, sizeof(buf)); |
380 | close(fd); |
381 | |
382 | assert(rv >= 0); |
383 | assert(rv <= sizeof(buf)); |
384 | if (0) { |
385 | write(1, buf, rv); |
386 | } |
387 | |
388 | const char *p = buf; |
389 | const char *const end = p + rv; |
390 | |
391 | /* size */ |
392 | assert(p != end && *p++ == '0'); |
393 | assert(p != end && *p++ == ' '); |
394 | |
395 | uint64_t resident; |
396 | p = parse_u64(p, end, &resident); |
397 | assert(p != end && *p++ == ' '); |
398 | |
399 | uint64_t shared; |
400 | p = parse_u64(p, end, &shared); |
401 | assert(p != end && *p++ == ' '); |
402 | |
403 | uint64_t text; |
404 | p = parse_u64(p, end, &text); |
405 | assert(p != end && *p++ == ' '); |
406 | |
407 | assert(p != end && *p++ == '0'); |
408 | assert(p != end && *p++ == ' '); |
409 | |
410 | /* data */ |
411 | assert(p != end && *p++ == '0'); |
412 | assert(p != end && *p++ == ' '); |
413 | |
414 | assert(p != end && *p++ == '0'); |
415 | assert(p != end && *p++ == '\n'); |
416 | |
417 | assert(p == end); |
418 | |
419 | /* |
420 | * "text" is "mm->end_code - mm->start_code" at execve(2) time. |
421 | * munmap() doesn't change it. It can be anything (just link |
422 | * statically). It can't be 0 because executing to this point |
423 | * implies at least 1 page of code. |
424 | */ |
425 | assert(text > 0); |
426 | |
427 | /* |
428 | * These two are always equal. Always 0 for statically linked |
429 | * executables and sometimes 0 for dynamically linked executables. |
430 | * There is no way to tell one from another without parsing ELF |
431 | * which is too much for this test. |
432 | */ |
433 | assert(resident == shared); |
434 | |
435 | return EXIT_SUCCESS; |
436 | } |
437 | |
438 | int main(void) |
439 | { |
440 | int rv = EXIT_SUCCESS; |
441 | |
442 | #ifdef TEST_VSYSCALL |
443 | vsyscall(); |
444 | #endif |
445 | |
446 | switch (g_vsyscall) { |
447 | case 0: |
448 | g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_0; |
449 | g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0; |
450 | break; |
451 | case 1: |
452 | g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_1; |
453 | g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1; |
454 | break; |
455 | case 2: |
456 | g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_2; |
457 | g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2; |
458 | break; |
459 | default: |
460 | abort(); |
461 | } |
462 | |
463 | g_protection_key_support = protection_key_support(); |
464 | |
465 | pid_t pid = fork(); |
466 | if (pid == -1) { |
467 | perror("fork" ); |
468 | return EXIT_FAILURE; |
469 | } else if (pid == 0) { |
470 | rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL); |
471 | if (rv != 0) { |
472 | if (errno == EPERM) { |
473 | fprintf(stderr, |
474 | "Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n" |
475 | ); |
476 | kill(getppid(), SIGTERM); |
477 | return EXIT_FAILURE; |
478 | } |
479 | perror("ptrace PTRACE_TRACEME" ); |
480 | return EXIT_FAILURE; |
481 | } |
482 | |
483 | /* |
484 | * Hide "segfault at ..." messages. Signal handler won't run. |
485 | */ |
486 | struct sigaction act = {}; |
487 | act.sa_flags = SA_SIGINFO; |
488 | act.sa_sigaction = sigaction_SIGSEGV; |
489 | sigaction(SIGSEGV, &act, NULL); |
490 | |
491 | #ifdef __amd64__ |
492 | munmap(NULL, ((size_t)1 << 47) - 4096); |
493 | #elif defined __i386__ |
494 | { |
495 | size_t len; |
496 | |
497 | for (len = -4096;; len -= 4096) { |
498 | munmap(NULL, len); |
499 | } |
500 | } |
501 | #else |
502 | #error "implement 'unmap everything'" |
503 | #endif |
504 | return EXIT_FAILURE; |
505 | } else { |
506 | /* |
507 | * TODO find reliable way to signal parent that munmap(2) completed. |
508 | * Child can't do it directly because it effectively doesn't exist |
509 | * anymore. Looking at child's VM files isn't 100% reliable either: |
510 | * due to a bug they may not become empty or empty-like. |
511 | */ |
512 | sleep(1); |
513 | |
514 | if (rv == EXIT_SUCCESS) { |
515 | rv = test_proc_pid_maps(pid); |
516 | } |
517 | if (rv == EXIT_SUCCESS) { |
518 | rv = test_proc_pid_numa_maps(pid); |
519 | } |
520 | if (rv == EXIT_SUCCESS) { |
521 | rv = test_proc_pid_smaps(pid); |
522 | } |
523 | if (rv == EXIT_SUCCESS) { |
524 | rv = test_proc_pid_smaps_rollup(pid); |
525 | } |
526 | if (rv == EXIT_SUCCESS) { |
527 | rv = test_proc_pid_statm(pid); |
528 | } |
529 | |
530 | /* Cut the rope. */ |
531 | int wstatus; |
532 | waitpid(pid, &wstatus, 0); |
533 | assert(WIFSTOPPED(wstatus)); |
534 | assert(WSTOPSIG(wstatus) == SIGSEGV); |
535 | } |
536 | |
537 | return rv; |
538 | } |
539 | #else |
540 | int main(void) |
541 | { |
542 | return 4; |
543 | } |
544 | #endif |
545 | |