1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | |
3 | #define _GNU_SOURCE |
4 | |
5 | #include <stdio.h> |
6 | #include <sys/time.h> |
7 | #include <time.h> |
8 | #include <stdlib.h> |
9 | #include <sys/syscall.h> |
10 | #include <unistd.h> |
11 | #include <dlfcn.h> |
12 | #include <string.h> |
13 | #include <inttypes.h> |
14 | #include <signal.h> |
15 | #include <sys/ucontext.h> |
16 | #include <errno.h> |
17 | #include <err.h> |
18 | #include <sched.h> |
19 | #include <stdbool.h> |
20 | #include <setjmp.h> |
21 | #include <sys/uio.h> |
22 | |
23 | #include "helpers.h" |
24 | |
/*
 * VSYS() yields its argument on x86-64 builds and 0 on every other build,
 * so the fixed vsyscall addresses below become null pointers there.
 */
#if defined(__x86_64__)
# define VSYS(x) (x)
#else
# define VSYS(x) 0
#endif

/* Fallback getcpu syscall number when the libc headers do not provide one. */
#if !defined(SYS_getcpu)
# if defined(__x86_64__)
#  define SYS_getcpu 309
# else
#  define SYS_getcpu 318
# endif
#endif

/*
 * Size of the buffers used to read one line of /proc/self/maps; longer
 * lines are not handled and get skipped.
 */
#define MAPS_LINE_LEN 128
41 | |
/*
 * Install @handler as the three-argument (SA_SIGINFO) handler for @sig.
 * @flags is OR-ed into sa_flags and no extra signals are blocked while
 * the handler runs.  Terminates the program through err() if sigaction()
 * fails.
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction act;

	memset(&act, 0, sizeof(act));
	sigemptyset(&act.sa_mask);
	act.sa_flags = SA_SIGINFO | flags;
	act.sa_sigaction = handler;

	if (sigaction(sig, &act, NULL) != 0)
		err(1, "sigaction");
}
53 | |
54 | /* vsyscalls and vDSO */ |
55 | bool vsyscall_map_r = false, vsyscall_map_x = false; |
56 | |
57 | typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); |
58 | const gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000); |
59 | gtod_t vdso_gtod; |
60 | |
61 | typedef int (*vgettime_t)(clockid_t, struct timespec *); |
62 | vgettime_t vdso_gettime; |
63 | |
64 | typedef long (*time_func_t)(time_t *t); |
65 | const time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400); |
66 | time_func_t vdso_time; |
67 | |
68 | typedef long (*getcpu_t)(unsigned *, unsigned *, void *); |
69 | const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); |
70 | getcpu_t vdso_getcpu; |
71 | |
72 | static void init_vdso(void) |
73 | { |
74 | void *vdso = dlopen("linux-vdso.so.1" , RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); |
75 | if (!vdso) |
76 | vdso = dlopen("linux-gate.so.1" , RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); |
77 | if (!vdso) { |
78 | printf("[WARN]\tfailed to find vDSO\n" ); |
79 | return; |
80 | } |
81 | |
82 | vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday" ); |
83 | if (!vdso_gtod) |
84 | printf("[WARN]\tfailed to find gettimeofday in vDSO\n" ); |
85 | |
86 | vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime" ); |
87 | if (!vdso_gettime) |
88 | printf("[WARN]\tfailed to find clock_gettime in vDSO\n" ); |
89 | |
90 | vdso_time = (time_func_t)dlsym(vdso, "__vdso_time" ); |
91 | if (!vdso_time) |
92 | printf("[WARN]\tfailed to find time in vDSO\n" ); |
93 | |
94 | vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu" ); |
95 | if (!vdso_getcpu) |
96 | printf("[WARN]\tfailed to find getcpu in vDSO\n" ); |
97 | } |
98 | |
99 | static int init_vsys(void) |
100 | { |
101 | #ifdef __x86_64__ |
102 | int nerrs = 0; |
103 | FILE *maps; |
104 | char line[MAPS_LINE_LEN]; |
105 | bool found = false; |
106 | |
107 | maps = fopen("/proc/self/maps" , "r" ); |
108 | if (!maps) { |
109 | printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n" ); |
110 | vsyscall_map_r = true; |
111 | return 0; |
112 | } |
113 | |
114 | while (fgets(line, MAPS_LINE_LEN, maps)) { |
115 | char r, x; |
116 | void *start, *end; |
117 | char name[MAPS_LINE_LEN]; |
118 | |
119 | /* sscanf() is safe here as strlen(name) >= strlen(line) */ |
120 | if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s" , |
121 | &start, &end, &r, &x, name) != 5) |
122 | continue; |
123 | |
124 | if (strcmp(name, "[vsyscall]" )) |
125 | continue; |
126 | |
127 | printf("\tvsyscall map: %s" , line); |
128 | |
129 | if (start != (void *)0xffffffffff600000 || |
130 | end != (void *)0xffffffffff601000) { |
131 | printf("[FAIL]\taddress range is nonsense\n" ); |
132 | nerrs++; |
133 | } |
134 | |
135 | printf("\tvsyscall permissions are %c-%c\n" , r, x); |
136 | vsyscall_map_r = (r == 'r'); |
137 | vsyscall_map_x = (x == 'x'); |
138 | |
139 | found = true; |
140 | break; |
141 | } |
142 | |
143 | fclose(maps); |
144 | |
145 | if (!found) { |
146 | printf("\tno vsyscall map in /proc/self/maps\n" ); |
147 | vsyscall_map_r = false; |
148 | vsyscall_map_x = false; |
149 | } |
150 | |
151 | return nerrs; |
152 | #else |
153 | return 0; |
154 | #endif |
155 | } |
156 | |
157 | /* syscalls */ |
/* Raw gettimeofday system call, bypassing any libc or vDSO fast path. */
static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
{
	const long rc = syscall(SYS_gettimeofday, tv, tz);

	return rc;
}
162 | |
/* Raw clock_gettime system call; the long syscall result is narrowed to int. */
static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
{
	const long rc = syscall(SYS_clock_gettime, id, ts);

	return rc;
}
167 | |
/* Raw time system call; also stores the result through @t when non-NULL. */
static inline long sys_time(time_t *t)
{
	const long now = syscall(SYS_time, t);

	return now;
}
172 | |
/* Raw getcpu system call: reports the current CPU and NUMA node numbers. */
static inline long sys_getcpu(unsigned *cpu, unsigned *node, void *cache)
{
	const long rc = syscall(SYS_getcpu, cpu, node, cache);

	return rc;
}
178 | |
/*
 * Recovery point for the fault probes.  It is used with sigsetjmp() and
 * siglongjmp(), which take a sigjmp_buf rather than a plain jmp_buf.
 */
static sigjmp_buf jmpbuf;

/* Page-fault error code (REG_ERR) captured by the most recent sigsegv(). */
static volatile unsigned long segv_err;

/*
 * SIGSEGV handler: record the fault's error code from the saved machine
 * context, then jump back to the sigsetjmp(jmpbuf, ...) call site, which
 * then returns 1.  This handler never returns normally.
 */
static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
{
	ucontext_t *ctx = (ucontext_t *)ctx_void;

	(void)sig;
	(void)info;

	segv_err = ctx->uc_mcontext.gregs[REG_ERR];
	siglongjmp(jmpbuf, 1);
}
189 | |
/* Return @a minus @b in seconds, as a double. */
static double tv_diff(const struct timeval *a, const struct timeval *b)
{
	const double secs = (double)(a->tv_sec - b->tv_sec);
	const int usecs = (int)a->tv_usec - (int)b->tv_usec;

	return secs + (double)usecs * 1e-6;
}
195 | |
/*
 * Check one alternative gettimeofday() result against two bracketing
 * syscall results.
 *
 * @tv_sys1, @tv_sys2: syscall timevals taken before and after the call
 *                     under test
 * @tz_sys:            timezone reported by the syscall
 * @which:             label used in the output ("vDSO", "vsyscall", ...)
 * @tv_other:          timeval returned by the implementation under test
 * @tz_other:          its timezone, or NULL to skip the timezone comparison
 *
 * Returns the number of failures found.
 */
static int check_gtod(const struct timeval *tv_sys1,
		      const struct timeval *tv_sys2,
		      const struct timezone *tz_sys,
		      const char *which,
		      const struct timeval *tv_other,
		      const struct timezone *tz_other)
{
	int nerrs = 0;
	double d1, d2;

	if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest ||
			 tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
		printf("[FAIL]\t%s tz mismatch\n", which);
		nerrs++;
	}

	/* The tested time must lie between the two syscall samples. */
	d1 = tv_diff(tv_other, tv_sys1);
	d2 = tv_diff(tv_sys2, tv_other);
	printf("\t%s time offsets: %lf %lf\n", which, d1, d2);

	if (d1 < 0 || d2 < 0) {
		printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
		nerrs++;
	} else {
		printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
	}

	return nerrs;
}
224 | |
225 | static int test_gtod(void) |
226 | { |
227 | struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys; |
228 | struct timezone tz_sys, tz_vdso, tz_vsys; |
229 | long ret_vdso = -1; |
230 | long ret_vsys = -1; |
231 | int nerrs = 0; |
232 | |
233 | printf("[RUN]\ttest gettimeofday()\n" ); |
234 | |
235 | if (sys_gtod(tv: &tv_sys1, tz: &tz_sys) != 0) |
236 | err(1, "syscall gettimeofday" ); |
237 | if (vdso_gtod) |
238 | ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso); |
239 | if (vsyscall_map_x) |
240 | ret_vsys = vgtod(&tv_vsys, &tz_vsys); |
241 | if (sys_gtod(tv: &tv_sys2, tz: &tz_sys) != 0) |
242 | err(1, "syscall gettimeofday" ); |
243 | |
244 | if (vdso_gtod) { |
245 | if (ret_vdso == 0) { |
246 | nerrs += check_gtod(tv_sys1: &tv_sys1, tv_sys2: &tv_sys2, tz_sys: &tz_sys, which: "vDSO" , tv_other: &tv_vdso, tz_other: &tz_vdso); |
247 | } else { |
248 | printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n" , ret_vdso); |
249 | nerrs++; |
250 | } |
251 | } |
252 | |
253 | if (vsyscall_map_x) { |
254 | if (ret_vsys == 0) { |
255 | nerrs += check_gtod(tv_sys1: &tv_sys1, tv_sys2: &tv_sys2, tz_sys: &tz_sys, which: "vsyscall" , tv_other: &tv_vsys, tz_other: &tz_vsys); |
256 | } else { |
257 | printf("[FAIL]\tvsys gettimeofday() failed: %ld\n" , ret_vsys); |
258 | nerrs++; |
259 | } |
260 | } |
261 | |
262 | return nerrs; |
263 | } |
264 | |
265 | static int test_time(void) { |
266 | int nerrs = 0; |
267 | |
268 | printf("[RUN]\ttest time()\n" ); |
269 | long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0; |
270 | long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1; |
271 | t_sys1 = sys_time(t: &t2_sys1); |
272 | if (vdso_time) |
273 | t_vdso = vdso_time(&t2_vdso); |
274 | if (vsyscall_map_x) |
275 | t_vsys = vtime(&t2_vsys); |
276 | t_sys2 = sys_time(t: &t2_sys2); |
277 | if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) { |
278 | printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n" , t_sys1, t2_sys1, t_sys2, t2_sys2); |
279 | nerrs++; |
280 | return nerrs; |
281 | } |
282 | |
283 | if (vdso_time) { |
284 | if (t_vdso < 0 || t_vdso != t2_vdso) { |
285 | printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n" , t_vdso, t2_vdso); |
286 | nerrs++; |
287 | } else if (t_vdso < t_sys1 || t_vdso > t_sys2) { |
288 | printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n" , t_sys1, t_vdso, t_sys2); |
289 | nerrs++; |
290 | } else { |
291 | printf("[OK]\tvDSO time() is okay\n" ); |
292 | } |
293 | } |
294 | |
295 | if (vsyscall_map_x) { |
296 | if (t_vsys < 0 || t_vsys != t2_vsys) { |
297 | printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n" , t_vsys, t2_vsys); |
298 | nerrs++; |
299 | } else if (t_vsys < t_sys1 || t_vsys > t_sys2) { |
300 | printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n" , t_sys1, t_vsys, t_sys2); |
301 | nerrs++; |
302 | } else { |
303 | printf("[OK]\tvsyscall time() is okay\n" ); |
304 | } |
305 | } |
306 | |
307 | return nerrs; |
308 | } |
309 | |
310 | static int test_getcpu(int cpu) |
311 | { |
312 | int nerrs = 0; |
313 | long ret_sys, ret_vdso = -1, ret_vsys = -1; |
314 | |
315 | printf("[RUN]\tgetcpu() on CPU %d\n" , cpu); |
316 | |
317 | cpu_set_t cpuset; |
318 | CPU_ZERO(&cpuset); |
319 | CPU_SET(cpu, &cpuset); |
320 | if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { |
321 | printf("[SKIP]\tfailed to force CPU %d\n" , cpu); |
322 | return nerrs; |
323 | } |
324 | |
325 | unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys; |
326 | unsigned node = 0; |
327 | bool have_node = false; |
328 | ret_sys = sys_getcpu(cpu: &cpu_sys, node: &node_sys, cache: 0); |
329 | if (vdso_getcpu) |
330 | ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); |
331 | if (vsyscall_map_x) |
332 | ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); |
333 | |
334 | if (ret_sys == 0) { |
335 | if (cpu_sys != cpu) { |
336 | printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n" , cpu_sys, cpu); |
337 | nerrs++; |
338 | } |
339 | |
340 | have_node = true; |
341 | node = node_sys; |
342 | } |
343 | |
344 | if (vdso_getcpu) { |
345 | if (ret_vdso) { |
346 | printf("[FAIL]\tvDSO getcpu() failed\n" ); |
347 | nerrs++; |
348 | } else { |
349 | if (!have_node) { |
350 | have_node = true; |
351 | node = node_vdso; |
352 | } |
353 | |
354 | if (cpu_vdso != cpu) { |
355 | printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n" , cpu_vdso, cpu); |
356 | nerrs++; |
357 | } else { |
358 | printf("[OK]\tvDSO reported correct CPU\n" ); |
359 | } |
360 | |
361 | if (node_vdso != node) { |
362 | printf("[FAIL]\tvDSO reported node %hu but should be %hu\n" , node_vdso, node); |
363 | nerrs++; |
364 | } else { |
365 | printf("[OK]\tvDSO reported correct node\n" ); |
366 | } |
367 | } |
368 | } |
369 | |
370 | if (vsyscall_map_x) { |
371 | if (ret_vsys) { |
372 | printf("[FAIL]\tvsyscall getcpu() failed\n" ); |
373 | nerrs++; |
374 | } else { |
375 | if (!have_node) { |
376 | have_node = true; |
377 | node = node_vsys; |
378 | } |
379 | |
380 | if (cpu_vsys != cpu) { |
381 | printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n" , cpu_vsys, cpu); |
382 | nerrs++; |
383 | } else { |
384 | printf("[OK]\tvsyscall reported correct CPU\n" ); |
385 | } |
386 | |
387 | if (node_vsys != node) { |
388 | printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n" , node_vsys, node); |
389 | nerrs++; |
390 | } else { |
391 | printf("[OK]\tvsyscall reported correct node\n" ); |
392 | } |
393 | } |
394 | } |
395 | |
396 | return nerrs; |
397 | } |
398 | |
399 | static int test_vsys_r(void) |
400 | { |
401 | #ifdef __x86_64__ |
402 | printf("[RUN]\tChecking read access to the vsyscall page\n" ); |
403 | bool can_read; |
404 | if (sigsetjmp(jmpbuf, 1) == 0) { |
405 | *(volatile int *)0xffffffffff600000; |
406 | can_read = true; |
407 | } else { |
408 | can_read = false; |
409 | } |
410 | |
411 | if (can_read && !vsyscall_map_r) { |
412 | printf("[FAIL]\tWe have read access, but we shouldn't\n" ); |
413 | return 1; |
414 | } else if (!can_read && vsyscall_map_r) { |
415 | printf("[FAIL]\tWe don't have read access, but we should\n" ); |
416 | return 1; |
417 | } else if (can_read) { |
418 | printf("[OK]\tWe have read access\n" ); |
419 | } else { |
420 | printf("[OK]\tWe do not have read access: #PF(0x%lx)\n" , |
421 | segv_err); |
422 | } |
423 | #endif |
424 | |
425 | return 0; |
426 | } |
427 | |
428 | static int test_vsys_x(void) |
429 | { |
430 | #ifdef __x86_64__ |
431 | if (vsyscall_map_x) { |
432 | /* We already tested this adequately. */ |
433 | return 0; |
434 | } |
435 | |
436 | printf("[RUN]\tMake sure that vsyscalls really page fault\n" ); |
437 | |
438 | bool can_exec; |
439 | if (sigsetjmp(jmpbuf, 1) == 0) { |
440 | vgtod(NULL, NULL); |
441 | can_exec = true; |
442 | } else { |
443 | can_exec = false; |
444 | } |
445 | |
446 | if (can_exec) { |
447 | printf("[FAIL]\tExecuting the vsyscall did not page fault\n" ); |
448 | return 1; |
449 | } else if (segv_err & (1 << 4)) { /* INSTR */ |
450 | printf("[OK]\tExecuting the vsyscall page failed: #PF(0x%lx)\n" , |
451 | segv_err); |
452 | } else { |
453 | printf("[FAIL]\tExecution failed with the wrong error: #PF(0x%lx)\n" , |
454 | segv_err); |
455 | return 1; |
456 | } |
457 | #endif |
458 | |
459 | return 0; |
460 | } |
461 | |
462 | /* |
463 | * Debuggers expect ptrace() to be able to peek at the vsyscall page. |
464 | * Use process_vm_readv() as a proxy for ptrace() to test this. We |
465 | * want it to work in the vsyscall=emulate case and to fail in the |
466 | * vsyscall=xonly case. |
467 | * |
468 | * It's worth noting that this ABI is a bit nutty. write(2) can't |
469 | * read from the vsyscall page on any kernel version or mode. The |
470 | * fact that ptrace() ever worked was a nice courtesy of old kernels, |
471 | * but the code to support it is fairly gross. |
472 | */ |
473 | static int test_process_vm_readv(void) |
474 | { |
475 | #ifdef __x86_64__ |
476 | char buf[4096]; |
477 | struct iovec local, remote; |
478 | int ret; |
479 | |
480 | printf("[RUN]\tprocess_vm_readv() from vsyscall page\n" ); |
481 | |
482 | local.iov_base = buf; |
483 | local.iov_len = 4096; |
484 | remote.iov_base = (void *)0xffffffffff600000; |
485 | remote.iov_len = 4096; |
486 | ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0); |
487 | if (ret != 4096) { |
488 | /* |
489 | * We expect process_vm_readv() to work if and only if the |
490 | * vsyscall page is readable. |
491 | */ |
492 | printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n" , vsyscall_map_r ? "FAIL" : "OK" , ret, errno); |
493 | return vsyscall_map_r ? 1 : 0; |
494 | } |
495 | |
496 | if (vsyscall_map_r) { |
497 | if (!memcmp(buf, remote.iov_base, sizeof(buf))) { |
498 | printf("[OK]\tIt worked and read correct data\n" ); |
499 | } else { |
500 | printf("[FAIL]\tIt worked but returned incorrect data\n" ); |
501 | return 1; |
502 | } |
503 | } else { |
504 | printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n" ); |
505 | return 1; |
506 | } |
507 | #endif |
508 | |
509 | return 0; |
510 | } |
511 | |
512 | #ifdef __x86_64__ |
/* Number of SIGTRAPs delivered while RIP pointed into the vsyscall page. */
static volatile sig_atomic_t num_vsyscall_traps;

/*
 * SIGTRAP handler: count the trap if the saved instruction pointer lies
 * in the 4 KiB page starting at 0xffffffffff600000.
 */
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
	ucontext_t *uc = (ucontext_t *)ctx_void;
	const unsigned long rip = uc->uc_mcontext.gregs[REG_RIP];
	const unsigned long page = rip & ~0xfffUL;

	if (page == 0xffffffffff600000UL)
		num_vsyscall_traps++;
}
523 | |
524 | static int test_emulation(void) |
525 | { |
526 | time_t tmp; |
527 | bool is_native; |
528 | |
529 | if (!vsyscall_map_x) |
530 | return 0; |
531 | |
532 | printf("[RUN]\tchecking that vsyscalls are emulated\n" ); |
533 | sethandler(SIGTRAP, sigtrap, 0); |
534 | set_eflags(get_eflags() | X86_EFLAGS_TF); |
535 | vtime(&tmp); |
536 | set_eflags(get_eflags() & ~X86_EFLAGS_TF); |
537 | |
538 | /* |
539 | * If vsyscalls are emulated, we expect a single trap in the |
540 | * vsyscall page -- the call instruction will trap with RIP |
541 | * pointing to the entry point before emulation takes over. |
542 | * In native mode, we expect two traps, since whatever code |
543 | * the vsyscall page contains will be more than just a ret |
544 | * instruction. |
545 | */ |
546 | is_native = (num_vsyscall_traps > 1); |
547 | |
548 | printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n" , |
549 | (is_native ? "FAIL" : "OK" ), |
550 | (is_native ? "native" : "emulated" ), |
551 | (int)num_vsyscall_traps); |
552 | |
553 | return is_native; |
554 | } |
555 | #endif |
556 | |
557 | int main(int argc, char **argv) |
558 | { |
559 | int nerrs = 0; |
560 | |
561 | init_vdso(); |
562 | nerrs += init_vsys(); |
563 | |
564 | nerrs += test_gtod(); |
565 | nerrs += test_time(); |
566 | nerrs += test_getcpu(cpu: 0); |
567 | nerrs += test_getcpu(cpu: 1); |
568 | |
569 | sethandler(SIGSEGV, sigsegv, 0); |
570 | nerrs += test_vsys_r(); |
571 | nerrs += test_vsys_x(); |
572 | |
573 | nerrs += test_process_vm_readv(); |
574 | |
575 | #ifdef __x86_64__ |
576 | nerrs += test_emulation(); |
577 | #endif |
578 | |
579 | return nerrs ? 1 : 0; |
580 | } |
581 | |