1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * 32-bit syscall ABI conformance test. |
4 | * |
5 | * Copyright (c) 2015 Denys Vlasenko |
6 | */ |
7 | /* |
8 | * Can be built statically: |
9 | * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S |
10 | */ |
11 | #undef _GNU_SOURCE |
12 | #define _GNU_SOURCE 1 |
13 | #undef __USE_GNU |
14 | #define __USE_GNU 1 |
15 | #include <unistd.h> |
16 | #include <stdlib.h> |
17 | #include <string.h> |
18 | #include <stdio.h> |
19 | #include <signal.h> |
20 | #include <sys/types.h> |
21 | #include <sys/select.h> |
22 | #include <sys/time.h> |
23 | #include <elf.h> |
24 | #include <sys/ptrace.h> |
25 | #include <sys/wait.h> |
26 | |
27 | #if !defined(__i386__) |
/*
 * Entry point used when the file is not compiled for 32-bit x86
 * (__i386__ undefined): report that the test is skipped and succeed.
 */
int main(int argc, char **argv, char **envp)
{
	fputs("[SKIP]\tNot a 32-bit x86 userspace\n", stdout);
	return 0;
}
33 | #else |
34 | |
/* Entry point that run_syscall() invokes with "call *syscall_addr". */
long syscall_addr;

/*
 * Find the AT_SYSINFO value in the auxiliary vector.
 *
 * The vector is taken to start immediately after the NULL pointer that
 * terminates the envp array.  Entries are scanned up to AT_NULL.
 *
 * Returns the a_val of the first AT_SYSINFO entry, or 0 (after printing a
 * warning) when no such entry exists.
 */
long get_syscall(char **envp)
{
	const Elf32_auxv_t *entry;
	char **cursor = envp;

	/* Walk to the environment terminator ... */
	while (*cursor != NULL)
		cursor++;
	/* ... and step over it: the auxiliary vector follows. */
	cursor++;

	entry = (const void *)cursor;
	while (entry->a_type != AT_NULL) {
		if (entry->a_type == AT_SYSINFO)
			return entry->a_un.a_val;
		entry++;
	}

	printf("[WARN]\tAT_SYSINFO not supplied\n");
	return 0;
}
47 | |
/*
 * int80: a minimal routine emitted into .text that executes "int $0x80"
 * and returns.  run_syscall_twice() stores its address in syscall_addr so
 * that run_syscall() reaches it through the same "call *syscall_addr" it
 * uses for the entry point obtained from AT_SYSINFO.
 */
asm (
	" .pushsection .text\n"
	" .global int80\n"
	"int80:\n"
	" int $0x80\n"
	" ret\n"
	" .popsection\n"
);
/* Declared so that C code can take the label's address (&int80). */
extern char int80;
57 | |
/*
 * Snapshot of the sixteen 64-bit general purpose registers.
 *
 * get_regs64 stores each register at a fixed N*8 byte offset, so the
 * order of the members below must not change.
 */
struct regs64 {
	uint64_t rax;	/* slot  0 */
	uint64_t rbx;	/* slot  1 */
	uint64_t rcx;	/* slot  2 */
	uint64_t rdx;	/* slot  3 */
	uint64_t rsi;	/* slot  4 */
	uint64_t rdi;	/* slot  5 */
	uint64_t rbp;	/* slot  6 */
	uint64_t rsp;	/* slot  7 */
	uint64_t r8;	/* slot  8 */
	uint64_t r9;	/* slot  9 */
	uint64_t r10;	/* slot 10 */
	uint64_t r11;	/* slot 11 */
	uint64_t r12;	/* slot 12 */
	uint64_t r13;	/* slot 13 */
	uint64_t r14;	/* slot 14 */
	uint64_t r15;	/* slot 15 */
};

/* Filled in by get_regs64; referenced by symbol name from the assembly. */
struct regs64 regs64;

/* Non-zero when main() found CS == 0x23; gates all R8..R15 checks. */
int kernel_is_64bit;
66 | |
/*
 * Two helper routines assembled as 64-bit code (.code64) inside this
 * otherwise 32-bit program.  run_syscall() never calls them directly; it
 * passes their addresses to call64_from_32().
 */
asm (
	" .pushsection .text\n"
	" .code64\n"
	/*
	 * get_regs64: store RAX..R15 into the global regs64, one register
	 * per 8-byte slot, in the order of struct regs64.
	 */
	"get_regs64:\n"
	/*
	 * RAX doubles as the pointer to regs64, so its incoming value is
	 * parked on the stack first and then popped straight into slot 0.
	 */
	" push %rax\n"
	" mov $regs64, %eax\n"
	" pop 0*8(%rax)\n"
	" movq %rbx, 1*8(%rax)\n"
	" movq %rcx, 2*8(%rax)\n"
	" movq %rdx, 3*8(%rax)\n"
	" movq %rsi, 4*8(%rax)\n"
	" movq %rdi, 5*8(%rax)\n"
	" movq %rbp, 6*8(%rax)\n"
	/* The temporary push has been popped by now, so this is RSP as it was on entry. */
	" movq %rsp, 7*8(%rax)\n"
	" movq %r8, 8*8(%rax)\n"
	" movq %r9, 9*8(%rax)\n"
	" movq %r10, 10*8(%rax)\n"
	" movq %r11, 11*8(%rax)\n"
	" movq %r12, 12*8(%rax)\n"
	" movq %r13, 13*8(%rax)\n"
	" movq %r14, 14*8(%rax)\n"
	" movq %r15, 15*8(%rax)\n"
	" ret\n"
	/*
	 * poison_regs64: load R8 with 0x7f7f7f7f7f7f7f7f (assembled from two
	 * 32-bit halves), then give each following register up to R15 the
	 * previous register's value plus one.  check_regs64() expects exactly
	 * this sequence.
	 */
	"poison_regs64:\n"
	" movq $0x7f7f7f7f, %r8\n"
	" shl $32, %r8\n"
	" orq $0x7f7f7f7f, %r8\n"
	" movq %r8, %r9\n"
	" incq %r9\n"
	" movq %r9, %r10\n"
	" incq %r10\n"
	" movq %r10, %r11\n"
	" incq %r11\n"
	" movq %r11, %r12\n"
	" incq %r12\n"
	" movq %r12, %r13\n"
	" incq %r13\n"
	" movq %r13, %r14\n"
	" incq %r14\n"
	" movq %r14, %r15\n"
	" incq %r15\n"
	" ret\n"
	/* Switch the assembler back to 32-bit code for the rest of the file. */
	" .code32\n"
	" .popsection\n"
);
/* C-visible names for the two routines above. */
extern void get_regs64(void);
extern void poison_regs64(void);
/*
 * Not defined in this file; presumably supplied by thunks_32.S, which the
 * build line at the top of the file links in -- TODO confirm.  Used here to
 * run one of the 64-bit routines above from 32-bit code.
 */
extern unsigned long call64_from_32(void (*function)(void));
115 | void print_regs64(void) |
116 | { |
117 | if (!kernel_is_64bit) |
118 | return; |
119 | printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n" , regs64.rax, regs64.rbx, regs64.rcx, regs64.rdx); |
120 | printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n" , regs64.rsi, regs64.rdi, regs64.rbp, regs64.rsp); |
121 | printf(" 8:%016llx 9:%016llx 10:%016llx 11:%016llx\n" , regs64.r8 , regs64.r9 , regs64.r10, regs64.r11); |
122 | printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n" , regs64.r12, regs64.r13, regs64.r14, regs64.r15); |
123 | } |
124 | |
125 | int check_regs64(void) |
126 | { |
127 | int err = 0; |
128 | int num = 8; |
129 | uint64_t *r64 = ®s64.r8; |
130 | uint64_t expected = 0x7f7f7f7f7f7f7f7fULL; |
131 | |
132 | if (!kernel_is_64bit) |
133 | return 0; |
134 | |
135 | do { |
136 | if (*r64 == expected++) |
137 | continue; /* register did not change */ |
138 | if (syscall_addr != (long)&int80) { |
139 | /* |
140 | * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs: |
141 | * either clear them to 0, or for R11, load EFLAGS. |
142 | */ |
143 | if (*r64 == 0) |
144 | continue; |
145 | if (num == 11) { |
146 | printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n" , *r64); |
147 | continue; |
148 | } |
149 | } else { |
150 | /* |
151 | * INT80 syscall entrypoint can be used by |
152 | * 64-bit programs too, unlike SYSCALL/SYSENTER. |
153 | * Therefore it must preserve R12+ |
154 | * (they are callee-saved registers in 64-bit C ABI). |
155 | * |
156 | * Starting in Linux 4.17 (and any kernel that |
157 | * backports the change), R8..11 are preserved. |
158 | * Historically (and probably unintentionally), they |
159 | * were clobbered or zeroed. |
160 | */ |
161 | } |
162 | printf("[FAIL]\tR%d has changed:%016llx\n" , num, *r64); |
163 | err++; |
164 | } while (r64++, ++num < 16); |
165 | |
166 | if (!err) |
167 | printf("[OK]\tR8..R15 did not leak kernel data\n" ); |
168 | return err; |
169 | } |
170 | |
/*
 * Argument storage for the pselect call made by run_syscall(); the values
 * are filled in by prep_args().  These are ordinary external-linkage
 * globals because the inline assembly in run_syscall() refers to them by
 * symbol name.
 */
int nfds;
fd_set rfds, wfds, efds;
struct timespec timeout;
sigset_t sigmask;

/* Sixth argument: pointer to the signal mask plus its size in bytes. */
struct {
	sigset_t *sp;
	int sz;
} sigmask_desc;
181 | |
182 | void prep_args() |
183 | { |
184 | nfds = 42; |
185 | FD_ZERO(&rfds); |
186 | FD_ZERO(&wfds); |
187 | FD_ZERO(&efds); |
188 | FD_SET(0, &rfds); |
189 | FD_SET(1, &wfds); |
190 | FD_SET(2, &efds); |
191 | timeout.tv_sec = 0; |
192 | timeout.tv_nsec = 123; |
193 | sigemptyset(&sigmask); |
194 | sigaddset(&sigmask, SIGINT); |
195 | sigaddset(&sigmask, SIGUSR2); |
196 | sigaddset(&sigmask, SIGRTMAX); |
197 | sigmask_desc.sp = &sigmask; |
198 | sigmask_desc.sz = 8; /* bytes */ |
199 | } |
200 | |
/*
 * Print "<name>=<r as 16 hex digits> " followed by a decoded view of
 * EFLAGS bits 21 down to 0, ending with a newline.
 *
 * Every bit has one text for "clear" and one for "set"; empty texts are
 * skipped.  The text for bit 0 carries the trailing newline.  When any bit
 * above 21 is set, "(extra bits are set) " is printed before the decoded
 * bits.
 */
static void print_flags(const char *name, unsigned long r)
{
	/* Indexed as flag_text[bit number][bit value]. */
	static const char *const flag_text[22][2] = {
		{ "\n", "c\n" },	/*  0: Carry Flag */
		{ "0 ", "1 " },		/*  1: always on */
		{ "", "p " },		/*  2: Parity Flag */
		{ "0 ", "3? " },	/*  3 */
		{ "", "a " },		/*  4: Auxiliary carry Flag */
		{ "0 ", "5? " },	/*  5 */
		{ "", "z " },		/*  6: Zero Flag */
		{ "", "s " },		/*  7: Sign Flag */
		{ "", "t " },		/*  8: Trap Flag */
		{ "", "i " },		/*  9: Interrupt Flag */
		{ "", "d " },		/* 10: Direction Flag */
		{ "", "o " },		/* 11: Overflow Flag */
		{ "0 ", "1 " },		/* 12: I/O Privilege Level (2 bits) */
		{ "0", "1" },		/* 13: I/O Privilege Level (2 bits) */
		{ "", "n " },		/* 14: Nested Task */
		{ "0 ", "15? " },	/* 15 */
		{ "", "r " },		/* 16: Resume Flag */
		{ "", "v " },		/* 17: Virtual Mode */
		{ "", "ac " },		/* 18: Alignment Check/Access Control */
		{ "", "vif " },		/* 19: Virtual Interrupt Flag */
		{ "", "vip " },		/* 20: Virtual Interrupt Pending */
		{ "", "id " },		/* 21: CPUID detection */
	};
	int bit;

	printf("%s=%016lx ", name, r);
	if ((r >> 22) != 0)
		printf("(extra bits are set) ");

	/* Highest bit first, so bit 0 and its newline come last. */
	for (bit = 21; bit >= 0; bit--) {
		const char *text = flag_text[bit][(r >> bit) & 1];

		if (text[0] != '\0')
			fputs(text, stdout);
	}
}
243 | |
244 | int run_syscall(void) |
245 | { |
246 | long flags, bad_arg; |
247 | |
248 | prep_args(); |
249 | |
250 | if (kernel_is_64bit) |
251 | call64_from_32(poison_regs64); |
252 | /*print_regs64();*/ |
253 | |
254 | asm("\n" |
255 | /* Try 6-arg syscall: pselect. It should return quickly */ |
256 | " push %%ebp\n" |
257 | " mov $308, %%eax\n" /* PSELECT */ |
258 | " mov nfds, %%ebx\n" /* ebx arg1 */ |
259 | " mov $rfds, %%ecx\n" /* ecx arg2 */ |
260 | " mov $wfds, %%edx\n" /* edx arg3 */ |
261 | " mov $efds, %%esi\n" /* esi arg4 */ |
262 | " mov $timeout, %%edi\n" /* edi arg5 */ |
263 | " mov $sigmask_desc, %%ebp\n" /* %ebp arg6 */ |
264 | " push $0x200ed7\n" /* set almost all flags */ |
265 | " popf\n" /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */ |
266 | " call *syscall_addr\n" |
267 | /* Check that registers are not clobbered */ |
268 | " pushf\n" |
269 | " pop %%eax\n" |
270 | " cld\n" |
271 | " cmp nfds, %%ebx\n" /* ebx arg1 */ |
272 | " mov $1, %%ebx\n" |
273 | " jne 1f\n" |
274 | " cmp $rfds, %%ecx\n" /* ecx arg2 */ |
275 | " mov $2, %%ebx\n" |
276 | " jne 1f\n" |
277 | " cmp $wfds, %%edx\n" /* edx arg3 */ |
278 | " mov $3, %%ebx\n" |
279 | " jne 1f\n" |
280 | " cmp $efds, %%esi\n" /* esi arg4 */ |
281 | " mov $4, %%ebx\n" |
282 | " jne 1f\n" |
283 | " cmp $timeout, %%edi\n" /* edi arg5 */ |
284 | " mov $5, %%ebx\n" |
285 | " jne 1f\n" |
286 | " cmpl $sigmask_desc, %%ebp\n" /* %ebp arg6 */ |
287 | " mov $6, %%ebx\n" |
288 | " jne 1f\n" |
289 | " mov $0, %%ebx\n" |
290 | "1:\n" |
291 | " pop %%ebp\n" |
292 | : "=a" (flags), "=b" (bad_arg) |
293 | : |
294 | : "cx" , "dx" , "si" , "di" |
295 | ); |
296 | |
297 | if (kernel_is_64bit) { |
298 | memset(®s64, 0x77, sizeof(regs64)); |
299 | call64_from_32(get_regs64); |
300 | /*print_regs64();*/ |
301 | } |
302 | |
303 | /* |
304 | * On paravirt kernels, flags are not preserved across syscalls. |
305 | * Thus, we do not consider it a bug if some are changed. |
306 | * We just show ones which do. |
307 | */ |
308 | if ((0x200ed7 ^ flags) != 0) { |
309 | print_flags("[WARN]\tFlags before" , 0x200ed7); |
310 | print_flags("[WARN]\tFlags after" , flags); |
311 | print_flags("[WARN]\tFlags change" , (0x200ed7 ^ flags)); |
312 | } |
313 | |
314 | if (bad_arg) { |
315 | printf("[FAIL]\targ#%ld clobbered\n" , bad_arg); |
316 | return 1; |
317 | } |
318 | printf("[OK]\tArguments are preserved across syscall\n" ); |
319 | |
320 | return check_regs64(); |
321 | } |
322 | |
323 | int run_syscall_twice() |
324 | { |
325 | int exitcode = 0; |
326 | long sv; |
327 | |
328 | if (syscall_addr) { |
329 | printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n" ); |
330 | exitcode = run_syscall(); |
331 | } |
332 | sv = syscall_addr; |
333 | syscall_addr = (long)&int80; |
334 | printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n" ); |
335 | exitcode += run_syscall(); |
336 | syscall_addr = sv; |
337 | return exitcode; |
338 | } |
339 | |
/*
 * Fork, and continue the test in the child under ptrace.
 *
 * Child:  requests tracing with PTRACE_TRACEME, stops itself with SIGSTOP
 *         and, once resumed, returns to the caller.  If PTRACE_TRACEME
 *         fails, the child exits with status 0.
 * Parent: never returns.  It resumes the child with PTRACE_SYSCALL after
 *         every stop and finally exits with the child's exit status, the
 *         number of the signal that killed it, or 255 on an unexpected
 *         wait result.
 *
 * Exits with status 1 if fork() fails.
 */
void ptrace_me(void)
{
	pid_t pid;

	/* Flush buffered output so it is not duplicated by fork(). */
	fflush(NULL);
	pid = fork();
	if (pid < 0)
		exit(1);
	if (pid == 0) {
		/* child */
		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
			exit(0);
		raise(SIGSTOP);
		return;
	}
	/* parent */
	printf("[RUN]\tRunning tests under ptrace\n");
	while (1) {
		int status = 0;

		pid = waitpid(-1, &status, __WALL);
		/*
		 * status is only meaningful when waitpid() succeeded, so the
		 * return value has to be checked before status is decoded.
		 */
		if (pid <= 0)
			exit(255);
		if (WIFEXITED(status))
			exit(WEXITSTATUS(status));
		if (WIFSIGNALED(status))
			exit(WTERMSIG(status));
		if (!WIFSTOPPED(status)) /* paranoia */
			exit(255);
		/*
		 * Note: we do not inject sig = WSTOPSIG(status).
		 * We probably should, but careful: do not inject SIGTRAP
		 * generated by syscall entry/exit stops.
		 * That kills the child.
		 */
		ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
	}
}
375 | |
376 | int main(int argc, char **argv, char **envp) |
377 | { |
378 | int exitcode = 0; |
379 | int cs; |
380 | |
381 | asm("\n" |
382 | " movl %%cs, %%eax\n" |
383 | : "=a" (cs) |
384 | ); |
385 | kernel_is_64bit = (cs == 0x23); |
386 | if (!kernel_is_64bit) |
387 | printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n" ); |
388 | |
389 | /* This only works for non-static builds: |
390 | * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall"); |
391 | */ |
392 | syscall_addr = get_syscall(envp); |
393 | |
394 | exitcode += run_syscall_twice(); |
395 | ptrace_me(); |
396 | exitcode += run_syscall_twice(); |
397 | |
398 | return exitcode; |
399 | } |
400 | #endif |
401 | |