1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * syscall_numbering.c - test calling the x86-64 kernel with various |
4 | * valid and invalid system call numbers. |
5 | * |
6 | * Copyright (c) 2018 Andrew Lutomirski |
7 | */ |
8 | |
9 | #define _GNU_SOURCE |
10 | |
11 | #include <stdlib.h> |
12 | #include <stdio.h> |
13 | #include <stdbool.h> |
14 | #include <errno.h> |
15 | #include <unistd.h> |
16 | #include <string.h> |
17 | #include <fcntl.h> |
18 | #include <limits.h> |
19 | #include <signal.h> |
20 | #include <sysexits.h> |
21 | |
22 | #include <sys/ptrace.h> |
23 | #include <sys/user.h> |
24 | #include <sys/wait.h> |
25 | #include <sys/mman.h> |
26 | |
27 | #include <linux/ptrace.h> |
28 | |
/* System call numbers shared by the x64 and x32 tables */
enum {
	SYS_READ   = 0,
	SYS_WRITE  = 1,
	SYS_GETPID = 39,
};

/* System call numbers that exist only in the x64 table */
enum {
	X64_IOCTL  = 16,
	X64_READV  = 19,
	X64_WRITEV = 20,
};

/* System call numbers that exist only in the x32 table (X32_BIT not included) */
enum {
	X32_IOCTL  = 514,
	X32_READV  = 515,
	X32_WRITEV = 516,
};

/* Flag bit that marks a system call number as an x32 call */
enum {
	X32_BIT = 0x40000000,
};
43 | |
/* Descriptor for /dev/null; remains -1 until main() has opened it */
static int nullfd = -1;

/* Result of test_x32(): does this kernel accept x32 system calls? */
static bool with_x32 = false;
46 | |
/*
 * What the tracer does at each traced system call entry.  The passes
 * are run in this order; PTP_DONE terminates the sequence.
 */
enum ptrace_pass {
	PTP_NOTHING = 0,	/* Stop only; registers are never read */
	PTP_GETREGS,		/* Read the registers, write nothing back */
	PTP_WRITEBACK,		/* Read the registers, write them back unchanged */
	PTP_FUZZRET,		/* Replace rax with MODIFIED_BY_PTRACE */
	PTP_FUZZHIGH,		/* As above, and set the top 32 bits of orig_rax */
	PTP_INTNUM,		/* As above, and sign-extend orig_rax from 32 bits */
	PTP_DONE		/* No more passes */
};
56 | |
57 | static const char * const ptrace_pass_name[] = |
58 | { |
59 | [PTP_NOTHING] = "just stop, no data read" , |
60 | [PTP_GETREGS] = "only getregs" , |
61 | [PTP_WRITEBACK] = "getregs, unmodified setregs" , |
62 | [PTP_FUZZRET] = "modifying the default return" , |
63 | [PTP_FUZZHIGH] = "clobbering the top 32 bits" , |
64 | [PTP_INTNUM] = "sign-extending the syscall number" , |
65 | }; |
66 | |
67 | /* |
68 | * Shared memory block between tracer and test |
69 | */ |
70 | struct shared { |
71 | unsigned int nerr; /* Total error count */ |
72 | unsigned int indent; /* Message indentation level */ |
73 | enum ptrace_pass ptrace_pass; |
74 | bool probing_syscall; /* In probe_syscall() */ |
75 | }; |
76 | static volatile struct shared *sh; |
77 | |
78 | static inline unsigned int offset(void) |
79 | { |
80 | unsigned int level = sh ? sh->indent : 0; |
81 | |
82 | return 8 + level * 4; |
83 | } |
84 | |
/*
 * Print one message: the bracketed level tag, left-justified and
 * padded to offset() columns, followed by the printf-style text.
 */
#define msg(lvl, fmt, ...)						\
	printf("%-*s" fmt, offset(), "[" #lvl "]", ## __VA_ARGS__)

/* Shorthands for the tags that have no side effects */
#define run(fmt, ...)	msg(RUN,  fmt, ## __VA_ARGS__)
#define info(fmt, ...)	msg(INFO, fmt, ## __VA_ARGS__)
#define ok(fmt, ...)	msg(OK,   fmt, ## __VA_ARGS__)
91 | |
/*
 * Print a [FAIL] message and count it in the shared error total.
 * Requires the shared block (sh) to be set up.
 */
#define fail(fmt, ...)							\
	do {								\
		msg(FAIL, fmt, ## __VA_ARGS__);				\
		sh->nerr++;						\
	} while (0)
97 | |
/*
 * Report a fatal setup problem and terminate with EX_OSERR.  The
 * message is printed at indentation level zero, followed by a [SKIP]
 * line.  This never returns.
 *
 * It can be invoked before the shared block has been set up (sh is
 * still NULL at that point), so sh is only touched when it is non-NULL.
 */
#define crit(fmt, ...)							\
	do {								\
		if (sh)							\
			sh->indent = 0;					\
		msg(FAIL, fmt, ## __VA_ARGS__);				\
		msg(SKIP, "Unable to run test\n");			\
		exit(EX_OSERR);						\
	} while (0)
105 | |
/*
 * Value the tracer stores in rax in the passes that modify the
 * default return value; the test then expects to see it come back.
 */
#define MODIFIED_BY_PTRACE	(-9999)
108 | |
109 | /* |
110 | * Directly invokes the given syscall with nullfd as the first argument |
111 | * and the rest zero. Avoids involving glibc wrappers in case they ever |
112 | * end up intercepting some system calls for some reason, or modify |
113 | * the system call number itself. |
114 | */ |
115 | static long long probe_syscall(int msb, int lsb) |
116 | { |
117 | register long long arg1 asm("rdi" ) = nullfd; |
118 | register long long arg2 asm("rsi" ) = 0; |
119 | register long long arg3 asm("rdx" ) = 0; |
120 | register long long arg4 asm("r10" ) = 0; |
121 | register long long arg5 asm("r8" ) = 0; |
122 | register long long arg6 asm("r9" ) = 0; |
123 | long long nr = ((long long)msb << 32) | (unsigned int)lsb; |
124 | long long ret; |
125 | |
126 | /* |
127 | * We pass in an extra copy of the extended system call number |
128 | * in %rbx, so we can examine it from the ptrace handler without |
129 | * worrying about it being possibly modified. This is to test |
130 | * the validity of struct user regs.orig_rax a.k.a. |
131 | * struct pt_regs.orig_ax. |
132 | */ |
133 | sh->probing_syscall = true; |
134 | asm volatile("syscall" |
135 | : "=a" (ret) |
136 | : "a" (nr), "b" (nr), |
137 | "r" (arg1), "r" (arg2), "r" (arg3), |
138 | "r" (arg4), "r" (arg5), "r" (arg6) |
139 | : "rcx" , "r11" , "memory" , "cc" ); |
140 | sh->probing_syscall = false; |
141 | |
142 | return ret; |
143 | } |
144 | |
145 | static const char *syscall_str(int msb, int start, int end) |
146 | { |
147 | static char buf[64]; |
148 | const char * const type = (start & X32_BIT) ? "x32" : "x64" ; |
149 | int lsb = start; |
150 | |
151 | /* |
152 | * Improve readability by stripping the x32 bit, but round |
153 | * toward zero so we don't display -1 as -1073741825. |
154 | */ |
155 | if (lsb < 0) |
156 | lsb |= X32_BIT; |
157 | else |
158 | lsb &= ~X32_BIT; |
159 | |
160 | if (start == end) |
161 | snprintf(buf, size: sizeof buf, fmt: "%s syscall %d:%d" , |
162 | type, msb, lsb); |
163 | else |
164 | snprintf(buf, size: sizeof buf, fmt: "%s syscalls %d:%d..%d" , |
165 | type, msb, lsb, lsb + (end-start)); |
166 | |
167 | return buf; |
168 | } |
169 | |
170 | static unsigned int _check_for(int msb, int start, int end, long long expect, |
171 | const char *expect_str) |
172 | { |
173 | unsigned int err = 0; |
174 | |
175 | sh->indent++; |
176 | if (start != end) |
177 | sh->indent++; |
178 | |
179 | for (int nr = start; nr <= end; nr++) { |
180 | long long ret = probe_syscall(msb, lsb: nr); |
181 | |
182 | if (ret != expect) { |
183 | fail("%s returned %lld, but it should have returned %s\n" , |
184 | syscall_str(msb, nr, nr), |
185 | ret, expect_str); |
186 | err++; |
187 | } |
188 | } |
189 | |
190 | if (start != end) |
191 | sh->indent--; |
192 | |
193 | if (err) { |
194 | if (start != end) |
195 | fail("%s had %u failure%s\n" , |
196 | syscall_str(msb, start, end), |
197 | err, err == 1 ? "s" : "" ); |
198 | } else { |
199 | ok("%s returned %s as expected\n" , |
200 | syscall_str(msb, start, end), expect_str); |
201 | } |
202 | |
203 | sh->indent--; |
204 | |
205 | return err; |
206 | } |
207 | |
/*
 * Front end for _check_for() that uses the spelling of the expected
 * value in the source as its description in messages.
 */
#define check_for(msb, start, end, expect)				\
	_check_for((msb), (start), (end), (expect), #expect)
210 | |
/*
 * Probe a single system call that is expected to return 0.
 * Returns true if it did NOT (i.e. a failure was recorded).
 */
static bool check_zero(int msb, int nr)
{
	const unsigned int nfail = check_for(msb, nr, nr, 0);

	return nfail != 0;
}
215 | |
/*
 * Probe a single system call that is expected to return -ENOSYS.
 * Returns true if it did NOT (i.e. a failure was recorded).
 */
static bool check_enosys(int msb, int nr)
{
	const unsigned int nfail = check_for(msb, nr, nr, -ENOSYS);

	return nfail != 0;
}
220 | |
221 | /* |
222 | * Anyone diagnosing a failure will want to know whether the kernel |
223 | * supports x32. Tell them. This can also be used to conditionalize |
224 | * tests based on existence or nonexistence of x32. |
225 | */ |
226 | static bool test_x32(void) |
227 | { |
228 | long long ret; |
229 | pid_t mypid = getpid(); |
230 | |
231 | run("Checking for x32 by calling x32 getpid()\n" ); |
232 | ret = probe_syscall(msb: 0, SYS_GETPID | X32_BIT); |
233 | |
234 | sh->indent++; |
235 | if (ret == mypid) { |
236 | info("x32 is supported\n" ); |
237 | with_x32 = true; |
238 | } else if (ret == -ENOSYS) { |
239 | info("x32 is not supported\n" ); |
240 | with_x32 = false; |
241 | } else { |
242 | fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n" , ret, (long long)mypid); |
243 | with_x32 = false; |
244 | } |
245 | sh->indent--; |
246 | return with_x32; |
247 | } |
248 | |
249 | static void test_syscalls_common(int msb) |
250 | { |
251 | enum ptrace_pass pass = sh->ptrace_pass; |
252 | |
253 | run("Checking some common syscalls as 64 bit\n" ); |
254 | check_zero(msb, SYS_READ); |
255 | check_zero(msb, SYS_WRITE); |
256 | |
257 | run("Checking some 64-bit only syscalls as 64 bit\n" ); |
258 | check_zero(msb, X64_READV); |
259 | check_zero(msb, X64_WRITEV); |
260 | |
261 | run("Checking out of range system calls\n" ); |
262 | check_for(msb, -64, -2, -ENOSYS); |
263 | if (pass >= PTP_FUZZRET) |
264 | check_for(msb, -1, -1, MODIFIED_BY_PTRACE); |
265 | else |
266 | check_for(msb, -1, -1, -ENOSYS); |
267 | check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS); |
268 | check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS); |
269 | check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS); |
270 | } |
271 | |
272 | static void test_syscalls_with_x32(int msb) |
273 | { |
274 | /* |
275 | * Syscalls 512-547 are "x32" syscalls. They are |
276 | * intended to be called with the x32 (0x40000000) bit |
277 | * set. Calling them without the x32 bit set is |
278 | * nonsense and should not work. |
279 | */ |
280 | run("Checking x32 syscalls as 64 bit\n" ); |
281 | check_for(msb, 512, 547, -ENOSYS); |
282 | |
283 | run("Checking some common syscalls as x32\n" ); |
284 | check_zero(msb, SYS_READ | X32_BIT); |
285 | check_zero(msb, SYS_WRITE | X32_BIT); |
286 | |
287 | run("Checking some x32 syscalls as x32\n" ); |
288 | check_zero(msb, X32_READV | X32_BIT); |
289 | check_zero(msb, X32_WRITEV | X32_BIT); |
290 | |
291 | run("Checking some 64-bit syscalls as x32\n" ); |
292 | check_enosys(msb, X64_IOCTL | X32_BIT); |
293 | check_enosys(msb, X64_READV | X32_BIT); |
294 | check_enosys(msb, X64_WRITEV | X32_BIT); |
295 | } |
296 | |
297 | static void test_syscalls_without_x32(int msb) |
298 | { |
299 | run("Checking for absence of x32 system calls\n" ); |
300 | check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS); |
301 | } |
302 | |
303 | static void test_syscall_numbering(void) |
304 | { |
305 | static const int msbs[] = { |
306 | 0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX, |
307 | INT_MIN, INT_MIN+1 |
308 | }; |
309 | |
310 | sh->indent++; |
311 | |
312 | /* |
313 | * The MSB is supposed to be ignored, so we loop over a few |
314 | * to test that out. |
315 | */ |
316 | for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) { |
317 | int msb = msbs[i]; |
318 | run("Checking system calls with msb = %d (0x%x)\n" , |
319 | msb, msb); |
320 | |
321 | sh->indent++; |
322 | |
323 | test_syscalls_common(msb); |
324 | if (with_x32) |
325 | test_syscalls_with_x32(msb); |
326 | else |
327 | test_syscalls_without_x32(msb); |
328 | |
329 | sh->indent--; |
330 | } |
331 | |
332 | sh->indent--; |
333 | } |
334 | |
335 | static void syscall_numbering_tracee(void) |
336 | { |
337 | enum ptrace_pass pass; |
338 | |
339 | if (ptrace(PTRACE_TRACEME, 0, 0, 0)) { |
340 | crit("Failed to request tracing\n" ); |
341 | return; |
342 | } |
343 | raise(SIGSTOP); |
344 | |
345 | for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE; |
346 | sh->ptrace_pass = ++pass) { |
347 | run("Running tests under ptrace: %s\n" , ptrace_pass_name[pass]); |
348 | test_syscall_numbering(); |
349 | } |
350 | } |
351 | |
352 | static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass) |
353 | { |
354 | struct user_regs_struct regs; |
355 | |
356 | sh->probing_syscall = false; /* Do this on entry only */ |
357 | |
358 | /* For these, don't even getregs */ |
359 | if (pass == PTP_NOTHING || pass == PTP_DONE) |
360 | return; |
361 | |
362 | ptrace(PTRACE_GETREGS, testpid, NULL, ®s); |
363 | |
364 | if (regs.orig_rax != regs.rbx) { |
365 | fail("orig_rax %#llx doesn't match syscall number %#llx\n" , |
366 | (unsigned long long)regs.orig_rax, |
367 | (unsigned long long)regs.rbx); |
368 | } |
369 | |
370 | switch (pass) { |
371 | case PTP_GETREGS: |
372 | /* Just read, no writeback */ |
373 | return; |
374 | case PTP_WRITEBACK: |
375 | /* Write back the same register state verbatim */ |
376 | break; |
377 | case PTP_FUZZRET: |
378 | regs.rax = MODIFIED_BY_PTRACE; |
379 | break; |
380 | case PTP_FUZZHIGH: |
381 | regs.rax = MODIFIED_BY_PTRACE; |
382 | regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL; |
383 | break; |
384 | case PTP_INTNUM: |
385 | regs.rax = MODIFIED_BY_PTRACE; |
386 | regs.orig_rax = (int)regs.orig_rax; |
387 | break; |
388 | default: |
389 | crit("invalid ptrace_pass\n" ); |
390 | break; |
391 | } |
392 | |
393 | ptrace(PTRACE_SETREGS, testpid, NULL, ®s); |
394 | } |
395 | |
396 | static void syscall_numbering_tracer(pid_t testpid) |
397 | { |
398 | int wstatus; |
399 | |
400 | do { |
401 | pid_t wpid = waitpid(testpid, &wstatus, 0); |
402 | if (wpid < 0 && errno != EINTR) |
403 | break; |
404 | if (wpid != testpid) |
405 | continue; |
406 | if (!WIFSTOPPED(wstatus)) |
407 | break; /* Thread exited? */ |
408 | |
409 | if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP) |
410 | mess_with_syscall(testpid, pass: sh->ptrace_pass); |
411 | } while (sh->ptrace_pass != PTP_DONE && |
412 | !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL)); |
413 | |
414 | ptrace(PTRACE_DETACH, testpid, NULL, NULL); |
415 | |
416 | /* Wait for the child process to terminate */ |
417 | while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus)) |
418 | /* wait some more */; |
419 | } |
420 | |
/*
 * Run the tests again under ptrace: fork a child that becomes the
 * traced test process while the caller acts as its tracer.
 */
static void test_traced_syscall_numbering(void)
{
	/* Launch the test process; this process continues as the tracer */
	const pid_t testpid = fork();

	if (testpid == 0) {
		syscall_numbering_tracee();
		_exit(0);
	}

	if (testpid < 0)
		crit("Unable to launch tracer process\n");	/* exits */

	syscall_numbering_tracer(testpid);
}
437 | |
438 | int main(void) |
439 | { |
440 | unsigned int nerr; |
441 | |
442 | /* |
443 | * It is quite likely to get a segfault on a failure, so make |
444 | * sure the message gets out by setting stdout to nonbuffered. |
445 | */ |
446 | setvbuf(stdout, NULL, _IONBF, 0); |
447 | |
448 | /* |
449 | * Harmless file descriptor to work on... |
450 | */ |
451 | nullfd = open("/dev/null" , O_RDWR); |
452 | if (nullfd < 0) { |
453 | crit("Unable to open /dev/null: %s\n" , strerror(errno)); |
454 | } |
455 | |
456 | /* |
457 | * Set up a block of shared memory... |
458 | */ |
459 | sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE, |
460 | MAP_ANONYMOUS|MAP_SHARED, 0, 0); |
461 | if (sh == MAP_FAILED) { |
462 | crit("Unable to allocated shared memory block: %s\n" , |
463 | strerror(errno)); |
464 | } |
465 | |
466 | with_x32 = test_x32(); |
467 | |
468 | run("Running tests without ptrace...\n" ); |
469 | test_syscall_numbering(); |
470 | |
471 | test_traced_syscall_numbering(); |
472 | |
473 | nerr = sh->nerr; |
474 | if (!nerr) { |
475 | ok("All system calls succeeded or failed as expected\n" ); |
476 | return 0; |
477 | } else { |
478 | fail("A total of %u system call%s had incorrect behavior\n" , |
479 | nerr, nerr != 1 ? "s" : "" ); |
480 | return 1; |
481 | } |
482 | } |
483 | |