1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
4 | * |
5 | * Test code for seccomp bpf. |
6 | */ |
7 | |
8 | #define _GNU_SOURCE |
9 | #include <sys/types.h> |
10 | |
11 | /* |
12 | * glibc 2.26 and later have SIGSYS in siginfo_t. Before that, |
13 | * we need to use the kernel's siginfo.h file and trick glibc |
14 | * into accepting it. |
15 | */ |
16 | #if !__GLIBC_PREREQ(2, 26) |
17 | # include <asm/siginfo.h> |
18 | # define __have_siginfo_t 1 |
19 | # define __have_sigval_t 1 |
20 | # define __have_sigevent_t 1 |
21 | #endif |
22 | |
23 | #include <errno.h> |
24 | #include <linux/filter.h> |
25 | #include <sys/prctl.h> |
26 | #include <sys/ptrace.h> |
27 | #include <sys/user.h> |
28 | #include <linux/prctl.h> |
29 | #include <linux/ptrace.h> |
30 | #include <linux/seccomp.h> |
31 | #include <pthread.h> |
32 | #include <semaphore.h> |
33 | #include <signal.h> |
34 | #include <stddef.h> |
35 | #include <stdbool.h> |
36 | #include <string.h> |
37 | #include <time.h> |
38 | #include <limits.h> |
39 | #include <linux/elf.h> |
40 | #include <sys/uio.h> |
41 | #include <sys/utsname.h> |
42 | #include <sys/fcntl.h> |
43 | #include <sys/mman.h> |
44 | #include <sys/times.h> |
45 | #include <sys/socket.h> |
46 | #include <sys/ioctl.h> |
47 | #include <linux/kcmp.h> |
48 | #include <sys/resource.h> |
49 | #include <sys/capability.h> |
50 | |
51 | #include <unistd.h> |
52 | #include <sys/syscall.h> |
53 | #include <poll.h> |
54 | |
55 | #include "../kselftest_harness.h" |
56 | #include "../clone3/clone3_selftests.h" |
57 | |
58 | /* Attempt to de-conflict with the selftests tree. */ |
59 | #ifndef SKIP |
60 | #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) |
61 | #endif |
62 | |
63 | #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) |
64 | |
65 | #ifndef PR_SET_PTRACER |
66 | # define PR_SET_PTRACER 0x59616d61 |
67 | #endif |
68 | |
69 | #ifndef PR_SET_NO_NEW_PRIVS |
70 | #define PR_SET_NO_NEW_PRIVS 38 |
71 | #define PR_GET_NO_NEW_PRIVS 39 |
72 | #endif |
73 | |
74 | #ifndef PR_SECCOMP_EXT |
75 | #define PR_SECCOMP_EXT 43 |
76 | #endif |
77 | |
78 | #ifndef SECCOMP_EXT_ACT |
79 | #define SECCOMP_EXT_ACT 1 |
80 | #endif |
81 | |
82 | #ifndef SECCOMP_EXT_ACT_TSYNC |
83 | #define SECCOMP_EXT_ACT_TSYNC 1 |
84 | #endif |
85 | |
86 | #ifndef SECCOMP_MODE_STRICT |
87 | #define SECCOMP_MODE_STRICT 1 |
88 | #endif |
89 | |
90 | #ifndef SECCOMP_MODE_FILTER |
91 | #define SECCOMP_MODE_FILTER 2 |
92 | #endif |
93 | |
94 | #ifndef SECCOMP_RET_ALLOW |
/*
 * Fallback definition of the record the BPF programs in this file load
 * from (BPF_LD|BPF_W|BPF_ABS at offsetof(struct seccomp_data, ...)).
 * Only compiled when the headers did not provide SECCOMP_RET_ALLOW.
 */
struct seccomp_data {
	int nr;				/* compared against __NR_* values by the filters */
	__u32 arch;
	__u64 instruction_pointer;
	__u64 args[6];			/* addressed through syscall_arg(n) */
};
101 | #endif |
102 | |
103 | #ifndef SECCOMP_RET_KILL_PROCESS |
104 | #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ |
105 | #define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ |
106 | #endif |
107 | #ifndef SECCOMP_RET_KILL |
108 | #define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD |
109 | #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ |
110 | #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ |
111 | #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ |
112 | #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ |
113 | #endif |
114 | #ifndef SECCOMP_RET_LOG |
115 | #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ |
116 | #endif |
117 | |
118 | #ifndef __NR_seccomp |
119 | # if defined(__i386__) |
120 | # define __NR_seccomp 354 |
121 | # elif defined(__x86_64__) |
122 | # define __NR_seccomp 317 |
123 | # elif defined(__arm__) |
124 | # define __NR_seccomp 383 |
125 | # elif defined(__aarch64__) |
126 | # define __NR_seccomp 277 |
127 | # elif defined(__riscv) |
128 | # define __NR_seccomp 277 |
129 | # elif defined(__csky__) |
130 | # define __NR_seccomp 277 |
131 | # elif defined(__loongarch__) |
132 | # define __NR_seccomp 277 |
133 | # elif defined(__hppa__) |
134 | # define __NR_seccomp 338 |
135 | # elif defined(__powerpc__) |
136 | # define __NR_seccomp 358 |
137 | # elif defined(__s390__) |
138 | # define __NR_seccomp 348 |
139 | # elif defined(__xtensa__) |
140 | # define __NR_seccomp 337 |
141 | # elif defined(__sh__) |
142 | # define __NR_seccomp 372 |
143 | # elif defined(__mc68000__) |
144 | # define __NR_seccomp 380 |
145 | # else |
146 | # warning "seccomp syscall number unknown for this architecture" |
147 | # define __NR_seccomp 0xffff |
148 | # endif |
149 | #endif |
150 | |
151 | #ifndef SECCOMP_SET_MODE_STRICT |
152 | #define SECCOMP_SET_MODE_STRICT 0 |
153 | #endif |
154 | |
155 | #ifndef SECCOMP_SET_MODE_FILTER |
156 | #define SECCOMP_SET_MODE_FILTER 1 |
157 | #endif |
158 | |
159 | #ifndef SECCOMP_GET_ACTION_AVAIL |
160 | #define SECCOMP_GET_ACTION_AVAIL 2 |
161 | #endif |
162 | |
163 | #ifndef SECCOMP_GET_NOTIF_SIZES |
164 | #define SECCOMP_GET_NOTIF_SIZES 3 |
165 | #endif |
166 | |
167 | #ifndef SECCOMP_FILTER_FLAG_TSYNC |
168 | #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) |
169 | #endif |
170 | |
171 | #ifndef SECCOMP_FILTER_FLAG_LOG |
172 | #define SECCOMP_FILTER_FLAG_LOG (1UL << 1) |
173 | #endif |
174 | |
175 | #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW |
176 | #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) |
177 | #endif |
178 | |
179 | #ifndef PTRACE_SECCOMP_GET_METADATA |
180 | #define PTRACE_SECCOMP_GET_METADATA 0x420d |
181 | |
/*
 * Fallback definition, supplied together with the
 * PTRACE_SECCOMP_GET_METADATA request number when the headers lack it.
 */
struct seccomp_metadata {
	__u64 filter_off;	/* Input: which filter */
	__u64 flags;		/* Output: filter's flags */
};
186 | #endif |
187 | |
188 | #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER |
189 | #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) |
190 | #endif |
191 | |
192 | #ifndef SECCOMP_RET_USER_NOTIF |
193 | #define SECCOMP_RET_USER_NOTIF 0x7fc00000U |
194 | |
195 | #define SECCOMP_IOC_MAGIC '!' |
196 | #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) |
197 | #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) |
198 | #define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) |
199 | #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) |
200 | |
201 | /* Flags for seccomp notification fd ioctl. */ |
202 | #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) |
203 | #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ |
204 | struct seccomp_notif_resp) |
205 | #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) |
206 | |
/*
 * Fallback definition of the type named by SECCOMP_IOCTL_NOTIF_RECV.
 * It embeds a full struct seccomp_data as its last member.
 */
struct seccomp_notif {
	__u64 id;
	__u32 pid;
	__u32 flags;
	struct seccomp_data data;
};
213 | |
/* Fallback definition of the type named by SECCOMP_IOCTL_NOTIF_SEND. */
struct seccomp_notif_resp {
	__u64 id;
	__s64 val;
	__s32 error;
	__u32 flags;
};
220 | |
/*
 * Fallback definition holding one 16-bit value per structure named by
 * the member.
 * NOTE(review): presumably filled in by the SECCOMP_GET_NOTIF_SIZES
 * operation - confirm against the code that uses this struct.
 */
struct seccomp_notif_sizes {
	__u16 seccomp_notif;
	__u16 seccomp_notif_resp;
	__u16 seccomp_data;
};
226 | #endif |
227 | |
228 | #ifndef SECCOMP_IOCTL_NOTIF_ADDFD |
229 | /* On success, the return value is the remote process's added fd number */ |
230 | #define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ |
231 | struct seccomp_notif_addfd) |
232 | |
233 | /* valid flags for seccomp_notif_addfd */ |
234 | #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ |
235 | |
/* Fallback definition of the type named by SECCOMP_IOCTL_NOTIF_ADDFD. */
struct seccomp_notif_addfd {
	__u64 id;
	__u32 flags;		/* SECCOMP_ADDFD_FLAG_* values */
	__u32 srcfd;
	__u32 newfd;
	__u32 newfd_flags;
};
243 | #endif |
244 | |
245 | #ifndef SECCOMP_ADDFD_FLAG_SEND |
246 | #define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */ |
247 | #endif |
248 | |
/*
 * Variant with only an id and four opaque bytes, i.e. fewer members than
 * struct seccomp_notif_addfd. It exists to build
 * SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, which reuses ioctl number 3.
 * NOTE(review): presumably used to pass an undersized argument - confirm
 * against the test that uses it.
 */
struct seccomp_notif_addfd_small {
	__u64 id;
	char weird[4];
};
253 | #define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \ |
254 | SECCOMP_IOW(3, struct seccomp_notif_addfd_small) |
255 | |
/*
 * Variant that overlays struct seccomp_notif_addfd with a buffer eight
 * bytes longer, so the type is larger than the regular addfd argument.
 * It exists to build SECCOMP_IOCTL_NOTIF_ADDFD_BIG on ioctl number 3.
 */
struct seccomp_notif_addfd_big {
	union {
		struct seccomp_notif_addfd addfd;
		char buf[sizeof(struct seccomp_notif_addfd) + 8];
	};
};
262 | #define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \ |
263 | SECCOMP_IOWR(3, struct seccomp_notif_addfd_big) |
264 | |
265 | #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY |
266 | #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 |
267 | #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 |
268 | #endif |
269 | |
270 | #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE |
271 | #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001 |
272 | #endif |
273 | |
274 | #ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH |
275 | #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) |
276 | #endif |
277 | |
278 | #ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV |
279 | #define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5) |
280 | #endif |
281 | |
282 | #ifndef seccomp |
/*
 * Issue the raw seccomp system call.
 *
 * errno is reset to 0 before the call is made. The syscall() result is
 * returned converted to int.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);

	return rc;
}
288 | #endif |
289 | |
290 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ |
291 | #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) |
292 | #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ |
293 | #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) |
294 | #else |
295 | #error "wut? Unknown __BYTE_ORDER__?!" |
296 | #endif |
297 | |
298 | #define SIBLING_EXIT_UNKILLED 0xbadbeef |
299 | #define SIBLING_EXIT_FAILURE 0xbadface |
300 | #define SIBLING_EXIT_NEWPRIVS 0xbadfeed |
301 | |
/*
 * Compare fd1 of pid1 with fd2 of pid2 through the kcmp() syscall using
 * KCMP_FILE, and return the syscall result (errno is cleared first).
 *
 * When the headers do not define __NR_kcmp the call cannot be made:
 * errno is set to ENOSYS and -1 is returned.
 */
static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
{
	int rc = -1;

#ifdef __NR_kcmp
	errno = 0;
	rc = syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
#else
	errno = ENOSYS;
#endif
	return rc;
}
312 | |
/* Implemented as a macro so that TH_LOG reports the location where filecmp() is actually used. */
#define filecmp(pid1, pid2, fd1, fd2) ({				\
	int _ret = __filecmp(pid1, pid2, fd1, fd2);			\
									\
	/* A missing kcmp() is logged and then reported as 0. */	\
	if (_ret < 0 && errno == ENOSYS) {				\
		TH_LOG("kcmp() syscall missing (test is less accurate)");\
		_ret = 0;						\
	}								\
	_ret; })
325 | |
326 | TEST(kcmp) |
327 | { |
328 | int ret; |
329 | |
330 | ret = __filecmp(pid1: getpid(), pid2: getpid(), fd1: 1, fd2: 1); |
331 | EXPECT_EQ(ret, 0); |
332 | if (ret != 0 && errno == ENOSYS) |
333 | SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)" ); |
334 | } |
335 | |
336 | TEST(mode_strict_support) |
337 | { |
338 | long ret; |
339 | |
340 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); |
341 | ASSERT_EQ(0, ret) { |
342 | TH_LOG("Kernel does not support CONFIG_SECCOMP" ); |
343 | } |
344 | syscall(__NR_exit, 0); |
345 | } |
346 | |
347 | TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) |
348 | { |
349 | long ret; |
350 | |
351 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); |
352 | ASSERT_EQ(0, ret) { |
353 | TH_LOG("Kernel does not support CONFIG_SECCOMP" ); |
354 | } |
355 | syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER, |
356 | NULL, NULL, NULL); |
357 | EXPECT_FALSE(true) { |
358 | TH_LOG("Unreachable!" ); |
359 | } |
360 | } |
361 | |
362 | /* Note! This doesn't test no new privs behavior */ |
363 | TEST(no_new_privs_support) |
364 | { |
365 | long ret; |
366 | |
367 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
368 | EXPECT_EQ(0, ret) { |
369 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
370 | } |
371 | } |
372 | |
373 | /* Tests kernel support by checking for a copy_from_user() fault on NULL. */ |
374 | TEST(mode_filter_support) |
375 | { |
376 | long ret; |
377 | |
378 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); |
379 | ASSERT_EQ(0, ret) { |
380 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
381 | } |
382 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL); |
383 | EXPECT_EQ(-1, ret); |
384 | EXPECT_EQ(EFAULT, errno) { |
385 | TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!" ); |
386 | } |
387 | } |
388 | |
389 | TEST(mode_filter_without_nnp) |
390 | { |
391 | struct sock_filter filter[] = { |
392 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
393 | }; |
394 | struct sock_fprog prog = { |
395 | .len = (unsigned short)ARRAY_SIZE(filter), |
396 | .filter = filter, |
397 | }; |
398 | long ret; |
399 | cap_t cap = cap_get_proc(); |
400 | cap_flag_value_t is_cap_sys_admin = 0; |
401 | |
402 | ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); |
403 | ASSERT_LE(0, ret) { |
404 | TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS" ); |
405 | } |
406 | errno = 0; |
407 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); |
408 | /* Succeeds with CAP_SYS_ADMIN, fails without */ |
409 | cap_get_flag(cap, CAP_SYS_ADMIN, CAP_EFFECTIVE, &is_cap_sys_admin); |
410 | if (!is_cap_sys_admin) { |
411 | EXPECT_EQ(-1, ret); |
412 | EXPECT_EQ(EACCES, errno); |
413 | } else { |
414 | EXPECT_EQ(0, ret); |
415 | } |
416 | } |
417 | |
418 | #define MAX_INSNS_PER_PATH 32768 |
419 | |
420 | TEST(filter_size_limits) |
421 | { |
422 | int i; |
423 | int count = BPF_MAXINSNS + 1; |
424 | struct sock_filter allow[] = { |
425 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
426 | }; |
427 | struct sock_filter *filter; |
428 | struct sock_fprog prog = { }; |
429 | long ret; |
430 | |
431 | filter = calloc(count, sizeof(*filter)); |
432 | ASSERT_NE(NULL, filter); |
433 | |
434 | for (i = 0; i < count; i++) |
435 | filter[i] = allow[0]; |
436 | |
437 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
438 | ASSERT_EQ(0, ret); |
439 | |
440 | prog.filter = filter; |
441 | prog.len = count; |
442 | |
443 | /* Too many filter instructions in a single filter. */ |
444 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); |
445 | ASSERT_NE(0, ret) { |
446 | TH_LOG("Installing %d insn filter was allowed" , prog.len); |
447 | } |
448 | |
449 | /* One less is okay, though. */ |
450 | prog.len -= 1; |
451 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); |
452 | ASSERT_EQ(0, ret) { |
453 | TH_LOG("Installing %d insn filter wasn't allowed" , prog.len); |
454 | } |
455 | } |
456 | |
457 | TEST(filter_chain_limits) |
458 | { |
459 | int i; |
460 | int count = BPF_MAXINSNS; |
461 | struct sock_filter allow[] = { |
462 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
463 | }; |
464 | struct sock_filter *filter; |
465 | struct sock_fprog prog = { }; |
466 | long ret; |
467 | |
468 | filter = calloc(count, sizeof(*filter)); |
469 | ASSERT_NE(NULL, filter); |
470 | |
471 | for (i = 0; i < count; i++) |
472 | filter[i] = allow[0]; |
473 | |
474 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
475 | ASSERT_EQ(0, ret); |
476 | |
477 | prog.filter = filter; |
478 | prog.len = 1; |
479 | |
480 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); |
481 | ASSERT_EQ(0, ret); |
482 | |
483 | prog.len = count; |
484 | |
485 | /* Too many total filter instructions. */ |
486 | for (i = 0; i < MAX_INSNS_PER_PATH; i++) { |
487 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); |
488 | if (ret != 0) |
489 | break; |
490 | } |
491 | ASSERT_NE(0, ret) { |
492 | TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)" , |
493 | i, count, i * (count + 4)); |
494 | } |
495 | } |
496 | |
/*
 * Once a filter is installed, a request for SECCOMP_MODE_STRICT is
 * expected to fail with -1 and errno set to EINVAL.
 */
TEST(mode_filter_cannot_move_to_strict)
{
	/* One-instruction program that returns SECCOMP_RET_ALLOW. */
	struct sock_filter allow_all[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = allow_all,
		.len = (unsigned short)ARRAY_SIZE(allow_all),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
518 | |
519 | |
/*
 * PR_GET_SECCOMP must report 0 before any filter is installed and
 * SECCOMP_MODE_FILTER afterwards.
 */
TEST(mode_filter_get_seccomp)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* No seccomp mode has been set yet. */
	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Use the named mode rather than the bare value 2. */
	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(SECCOMP_MODE_FILTER, ret);
}
543 | |
544 | |
/* Installing a program that only returns SECCOMP_RET_ALLOW must succeed. */
TEST(ALLOW_all)
{
	struct sock_filter allow_all[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = allow_all,
		.len = (unsigned short)ARRAY_SIZE(allow_all),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
}
562 | |
/*
 * A program with zero instructions is expected to be refused with -1
 * and errno set to EINVAL.
 */
TEST(empty_prog)
{
	/* Deliberately empty instruction array. */
	struct sock_filter no_insns[] = {
	};
	struct sock_fprog prog = {
		.filter = no_insns,
		.len = (unsigned short)ARRAY_SIZE(no_insns),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
580 | |
/*
 * With a program that returns SECCOMP_RET_LOG for everything, a raw
 * getppid() must still return the parent pid recorded beforehand.
 */
TEST(log_all)
{
	struct sock_filter log_insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
	};
	struct sock_fprog prog = {
		.filter = log_insns,
		.len = (unsigned short)ARRAY_SIZE(log_insns),
	};
	pid_t parent = getppid();
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	/* getppid() should succeed and be logged (no check for logging) */
	EXPECT_EQ(parent, syscall(__NR_getppid));
}
602 | |
603 | TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) |
604 | { |
605 | struct sock_filter filter[] = { |
606 | BPF_STMT(BPF_RET|BPF_K, 0x10000000U), |
607 | }; |
608 | struct sock_fprog prog = { |
609 | .len = (unsigned short)ARRAY_SIZE(filter), |
610 | .filter = filter, |
611 | }; |
612 | long ret; |
613 | |
614 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
615 | ASSERT_EQ(0, ret); |
616 | |
617 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); |
618 | ASSERT_EQ(0, ret); |
619 | EXPECT_EQ(0, syscall(__NR_getpid)) { |
620 | TH_LOG("getpid() shouldn't ever return" ); |
621 | } |
622 | } |
623 | |
/* 0x90000000 is above SECCOMP_RET_ALLOW and equals none of the SECCOMP_RET_* action values defined in this file. */
625 | TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS) |
626 | { |
627 | struct sock_filter filter[] = { |
628 | BPF_STMT(BPF_RET|BPF_K, 0x90000000U), |
629 | }; |
630 | struct sock_fprog prog = { |
631 | .len = (unsigned short)ARRAY_SIZE(filter), |
632 | .filter = filter, |
633 | }; |
634 | long ret; |
635 | |
636 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
637 | ASSERT_EQ(0, ret); |
638 | |
639 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); |
640 | ASSERT_EQ(0, ret); |
641 | EXPECT_EQ(0, syscall(__NR_getpid)) { |
642 | TH_LOG("getpid() shouldn't ever return" ); |
643 | } |
644 | } |
645 | |
646 | TEST_SIGNAL(KILL_all, SIGSYS) |
647 | { |
648 | struct sock_filter filter[] = { |
649 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), |
650 | }; |
651 | struct sock_fprog prog = { |
652 | .len = (unsigned short)ARRAY_SIZE(filter), |
653 | .filter = filter, |
654 | }; |
655 | long ret; |
656 | |
657 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
658 | ASSERT_EQ(0, ret); |
659 | |
660 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); |
661 | ASSERT_EQ(0, ret); |
662 | } |
663 | |
664 | TEST_SIGNAL(KILL_one, SIGSYS) |
665 | { |
666 | struct sock_filter filter[] = { |
667 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
668 | offsetof(struct seccomp_data, nr)), |
669 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), |
670 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), |
671 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
672 | }; |
673 | struct sock_fprog prog = { |
674 | .len = (unsigned short)ARRAY_SIZE(filter), |
675 | .filter = filter, |
676 | }; |
677 | long ret; |
678 | pid_t parent = getppid(); |
679 | |
680 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
681 | ASSERT_EQ(0, ret); |
682 | |
683 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); |
684 | ASSERT_EQ(0, ret); |
685 | |
686 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
687 | /* getpid() should never return. */ |
688 | EXPECT_EQ(0, syscall(__NR_getpid)); |
689 | } |
690 | |
691 | TEST_SIGNAL(KILL_one_arg_one, SIGSYS) |
692 | { |
693 | void *fatal_address; |
694 | struct sock_filter filter[] = { |
695 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
696 | offsetof(struct seccomp_data, nr)), |
697 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0), |
698 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
699 | /* Only both with lower 32-bit for now. */ |
700 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)), |
701 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, |
702 | (unsigned long)&fatal_address, 0, 1), |
703 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), |
704 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
705 | }; |
706 | struct sock_fprog prog = { |
707 | .len = (unsigned short)ARRAY_SIZE(filter), |
708 | .filter = filter, |
709 | }; |
710 | long ret; |
711 | pid_t parent = getppid(); |
712 | struct tms timebuf; |
713 | clock_t clock = times(&timebuf); |
714 | |
715 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
716 | ASSERT_EQ(0, ret); |
717 | |
718 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); |
719 | ASSERT_EQ(0, ret); |
720 | |
721 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
722 | EXPECT_LE(clock, syscall(__NR_times, &timebuf)); |
723 | /* times() should never return. */ |
724 | EXPECT_EQ(0, syscall(__NR_times, &fatal_address)); |
725 | } |
726 | |
727 | TEST_SIGNAL(KILL_one_arg_six, SIGSYS) |
728 | { |
729 | #ifndef __NR_mmap2 |
730 | int sysno = __NR_mmap; |
731 | #else |
732 | int sysno = __NR_mmap2; |
733 | #endif |
734 | struct sock_filter filter[] = { |
735 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
736 | offsetof(struct seccomp_data, nr)), |
737 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0), |
738 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
739 | /* Only both with lower 32-bit for now. */ |
740 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)), |
741 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), |
742 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), |
743 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
744 | }; |
745 | struct sock_fprog prog = { |
746 | .len = (unsigned short)ARRAY_SIZE(filter), |
747 | .filter = filter, |
748 | }; |
749 | long ret; |
750 | pid_t parent = getppid(); |
751 | int fd; |
752 | void *map1, *map2; |
753 | int page_size = sysconf(_SC_PAGESIZE); |
754 | |
755 | ASSERT_LT(0, page_size); |
756 | |
757 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
758 | ASSERT_EQ(0, ret); |
759 | |
760 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); |
761 | ASSERT_EQ(0, ret); |
762 | |
763 | fd = open("/dev/zero" , O_RDONLY); |
764 | ASSERT_NE(-1, fd); |
765 | |
766 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
767 | map1 = (void *)syscall(sysno, |
768 | NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size); |
769 | EXPECT_NE(MAP_FAILED, map1); |
770 | /* mmap2() should never return. */ |
771 | map2 = (void *)syscall(sysno, |
772 | NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE); |
773 | EXPECT_EQ(MAP_FAILED, map2); |
774 | |
775 | /* The test failed, so clean up the resources. */ |
776 | munmap(map1, page_size); |
777 | munmap(map2, page_size); |
778 | close(fd); |
779 | } |
780 | |
781 | /* This is a thread task to die via seccomp filter violation. */ |
782 | void *kill_thread(void *data) |
783 | { |
784 | bool die = (bool)data; |
785 | |
786 | if (die) { |
787 | syscall(__NR_getpid); |
788 | return (void *)SIBLING_EXIT_FAILURE; |
789 | } |
790 | |
791 | return (void *)SIBLING_EXIT_UNKILLED; |
792 | } |
793 | |
/* Selects which filter(s) kill_thread_or_group() installs. */
enum kill_t {
	KILL_THREAD,	/* only the SECCOMP_RET_KILL_THREAD filter */
	KILL_PROCESS,	/* SECCOMP_RET_KILL_PROCESS filter, then the KILL_THREAD one on top */
	RET_UNKNOWN	/* a filter returning 0xAAAAAAAA instead of a named action */
};
799 | |
800 | /* Prepare a thread that will kill itself or both of us. */ |
801 | void kill_thread_or_group(struct __test_metadata *_metadata, |
802 | enum kill_t kill_how) |
803 | { |
804 | pthread_t thread; |
805 | void *status; |
806 | /* Kill only when calling __NR_getpid. */ |
807 | struct sock_filter filter_thread[] = { |
808 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
809 | offsetof(struct seccomp_data, nr)), |
810 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), |
811 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), |
812 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
813 | }; |
814 | struct sock_fprog prog_thread = { |
815 | .len = (unsigned short)ARRAY_SIZE(filter_thread), |
816 | .filter = filter_thread, |
817 | }; |
818 | int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA; |
819 | struct sock_filter filter_process[] = { |
820 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
821 | offsetof(struct seccomp_data, nr)), |
822 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), |
823 | BPF_STMT(BPF_RET|BPF_K, kill), |
824 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
825 | }; |
826 | struct sock_fprog prog_process = { |
827 | .len = (unsigned short)ARRAY_SIZE(filter_process), |
828 | .filter = filter_process, |
829 | }; |
830 | |
831 | ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { |
832 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
833 | } |
834 | |
835 | ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, |
836 | kill_how == KILL_THREAD ? &prog_thread |
837 | : &prog_process)); |
838 | |
839 | /* |
840 | * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS |
841 | * flag cannot be downgraded by a new filter. |
842 | */ |
843 | if (kill_how == KILL_PROCESS) |
844 | ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); |
845 | |
846 | /* Start a thread that will exit immediately. */ |
847 | ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); |
848 | ASSERT_EQ(0, pthread_join(thread, &status)); |
849 | ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status); |
850 | |
851 | /* Start a thread that will die immediately. */ |
852 | ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true)); |
853 | ASSERT_EQ(0, pthread_join(thread, &status)); |
854 | ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status); |
855 | |
856 | /* |
857 | * If we get here, only the spawned thread died. Let the parent know |
858 | * the whole process didn't die (i.e. this thread, the spawner, |
859 | * stayed running). |
860 | */ |
861 | exit(42); |
862 | } |
863 | |
864 | TEST(KILL_thread) |
865 | { |
866 | int status; |
867 | pid_t child_pid; |
868 | |
869 | child_pid = fork(); |
870 | ASSERT_LE(0, child_pid); |
871 | if (child_pid == 0) { |
872 | kill_thread_or_group(_metadata, kill_how: KILL_THREAD); |
873 | _exit(38); |
874 | } |
875 | |
876 | ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); |
877 | |
878 | /* If only the thread was killed, we'll see exit 42. */ |
879 | ASSERT_TRUE(WIFEXITED(status)); |
880 | ASSERT_EQ(42, WEXITSTATUS(status)); |
881 | } |
882 | |
883 | TEST(KILL_process) |
884 | { |
885 | int status; |
886 | pid_t child_pid; |
887 | |
888 | child_pid = fork(); |
889 | ASSERT_LE(0, child_pid); |
890 | if (child_pid == 0) { |
891 | kill_thread_or_group(_metadata, kill_how: KILL_PROCESS); |
892 | _exit(38); |
893 | } |
894 | |
895 | ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); |
896 | |
897 | /* If the entire process was killed, we'll see SIGSYS. */ |
898 | ASSERT_TRUE(WIFSIGNALED(status)); |
899 | ASSERT_EQ(SIGSYS, WTERMSIG(status)); |
900 | } |
901 | |
902 | TEST(KILL_unknown) |
903 | { |
904 | int status; |
905 | pid_t child_pid; |
906 | |
907 | child_pid = fork(); |
908 | ASSERT_LE(0, child_pid); |
909 | if (child_pid == 0) { |
910 | kill_thread_or_group(_metadata, kill_how: RET_UNKNOWN); |
911 | _exit(38); |
912 | } |
913 | |
914 | ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); |
915 | |
916 | /* If the entire process was killed, we'll see SIGSYS. */ |
917 | EXPECT_TRUE(WIFSIGNALED(status)) { |
918 | TH_LOG("Unknown SECCOMP_RET is only killing the thread?" ); |
919 | } |
920 | ASSERT_EQ(SIGSYS, WTERMSIG(status)); |
921 | } |
922 | |
923 | /* TODO(wad) add 64-bit versus 32-bit arg tests. */ |
/*
 * A program that loads from syscall_arg(6), one slot past the six-entry
 * args[] array, is expected to be refused with -1 and errno EINVAL.
 */
TEST(arg_out_of_range)
{
	struct sock_filter bad_load[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = bad_load,
		.len = (unsigned short)ARRAY_SIZE(bad_load),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
943 | |
/*
 * Declare _read_filter_<name> and prog_<name>: a program that returns
 * SECCOMP_RET_ERRNO | errno_val for read() and SECCOMP_RET_ALLOW for
 * every other syscall.
 *
 * The second parameter is deliberately not called "errno", which is
 * also the name of the <errno.h> macro, and it is parenthesized so that
 * any expression can be passed safely.
 */
#define ERRNO_FILTER(name, errno_val)					\
	struct sock_filter _read_filter_##name[] = {			\
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
			offsetof(struct seccomp_data, nr)),		\
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
		BPF_STMT(BPF_RET|BPF_K,					\
			 SECCOMP_RET_ERRNO | (errno_val)),		\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
	};								\
	struct sock_fprog prog_##name = {				\
		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
		.filter = _read_filter_##name,				\
	}
956 | |
957 | /* Make sure basic errno values are correctly passed through a filter. */ |
958 | TEST(ERRNO_valid) |
959 | { |
960 | ERRNO_FILTER(valid, E2BIG); |
961 | long ret; |
962 | pid_t parent = getppid(); |
963 | |
964 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
965 | ASSERT_EQ(0, ret); |
966 | |
967 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid); |
968 | ASSERT_EQ(0, ret); |
969 | |
970 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
971 | EXPECT_EQ(-1, read(-1, NULL, 0)); |
972 | EXPECT_EQ(E2BIG, errno); |
973 | } |
974 | |
975 | /* Make sure an errno of zero is correctly handled by the arch code. */ |
976 | TEST(ERRNO_zero) |
977 | { |
978 | ERRNO_FILTER(zero, 0); |
979 | long ret; |
980 | pid_t parent = getppid(); |
981 | |
982 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
983 | ASSERT_EQ(0, ret); |
984 | |
985 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero); |
986 | ASSERT_EQ(0, ret); |
987 | |
988 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
989 | /* "errno" of 0 is ok. */ |
990 | EXPECT_EQ(0, read(-1, NULL, 0)); |
991 | } |
992 | |
993 | /* |
994 | * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller. |
995 | * This tests that the errno value gets capped correctly, fixed by |
996 | * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO"). |
997 | */ |
998 | TEST(ERRNO_capped) |
999 | { |
1000 | ERRNO_FILTER(capped, 4096); |
1001 | long ret; |
1002 | pid_t parent = getppid(); |
1003 | |
1004 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1005 | ASSERT_EQ(0, ret); |
1006 | |
1007 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped); |
1008 | ASSERT_EQ(0, ret); |
1009 | |
1010 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
1011 | EXPECT_EQ(-1, read(-1, NULL, 0)); |
1012 | EXPECT_EQ(4095, errno); |
1013 | } |
1014 | |
1015 | /* |
1016 | * Filters are processed in reverse order: last applied is executed first. |
1017 | * Since only the SECCOMP_RET_ACTION mask is tested for return values, the |
1018 | * SECCOMP_RET_DATA mask results will follow the most recently applied |
1019 | * matching filter return (and not the lowest or highest value). |
1020 | */ |
1021 | TEST(ERRNO_order) |
1022 | { |
1023 | ERRNO_FILTER(first, 11); |
1024 | ERRNO_FILTER(second, 13); |
1025 | ERRNO_FILTER(third, 12); |
1026 | long ret; |
1027 | pid_t parent = getppid(); |
1028 | |
1029 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1030 | ASSERT_EQ(0, ret); |
1031 | |
1032 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first); |
1033 | ASSERT_EQ(0, ret); |
1034 | |
1035 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second); |
1036 | ASSERT_EQ(0, ret); |
1037 | |
1038 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third); |
1039 | ASSERT_EQ(0, ret); |
1040 | |
1041 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
1042 | EXPECT_EQ(-1, read(-1, NULL, 0)); |
1043 | EXPECT_EQ(12, errno); |
1044 | } |
1045 | |
/* Per-test state for the TRAP tests: the filter program to install. */
FIXTURE(TRAP) {
	struct sock_fprog prog;	/* heap copy of the filter, built in setup */
};
1049 | |
1050 | FIXTURE_SETUP(TRAP) |
1051 | { |
1052 | struct sock_filter filter[] = { |
1053 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
1054 | offsetof(struct seccomp_data, nr)), |
1055 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), |
1056 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), |
1057 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
1058 | }; |
1059 | |
1060 | memset(&self->prog, 0, sizeof(self->prog)); |
1061 | self->prog.filter = malloc(sizeof(filter)); |
1062 | ASSERT_NE(NULL, self->prog.filter); |
1063 | memcpy(self->prog.filter, filter, sizeof(filter)); |
1064 | self->prog.len = (unsigned short)ARRAY_SIZE(filter); |
1065 | } |
1066 | |
1067 | FIXTURE_TEARDOWN(TRAP) |
1068 | { |
1069 | if (self->prog.filter) |
1070 | free(self->prog.filter); |
1071 | } |
1072 | |
1073 | TEST_F_SIGNAL(TRAP, dfl, SIGSYS) |
1074 | { |
1075 | long ret; |
1076 | |
1077 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1078 | ASSERT_EQ(0, ret); |
1079 | |
1080 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); |
1081 | ASSERT_EQ(0, ret); |
1082 | syscall(__NR_getpid); |
1083 | } |
1084 | |
1085 | /* Ensure that SIGSYS overrides SIG_IGN */ |
1086 | TEST_F_SIGNAL(TRAP, ign, SIGSYS) |
1087 | { |
1088 | long ret; |
1089 | |
1090 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1091 | ASSERT_EQ(0, ret); |
1092 | |
1093 | signal(SIGSYS, SIG_IGN); |
1094 | |
1095 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); |
1096 | ASSERT_EQ(0, ret); |
1097 | syscall(__NR_getpid); |
1098 | } |
1099 | |
/* Snapshot of the last signal delivered to TRAP_action(). */
static siginfo_t TRAP_info;
static volatile int TRAP_nr;

/*
 * SA_SIGINFO-style handler: records the signal number and a copy of the
 * siginfo so the test body can inspect them afterwards. The context
 * argument is not used.
 */
static void TRAP_action(int nr, siginfo_t *info, void *void_context)
{
	TRAP_nr = nr;
	memcpy(&TRAP_info, info, sizeof(*info));
}
1107 | |
1108 | TEST_F(TRAP, handler) |
1109 | { |
1110 | int ret, test; |
1111 | struct sigaction act; |
1112 | sigset_t mask; |
1113 | |
1114 | memset(&act, 0, sizeof(act)); |
1115 | sigemptyset(set: &mask); |
1116 | sigaddset(set: &mask, SIGSYS); |
1117 | |
1118 | act.sa_sigaction = &TRAP_action; |
1119 | act.sa_flags = SA_SIGINFO; |
1120 | ret = sigaction(SIGSYS, &act, NULL); |
1121 | ASSERT_EQ(0, ret) { |
1122 | TH_LOG("sigaction failed" ); |
1123 | } |
1124 | ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); |
1125 | ASSERT_EQ(0, ret) { |
1126 | TH_LOG("sigprocmask failed" ); |
1127 | } |
1128 | |
1129 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1130 | ASSERT_EQ(0, ret); |
1131 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); |
1132 | ASSERT_EQ(0, ret); |
1133 | TRAP_nr = 0; |
1134 | memset(&TRAP_info, 0, sizeof(TRAP_info)); |
1135 | /* Expect the registers to be rolled back. (nr = error) may vary |
1136 | * based on arch. */ |
1137 | ret = syscall(__NR_getpid); |
1138 | /* Silence gcc warning about volatile. */ |
1139 | test = TRAP_nr; |
1140 | EXPECT_EQ(SIGSYS, test); |
1141 | struct local_sigsys { |
1142 | void *_call_addr; /* calling user insn */ |
1143 | int _syscall; /* triggering system call number */ |
1144 | unsigned int _arch; /* AUDIT_ARCH_* of syscall */ |
1145 | } *sigsys = (struct local_sigsys *) |
1146 | #ifdef si_syscall |
1147 | &(TRAP_info.si_call_addr); |
1148 | #else |
1149 | &TRAP_info.si_pid; |
1150 | #endif |
1151 | EXPECT_EQ(__NR_getpid, sigsys->_syscall); |
1152 | /* Make sure arch is non-zero. */ |
1153 | EXPECT_NE(0, sigsys->_arch); |
1154 | EXPECT_NE(0, (unsigned long)sigsys->_call_addr); |
1155 | } |
1156 | |
/*
 * Per-test state for the precedence tests: one filter program per
 * seccomp return action, each populated by the fixture setup.
 */
FIXTURE(precedence) {
	struct sock_fprog allow;
	struct sock_fprog log;
	struct sock_fprog trace;
	struct sock_fprog error;
	struct sock_fprog trap;
	struct sock_fprog kill;
};
1165 | |
1166 | FIXTURE_SETUP(precedence) |
1167 | { |
1168 | struct sock_filter allow_insns[] = { |
1169 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
1170 | }; |
1171 | struct sock_filter log_insns[] = { |
1172 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
1173 | offsetof(struct seccomp_data, nr)), |
1174 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), |
1175 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
1176 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), |
1177 | }; |
1178 | struct sock_filter trace_insns[] = { |
1179 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
1180 | offsetof(struct seccomp_data, nr)), |
1181 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), |
1182 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
1183 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE), |
1184 | }; |
1185 | struct sock_filter error_insns[] = { |
1186 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
1187 | offsetof(struct seccomp_data, nr)), |
1188 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), |
1189 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
1190 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO), |
1191 | }; |
1192 | struct sock_filter trap_insns[] = { |
1193 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
1194 | offsetof(struct seccomp_data, nr)), |
1195 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), |
1196 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
1197 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), |
1198 | }; |
1199 | struct sock_filter kill_insns[] = { |
1200 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
1201 | offsetof(struct seccomp_data, nr)), |
1202 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), |
1203 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
1204 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), |
1205 | }; |
1206 | |
1207 | memset(self, 0, sizeof(*self)); |
1208 | #define FILTER_ALLOC(_x) \ |
1209 | self->_x.filter = malloc(sizeof(_x##_insns)); \ |
1210 | ASSERT_NE(NULL, self->_x.filter); \ |
1211 | memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ |
1212 | self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) |
1213 | FILTER_ALLOC(allow); |
1214 | FILTER_ALLOC(log); |
1215 | FILTER_ALLOC(trace); |
1216 | FILTER_ALLOC(error); |
1217 | FILTER_ALLOC(trap); |
1218 | FILTER_ALLOC(kill); |
1219 | } |
1220 | |
1221 | FIXTURE_TEARDOWN(precedence) |
1222 | { |
1223 | #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) |
1224 | FILTER_FREE(allow); |
1225 | FILTER_FREE(log); |
1226 | FILTER_FREE(trace); |
1227 | FILTER_FREE(error); |
1228 | FILTER_FREE(trap); |
1229 | FILTER_FREE(kill); |
1230 | } |
1231 | |
1232 | TEST_F(precedence, allow_ok) |
1233 | { |
1234 | pid_t parent, res = 0; |
1235 | long ret; |
1236 | |
1237 | parent = getppid(); |
1238 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1239 | ASSERT_EQ(0, ret); |
1240 | |
1241 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); |
1242 | ASSERT_EQ(0, ret); |
1243 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); |
1244 | ASSERT_EQ(0, ret); |
1245 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); |
1246 | ASSERT_EQ(0, ret); |
1247 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); |
1248 | ASSERT_EQ(0, ret); |
1249 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); |
1250 | ASSERT_EQ(0, ret); |
1251 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); |
1252 | ASSERT_EQ(0, ret); |
1253 | /* Should work just fine. */ |
1254 | res = syscall(__NR_getppid); |
1255 | EXPECT_EQ(parent, res); |
1256 | } |
1257 | |
1258 | TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) |
1259 | { |
1260 | pid_t parent, res = 0; |
1261 | long ret; |
1262 | |
1263 | parent = getppid(); |
1264 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1265 | ASSERT_EQ(0, ret); |
1266 | |
1267 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); |
1268 | ASSERT_EQ(0, ret); |
1269 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); |
1270 | ASSERT_EQ(0, ret); |
1271 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); |
1272 | ASSERT_EQ(0, ret); |
1273 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); |
1274 | ASSERT_EQ(0, ret); |
1275 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); |
1276 | ASSERT_EQ(0, ret); |
1277 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); |
1278 | ASSERT_EQ(0, ret); |
1279 | /* Should work just fine. */ |
1280 | res = syscall(__NR_getppid); |
1281 | EXPECT_EQ(parent, res); |
1282 | /* getpid() should never return. */ |
1283 | res = syscall(__NR_getpid); |
1284 | EXPECT_EQ(0, res); |
1285 | } |
1286 | |
1287 | TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) |
1288 | { |
1289 | pid_t parent; |
1290 | long ret; |
1291 | |
1292 | parent = getppid(); |
1293 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1294 | ASSERT_EQ(0, ret); |
1295 | |
1296 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); |
1297 | ASSERT_EQ(0, ret); |
1298 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); |
1299 | ASSERT_EQ(0, ret); |
1300 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); |
1301 | ASSERT_EQ(0, ret); |
1302 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); |
1303 | ASSERT_EQ(0, ret); |
1304 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); |
1305 | ASSERT_EQ(0, ret); |
1306 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); |
1307 | ASSERT_EQ(0, ret); |
1308 | /* Should work just fine. */ |
1309 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
1310 | /* getpid() should never return. */ |
1311 | EXPECT_EQ(0, syscall(__NR_getpid)); |
1312 | } |
1313 | |
1314 | TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) |
1315 | { |
1316 | pid_t parent; |
1317 | long ret; |
1318 | |
1319 | parent = getppid(); |
1320 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1321 | ASSERT_EQ(0, ret); |
1322 | |
1323 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); |
1324 | ASSERT_EQ(0, ret); |
1325 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); |
1326 | ASSERT_EQ(0, ret); |
1327 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); |
1328 | ASSERT_EQ(0, ret); |
1329 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); |
1330 | ASSERT_EQ(0, ret); |
1331 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); |
1332 | ASSERT_EQ(0, ret); |
1333 | /* Should work just fine. */ |
1334 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
1335 | /* getpid() should never return. */ |
1336 | EXPECT_EQ(0, syscall(__NR_getpid)); |
1337 | } |
1338 | |
1339 | TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) |
1340 | { |
1341 | pid_t parent; |
1342 | long ret; |
1343 | |
1344 | parent = getppid(); |
1345 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1346 | ASSERT_EQ(0, ret); |
1347 | |
1348 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); |
1349 | ASSERT_EQ(0, ret); |
1350 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); |
1351 | ASSERT_EQ(0, ret); |
1352 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); |
1353 | ASSERT_EQ(0, ret); |
1354 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); |
1355 | ASSERT_EQ(0, ret); |
1356 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); |
1357 | ASSERT_EQ(0, ret); |
1358 | /* Should work just fine. */ |
1359 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
1360 | /* getpid() should never return. */ |
1361 | EXPECT_EQ(0, syscall(__NR_getpid)); |
1362 | } |
1363 | |
1364 | TEST_F(precedence, errno_is_third) |
1365 | { |
1366 | pid_t parent; |
1367 | long ret; |
1368 | |
1369 | parent = getppid(); |
1370 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1371 | ASSERT_EQ(0, ret); |
1372 | |
1373 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); |
1374 | ASSERT_EQ(0, ret); |
1375 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); |
1376 | ASSERT_EQ(0, ret); |
1377 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); |
1378 | ASSERT_EQ(0, ret); |
1379 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); |
1380 | ASSERT_EQ(0, ret); |
1381 | /* Should work just fine. */ |
1382 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
1383 | EXPECT_EQ(0, syscall(__NR_getpid)); |
1384 | } |
1385 | |
1386 | TEST_F(precedence, errno_is_third_in_any_order) |
1387 | { |
1388 | pid_t parent; |
1389 | long ret; |
1390 | |
1391 | parent = getppid(); |
1392 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1393 | ASSERT_EQ(0, ret); |
1394 | |
1395 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); |
1396 | ASSERT_EQ(0, ret); |
1397 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); |
1398 | ASSERT_EQ(0, ret); |
1399 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); |
1400 | ASSERT_EQ(0, ret); |
1401 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); |
1402 | ASSERT_EQ(0, ret); |
1403 | /* Should work just fine. */ |
1404 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
1405 | EXPECT_EQ(0, syscall(__NR_getpid)); |
1406 | } |
1407 | |
1408 | TEST_F(precedence, trace_is_fourth) |
1409 | { |
1410 | pid_t parent; |
1411 | long ret; |
1412 | |
1413 | parent = getppid(); |
1414 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1415 | ASSERT_EQ(0, ret); |
1416 | |
1417 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); |
1418 | ASSERT_EQ(0, ret); |
1419 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); |
1420 | ASSERT_EQ(0, ret); |
1421 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); |
1422 | ASSERT_EQ(0, ret); |
1423 | /* Should work just fine. */ |
1424 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
1425 | /* No ptracer */ |
1426 | EXPECT_EQ(-1, syscall(__NR_getpid)); |
1427 | } |
1428 | |
1429 | TEST_F(precedence, trace_is_fourth_in_any_order) |
1430 | { |
1431 | pid_t parent; |
1432 | long ret; |
1433 | |
1434 | parent = getppid(); |
1435 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1436 | ASSERT_EQ(0, ret); |
1437 | |
1438 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); |
1439 | ASSERT_EQ(0, ret); |
1440 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); |
1441 | ASSERT_EQ(0, ret); |
1442 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); |
1443 | ASSERT_EQ(0, ret); |
1444 | /* Should work just fine. */ |
1445 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
1446 | /* No ptracer */ |
1447 | EXPECT_EQ(-1, syscall(__NR_getpid)); |
1448 | } |
1449 | |
1450 | TEST_F(precedence, log_is_fifth) |
1451 | { |
1452 | pid_t mypid, parent; |
1453 | long ret; |
1454 | |
1455 | mypid = getpid(); |
1456 | parent = getppid(); |
1457 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1458 | ASSERT_EQ(0, ret); |
1459 | |
1460 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); |
1461 | ASSERT_EQ(0, ret); |
1462 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); |
1463 | ASSERT_EQ(0, ret); |
1464 | /* Should work just fine. */ |
1465 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
1466 | /* Should also work just fine */ |
1467 | EXPECT_EQ(mypid, syscall(__NR_getpid)); |
1468 | } |
1469 | |
1470 | TEST_F(precedence, log_is_fifth_in_any_order) |
1471 | { |
1472 | pid_t mypid, parent; |
1473 | long ret; |
1474 | |
1475 | mypid = getpid(); |
1476 | parent = getppid(); |
1477 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1478 | ASSERT_EQ(0, ret); |
1479 | |
1480 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); |
1481 | ASSERT_EQ(0, ret); |
1482 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); |
1483 | ASSERT_EQ(0, ret); |
1484 | /* Should work just fine. */ |
1485 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
1486 | /* Should also work just fine */ |
1487 | EXPECT_EQ(mypid, syscall(__NR_getpid)); |
1488 | } |
1489 | |
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
/* Any value other than 7 (including "undefined") is discarded here ... */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

/* ... and replaced with the expected value. */
#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/* Extract the ptrace event number from the upper bits of a wait status. */
#define PTRACE_EVENT_MASK(status) ((status) >> 16)
/*
 * Run flag for the tracer loop. It is cleared from a signal handler and
 * polled by start_tracer(), so it must be a volatile sig_atomic_t: that is
 * the only object type ISO C guarantees a handler may safely write.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: ask the tracer loop to shut down. @sig is unused. */
void tracer_stop(int sig)
{
	tracer_running = false;
}
1509 | |
/*
 * Callback invoked by start_tracer() for each tracee stop it accepts.
 * @status is the wait status of that stop; @args is the opaque pointer
 * that was handed to start_tracer().
 */
typedef void tracer_func_t(struct __test_metadata *_metadata,
			   pid_t tracee, int status, void *args);
1512 | |
1513 | void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, |
1514 | tracer_func_t tracer_func, void *args, bool ptrace_syscall) |
1515 | { |
1516 | int ret = -1; |
1517 | struct sigaction action = { |
1518 | .sa_handler = tracer_stop, |
1519 | }; |
1520 | |
1521 | /* Allow external shutdown. */ |
1522 | tracer_running = true; |
1523 | ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL)); |
1524 | |
1525 | errno = 0; |
1526 | while (ret == -1 && errno != EINVAL) |
1527 | ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0); |
1528 | ASSERT_EQ(0, ret) { |
1529 | kill(tracee, SIGKILL); |
1530 | } |
1531 | /* Wait for attach stop */ |
1532 | wait(NULL); |
1533 | |
1534 | ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ? |
1535 | PTRACE_O_TRACESYSGOOD : |
1536 | PTRACE_O_TRACESECCOMP); |
1537 | ASSERT_EQ(0, ret) { |
1538 | TH_LOG("Failed to set PTRACE_O_TRACESECCOMP" ); |
1539 | kill(tracee, SIGKILL); |
1540 | } |
1541 | ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, |
1542 | tracee, NULL, 0); |
1543 | ASSERT_EQ(0, ret); |
1544 | |
1545 | /* Unblock the tracee */ |
1546 | ASSERT_EQ(1, write(fd, "A" , 1)); |
1547 | ASSERT_EQ(0, close(fd)); |
1548 | |
1549 | /* Run until we're shut down. Must assert to stop execution. */ |
1550 | while (tracer_running) { |
1551 | int status; |
1552 | |
1553 | if (wait(&status) != tracee) |
1554 | continue; |
1555 | |
1556 | if (WIFSIGNALED(status)) { |
1557 | /* Child caught a fatal signal. */ |
1558 | return; |
1559 | } |
1560 | if (WIFEXITED(status)) { |
1561 | /* Child exited with code. */ |
1562 | return; |
1563 | } |
1564 | |
1565 | /* Check if we got an expected event. */ |
1566 | ASSERT_EQ(WIFCONTINUED(status), false); |
1567 | ASSERT_EQ(WIFSTOPPED(status), true); |
1568 | ASSERT_EQ(WSTOPSIG(status) & SIGTRAP, SIGTRAP) { |
1569 | TH_LOG("Unexpected WSTOPSIG: %d" , WSTOPSIG(status)); |
1570 | } |
1571 | |
1572 | tracer_func(_metadata, tracee, status, args); |
1573 | |
1574 | ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, |
1575 | tracee, NULL, 0); |
1576 | ASSERT_EQ(0, ret); |
1577 | } |
1578 | /* Directly report the status of our test harness results. */ |
1579 | syscall(__NR_exit, _metadata->exit_code); |
1580 | } |
1581 | |
1582 | /* Common tracer setup/teardown functions. */ |
/* Deliberately empty signal handler; @num is ignored. */
void cont_handler(int num)
{
	(void)num;
}
1585 | pid_t setup_trace_fixture(struct __test_metadata *_metadata, |
1586 | tracer_func_t func, void *args, bool ptrace_syscall) |
1587 | { |
1588 | char sync; |
1589 | int pipefd[2]; |
1590 | pid_t tracer_pid; |
1591 | pid_t tracee = getpid(); |
1592 | |
1593 | /* Setup a pipe for clean synchronization. */ |
1594 | ASSERT_EQ(0, pipe(pipefd)); |
1595 | |
1596 | /* Fork a child which we'll promote to tracer */ |
1597 | tracer_pid = fork(); |
1598 | ASSERT_LE(0, tracer_pid); |
1599 | signal(SIGALRM, cont_handler); |
1600 | if (tracer_pid == 0) { |
1601 | close(pipefd[0]); |
1602 | start_tracer(_metadata, fd: pipefd[1], tracee, tracer_func: func, args, |
1603 | ptrace_syscall); |
1604 | syscall(__NR_exit, 0); |
1605 | } |
1606 | close(pipefd[1]); |
1607 | prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0); |
1608 | read(pipefd[0], &sync, 1); |
1609 | close(pipefd[0]); |
1610 | |
1611 | return tracer_pid; |
1612 | } |
1613 | |
1614 | void teardown_trace_fixture(struct __test_metadata *_metadata, |
1615 | pid_t tracer) |
1616 | { |
1617 | if (tracer) { |
1618 | int status; |
1619 | /* |
1620 | * Extract the exit code from the other process and |
1621 | * adopt it for ourselves in case its asserts failed. |
1622 | */ |
1623 | ASSERT_EQ(0, kill(tracer, SIGUSR1)); |
1624 | ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); |
1625 | if (WEXITSTATUS(status)) |
1626 | _metadata->exit_code = KSFT_FAIL; |
1627 | } |
1628 | } |
1629 | |
/* "poke" tracer arguments and function. */
struct tracer_args_poke_t {
	/* Address in the tracee that tracer_poke() writes 0x1001 to. */
	unsigned long poke_addr;
};
1634 | |
1635 | void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, |
1636 | void *args) |
1637 | { |
1638 | int ret; |
1639 | unsigned long msg; |
1640 | struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args; |
1641 | |
1642 | ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); |
1643 | EXPECT_EQ(0, ret); |
1644 | /* If this fails, don't try to recover. */ |
1645 | ASSERT_EQ(0x1001, msg) { |
1646 | kill(tracee, SIGKILL); |
1647 | } |
1648 | /* |
1649 | * Poke in the message. |
1650 | * Registers are not touched to try to keep this relatively arch |
1651 | * agnostic. |
1652 | */ |
1653 | ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001); |
1654 | EXPECT_EQ(0, ret); |
1655 | } |
1656 | |
/* Per-test state for the TRACE_poke tests. */
FIXTURE(TRACE_poke) {
	struct sock_fprog prog;		/* filter built in setup */
	pid_t tracer;			/* pid returned by setup_trace_fixture() */
	long poked;			/* target of the tracer's PTRACE_POKEDATA */
	struct tracer_args_poke_t tracer_args;	/* carries &poked to the tracer */
};
1663 | |
1664 | FIXTURE_SETUP(TRACE_poke) |
1665 | { |
1666 | struct sock_filter filter[] = { |
1667 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
1668 | offsetof(struct seccomp_data, nr)), |
1669 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), |
1670 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001), |
1671 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
1672 | }; |
1673 | |
1674 | self->poked = 0; |
1675 | memset(&self->prog, 0, sizeof(self->prog)); |
1676 | self->prog.filter = malloc(sizeof(filter)); |
1677 | ASSERT_NE(NULL, self->prog.filter); |
1678 | memcpy(self->prog.filter, filter, sizeof(filter)); |
1679 | self->prog.len = (unsigned short)ARRAY_SIZE(filter); |
1680 | |
1681 | /* Set up tracer args. */ |
1682 | self->tracer_args.poke_addr = (unsigned long)&self->poked; |
1683 | |
1684 | /* Launch tracer. */ |
1685 | self->tracer = setup_trace_fixture(_metadata, func: tracer_poke, |
1686 | args: &self->tracer_args, ptrace_syscall: false); |
1687 | } |
1688 | |
1689 | FIXTURE_TEARDOWN(TRACE_poke) |
1690 | { |
1691 | teardown_trace_fixture(_metadata, tracer: self->tracer); |
1692 | if (self->prog.filter) |
1693 | free(self->prog.filter); |
1694 | } |
1695 | |
1696 | TEST_F(TRACE_poke, read_has_side_effects) |
1697 | { |
1698 | ssize_t ret; |
1699 | |
1700 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1701 | ASSERT_EQ(0, ret); |
1702 | |
1703 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); |
1704 | ASSERT_EQ(0, ret); |
1705 | |
1706 | EXPECT_EQ(0, self->poked); |
1707 | ret = read(-1, NULL, 0); |
1708 | EXPECT_EQ(-1, ret); |
1709 | EXPECT_EQ(0x1001, self->poked); |
1710 | } |
1711 | |
1712 | TEST_F(TRACE_poke, getpid_runs_normally) |
1713 | { |
1714 | long ret; |
1715 | |
1716 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
1717 | ASSERT_EQ(0, ret); |
1718 | |
1719 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); |
1720 | ASSERT_EQ(0, ret); |
1721 | |
1722 | EXPECT_EQ(0, self->poked); |
1723 | EXPECT_NE(0, syscall(__NR_getpid)); |
1724 | EXPECT_EQ(0, self->poked); |
1725 | } |
1726 | |
/*
 * Per-architecture register access.
 *
 * Each branch defines:
 *   ARCH_REGS          - register set type used with ARCH_GETREGS/SETREGS
 *   SYSCALL_NUM(regs)  - lvalue/expression holding the syscall number
 * and, where the architecture needs something other than a plain register
 * write, one or more of:
 *   SYSCALL_NUM_SET(regs, nr)  - custom way to change the syscall number
 *   SYSCALL_RET(regs)          - lvalue holding the syscall return value
 *   SYSCALL_RET_SET(regs, val) - custom way to change the return value
 *   SYSCALL_RET_SET_ON_PTRACE_EXIT - marker consumed further down the file
 *
 * Several macros reference a variable named "tracee" that must be in scope
 * at the point of use.
 */
#if defined(__x86_64__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).orig_rax
# define SYSCALL_RET(_regs)	(_regs).rax
#elif defined(__i386__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).orig_eax
# define SYSCALL_RET(_regs)	(_regs).eax
#elif defined(__arm__)
# define ARCH_REGS		struct pt_regs
# define SYSCALL_NUM(_regs)	(_regs).ARM_r7
# ifndef PTRACE_SET_SYSCALL
#  define PTRACE_SET_SYSCALL   23
# endif
/* The syscall number is changed through a dedicated ptrace request. */
# define SYSCALL_NUM_SET(_regs, _nr)	\
		EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
# define SYSCALL_RET(_regs)	(_regs).ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS		struct user_pt_regs
# define SYSCALL_NUM(_regs)	(_regs).regs[8]
# ifndef NT_ARM_SYSTEM_CALL
#  define NT_ARM_SYSTEM_CALL 0x404
# endif
/* The syscall number is changed through the NT_ARM_SYSTEM_CALL regset. */
# define SYSCALL_NUM_SET(_regs, _nr)				\
	do {							\
		struct iovec __v;				\
		typeof(_nr) __nr = (_nr);			\
		__v.iov_base = &__nr;				\
		__v.iov_len = sizeof(__nr);			\
		EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee,	\
				    NT_ARM_SYSTEM_CALL, &__v));	\
	} while (0)
# define SYSCALL_RET(_regs)	(_regs).regs[0]
#elif defined(__loongarch__)
# define ARCH_REGS		struct user_pt_regs
# define SYSCALL_NUM(_regs)	(_regs).regs[11]
# define SYSCALL_RET(_regs)	(_regs).regs[4]
#elif defined(__riscv) && __riscv_xlen == 64
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).a7
# define SYSCALL_RET(_regs)	(_regs).a0
#elif defined(__csky__)
# define ARCH_REGS		struct pt_regs
/* The syscall-number register differs between the two C-SKY ABIs. */
#  if defined(__CSKYABIV2__)
#   define SYSCALL_NUM(_regs)	(_regs).regs[3]
#  else
#   define SYSCALL_NUM(_regs)	(_regs).regs[9]
#  endif
# define SYSCALL_RET(_regs)	(_regs).a0
#elif defined(__hppa__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).gr[20]
# define SYSCALL_RET(_regs)	(_regs).gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS		struct pt_regs
# define SYSCALL_NUM(_regs)	(_regs).gpr[0]
# define SYSCALL_RET(_regs)	(_regs).gpr[3]
/* The return-value encoding depends on which syscall entry was used. */
# define SYSCALL_RET_SET(_regs, _val)				\
	do {							\
		typeof(_val) _result = (_val);			\
		if ((_regs.trap & 0xfff0) == 0x3000) {		\
			/*					\
			 * scv 0 system call uses -ve result	\
			 * for error, so no need to adjust.	\
			 */					\
			SYSCALL_RET(_regs) = _result;		\
		} else {					\
			/*					\
			 * A syscall error is signaled by the	\
			 * CR0 SO bit and the code is stored as	\
			 * a positive value.			\
			 */					\
			if (_result < 0) {			\
				SYSCALL_RET(_regs) = -_result;	\
				(_regs).ccr |= 0x10000000;	\
			} else {				\
				SYSCALL_RET(_regs) = _result;	\
				(_regs).ccr &= ~0x10000000;	\
			}					\
		}						\
	} while (0)
# define SYSCALL_RET_SET_ON_PTRACE_EXIT
#elif defined(__s390__)
# define ARCH_REGS		s390_regs
# define SYSCALL_NUM(_regs)	(_regs).gprs[2]
/* No SYSCALL_RET here: changing the return value is only logged. */
# define SYSCALL_RET_SET(_regs, _val)			\
		TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__mips__)
# include <asm/unistd_nr_n32.h>
# include <asm/unistd_nr_n64.h>
# include <asm/unistd_nr_o32.h>
# define ARCH_REGS		struct pt_regs
/*
 * When regs[2] equals __NR_O32_Linux the syscall number is taken from
 * regs[4]; otherwise regs[2] itself is used.
 */
# define SYSCALL_NUM(_regs)				\
	({						\
		typeof((_regs).regs[2]) _nr;		\
		if ((_regs).regs[2] == __NR_O32_Linux)	\
			_nr = (_regs).regs[4];		\
		else					\
			_nr = (_regs).regs[2];		\
		_nr;					\
	})
# define SYSCALL_NUM_SET(_regs, _nr)			\
	do {						\
		if ((_regs).regs[2] == __NR_O32_Linux)	\
			(_regs).regs[4] = _nr;		\
		else					\
			(_regs).regs[2] = _nr;		\
	} while (0)
/* No SYSCALL_RET here: changing the return value is only logged. */
# define SYSCALL_RET_SET(_regs, _val)			\
		TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__xtensa__)
# define ARCH_REGS		struct user_pt_regs
# define SYSCALL_NUM(_regs)	(_regs).syscall
/*
 * On xtensa syscall return value is in the register
 * a2 of the current window which is not fixed.
 */
#define SYSCALL_RET(_regs)	(_regs).a[(_regs).windowbase * 4 + 2]
#elif defined(__sh__)
# define ARCH_REGS		struct pt_regs
# define SYSCALL_NUM(_regs)	(_regs).regs[3]
# define SYSCALL_RET(_regs)	(_regs).regs[0]
#elif defined(__mc68000__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).orig_d0
# define SYSCALL_RET(_regs)	(_regs).d0
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1856 | |
/*
 * Most architectures can change the syscall by just updating the
 * associated register. This is the default if not defined above.
 */
#ifndef SYSCALL_NUM_SET
# define SYSCALL_NUM_SET(_regs, _nr)		\
	do {					\
		SYSCALL_NUM(_regs) = (_nr);	\
	} while (0)
#endif
/*
 * Most architectures can change the syscall return value by just
 * writing to the SYSCALL_RET register. This is the default if not
 * defined above. If an architecture cannot set the return value
 * (for example when the syscall and return value register is
 * shared), report it with TH_LOG() in an arch-specific definition
 * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
 */
/* Build-time guard: the arch block must have provided at least one. */
#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
#endif
#ifndef SYSCALL_RET_SET
# define SYSCALL_RET_SET(_regs, _val)		\
	do {					\
		SYSCALL_RET(_regs) = (_val);	\
	} while (0)
#endif
1884 | |
/*
 * EXPECT_SYSCALL_RETURN(val, action): run @action and check its result.
 *
 * When the syscall return can't be changed (SYSCALL_RET undefined), the
 * check is stubbed out to "action returns -1". Otherwise a negative @val
 * means "expect -1 with errno == -val" and any other @val is compared
 * against the result directly. errno is cleared before @action runs.
 *
 * @val is parenthesized wherever it is used in an expression so that
 * compound arguments expand safely.
 */
#ifndef SYSCALL_RET
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)		\
	do {						\
		errno = 0;				\
		if ((val) < 0) {			\
			EXPECT_EQ(-1, action);		\
			EXPECT_EQ(-(val), errno);	\
		} else {				\
			EXPECT_EQ(val, action);		\
		}					\
	} while (0)
#endif
1900 | |
/*
 * Some architectures (e.g. powerpc) can only set syscall
 * return values on syscall exit during ptrace.
 */
/* The syscall number can always be set at ptrace syscall entry. */
const bool ptrace_entry_set_syscall_nr = true;
/* False only when the arch block defined SYSCALL_RET_SET_ON_PTRACE_EXIT. */
const bool ptrace_entry_set_syscall_ret =
#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
	true;
#else
	false;
#endif
1912 | |
/*
 * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 *
 * Both macros evaluate to the ptrace() return value and rely on a
 * variable named "tracee" being in scope at the point of use.
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__mc68000__)
# define ARCH_GETREGS(_regs)	ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
# define ARCH_SETREGS(_regs)	ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
#else
/* Everything else goes through the NT_PRSTATUS regset with an iovec. */
# define ARCH_GETREGS(_regs)	({					\
		struct iovec __v;					\
		__v.iov_base = &(_regs);				\
		__v.iov_len = sizeof(_regs);				\
		ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v);	\
	})
# define ARCH_SETREGS(_regs)	({					\
		struct iovec __v;					\
		__v.iov_base = &(_regs);				\
		__v.iov_len = sizeof(_regs);				\
		ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v);	\
	})
#endif
1934 | |
/*
 * Architecture-specific syscall fetching routine.
 *
 * Reads the registers of @tracee and returns the syscall number extracted by
 * SYSCALL_NUM(). If the register read fails, the failure is recorded against
 * @_metadata and -1 is returned.
 */
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
{
	ARCH_REGS regs;

	EXPECT_EQ(0, ARCH_GETREGS(regs)) {
		return -1;
	}

	return SYSCALL_NUM(regs);
}
1946 | |
1947 | /* Architecture-specific syscall changing routine. */ |
1948 | void __change_syscall(struct __test_metadata *_metadata, |
1949 | pid_t tracee, long *syscall, long *ret) |
1950 | { |
1951 | ARCH_REGS orig, regs; |
1952 | |
1953 | /* Do not get/set registers if we have nothing to do. */ |
1954 | if (!syscall && !ret) |
1955 | return; |
1956 | |
1957 | EXPECT_EQ(0, ARCH_GETREGS(regs)) { |
1958 | return; |
1959 | } |
1960 | orig = regs; |
1961 | |
1962 | if (syscall) |
1963 | SYSCALL_NUM_SET(regs, *syscall); |
1964 | |
1965 | if (ret) |
1966 | SYSCALL_RET_SET(regs, *ret); |
1967 | |
1968 | /* Flush any register changes made. */ |
1969 | if (memcmp(p: &orig, q: ®s, size: sizeof(orig)) != 0) |
1970 | EXPECT_EQ(0, ARCH_SETREGS(regs)); |
1971 | } |
1972 | |
/*
 * Change only syscall number.
 *
 * Replaces the pending syscall number of @tracee with @syscall and leaves the
 * return value alone.
 */
void change_syscall_nr(struct __test_metadata *_metadata,
		       pid_t tracee, long syscall)
{
	__change_syscall(_metadata, tracee, &syscall, NULL);
}
1979 | |
/*
 * Change syscall return value (and set syscall number to -1).
 *
 * Both the syscall number (-1) and @ret are written into the registers of
 * @tracee in a single __change_syscall() call.
 */
void change_syscall_ret(struct __test_metadata *_metadata,
			pid_t tracee, long ret)
{
	long syscall = -1;

	__change_syscall(_metadata, tracee, &syscall, &ret);
}
1988 | |
1989 | void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, |
1990 | int status, void *args) |
1991 | { |
1992 | int ret; |
1993 | unsigned long msg; |
1994 | |
1995 | EXPECT_EQ(PTRACE_EVENT_MASK(status), PTRACE_EVENT_SECCOMP) { |
1996 | TH_LOG("Unexpected ptrace event: %d" , PTRACE_EVENT_MASK(status)); |
1997 | return; |
1998 | } |
1999 | |
2000 | /* Make sure we got the right message. */ |
2001 | ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); |
2002 | EXPECT_EQ(0, ret); |
2003 | |
2004 | /* Validate and take action on expected syscalls. */ |
2005 | switch (msg) { |
2006 | case 0x1002: |
2007 | /* change getpid to getppid. */ |
2008 | EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); |
2009 | change_syscall_nr(_metadata, tracee, __NR_getppid); |
2010 | break; |
2011 | case 0x1003: |
2012 | /* skip gettid with valid return code. */ |
2013 | EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); |
2014 | change_syscall_ret(_metadata, tracee, ret: 45000); |
2015 | break; |
2016 | case 0x1004: |
2017 | /* skip openat with error. */ |
2018 | EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); |
2019 | change_syscall_ret(_metadata, tracee, ret: -ESRCH); |
2020 | break; |
2021 | case 0x1005: |
2022 | /* do nothing (allow getppid) */ |
2023 | EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); |
2024 | break; |
2025 | default: |
2026 | EXPECT_EQ(0, msg) { |
2027 | TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx" , msg); |
2028 | kill(tracee, SIGKILL); |
2029 | } |
2030 | } |
2031 | |
2032 | } |
2033 | |
/* Per-test state shared by the TRACE_syscall tests and their tracer. */
FIXTURE(TRACE_syscall) {
	/* NOTE(review): not referenced by the code visible here; confirm use. */
	struct sock_fprog prog;
	/* Tracer process id, and the test's own tid/pid/parent pid. */
	pid_t tracer, mytid, mypid, parent;
	/* Syscall number saved by tracer_ptrace() at syscall entry. */
	long syscall_nr;
};
2039 | |
2040 | void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, |
2041 | int status, void *args) |
2042 | { |
2043 | int ret; |
2044 | unsigned long msg; |
2045 | static bool entry; |
2046 | long syscall_nr_val, syscall_ret_val; |
2047 | long *syscall_nr = NULL, *syscall_ret = NULL; |
2048 | FIXTURE_DATA(TRACE_syscall) *self = args; |
2049 | |
2050 | EXPECT_EQ(WSTOPSIG(status) & 0x80, 0x80) { |
2051 | TH_LOG("Unexpected WSTOPSIG: %d" , WSTOPSIG(status)); |
2052 | return; |
2053 | } |
2054 | |
2055 | /* |
2056 | * The traditional way to tell PTRACE_SYSCALL entry/exit |
2057 | * is by counting. |
2058 | */ |
2059 | entry = !entry; |
2060 | |
2061 | /* Make sure we got an appropriate message. */ |
2062 | ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); |
2063 | EXPECT_EQ(0, ret); |
2064 | EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY |
2065 | : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); |
2066 | |
2067 | /* |
2068 | * Some architectures only support setting return values during |
2069 | * syscall exit under ptrace, and on exit the syscall number may |
2070 | * no longer be available. Therefore, save the initial sycall |
2071 | * number here, so it can be examined during both entry and exit |
2072 | * phases. |
2073 | */ |
2074 | if (entry) |
2075 | self->syscall_nr = get_syscall(_metadata, tracee); |
2076 | |
2077 | /* |
2078 | * Depending on the architecture's syscall setting abilities, we |
2079 | * pick which things to set during this phase (entry or exit). |
2080 | */ |
2081 | if (entry == ptrace_entry_set_syscall_nr) |
2082 | syscall_nr = &syscall_nr_val; |
2083 | if (entry == ptrace_entry_set_syscall_ret) |
2084 | syscall_ret = &syscall_ret_val; |
2085 | |
2086 | /* Now handle the actual rewriting cases. */ |
2087 | switch (self->syscall_nr) { |
2088 | case __NR_getpid: |
2089 | syscall_nr_val = __NR_getppid; |
2090 | /* Never change syscall return for this case. */ |
2091 | syscall_ret = NULL; |
2092 | break; |
2093 | case __NR_gettid: |
2094 | syscall_nr_val = -1; |
2095 | syscall_ret_val = 45000; |
2096 | break; |
2097 | case __NR_openat: |
2098 | syscall_nr_val = -1; |
2099 | syscall_ret_val = -ESRCH; |
2100 | break; |
2101 | default: |
2102 | /* Unhandled, do nothing. */ |
2103 | return; |
2104 | } |
2105 | |
2106 | __change_syscall(_metadata, tracee, syscall: syscall_nr, ret: syscall_ret); |
2107 | } |
2108 | |
/* Variant parameters for the TRACE_syscall fixture. */
FIXTURE_VARIANT(TRACE_syscall) {
	/*
	 * All of the SECCOMP_RET_TRACE behaviors can be tested with either
	 * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
	 * This indicates if we should use SECCOMP_RET_TRACE (false), or
	 * ptrace (true).
	 */
	bool use_ptrace;
};

/* Variant that rewrites syscalls from the ptrace tracer. */
FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) {
	.use_ptrace = true,
};

/* Variant that rewrites syscalls via SECCOMP_RET_TRACE. */
FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) {
	.use_ptrace = false,
};
2126 | |
2127 | FIXTURE_SETUP(TRACE_syscall) |
2128 | { |
2129 | struct sock_filter filter[] = { |
2130 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
2131 | offsetof(struct seccomp_data, nr)), |
2132 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), |
2133 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), |
2134 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), |
2135 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), |
2136 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), |
2137 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), |
2138 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), |
2139 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), |
2140 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
2141 | }; |
2142 | struct sock_fprog prog = { |
2143 | .len = (unsigned short)ARRAY_SIZE(filter), |
2144 | .filter = filter, |
2145 | }; |
2146 | long ret; |
2147 | |
2148 | /* Prepare some testable syscall results. */ |
2149 | self->mytid = syscall(__NR_gettid); |
2150 | ASSERT_GT(self->mytid, 0); |
2151 | ASSERT_NE(self->mytid, 1) { |
2152 | TH_LOG("Running this test as init is not supported. :)" ); |
2153 | } |
2154 | |
2155 | self->mypid = getpid(); |
2156 | ASSERT_GT(self->mypid, 0); |
2157 | ASSERT_EQ(self->mytid, self->mypid); |
2158 | |
2159 | self->parent = getppid(); |
2160 | ASSERT_GT(self->parent, 0); |
2161 | ASSERT_NE(self->parent, self->mypid); |
2162 | |
2163 | /* Launch tracer. */ |
2164 | self->tracer = setup_trace_fixture(_metadata, |
2165 | func: variant->use_ptrace ? tracer_ptrace |
2166 | : tracer_seccomp, |
2167 | args: self, ptrace_syscall: variant->use_ptrace); |
2168 | |
2169 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
2170 | ASSERT_EQ(0, ret); |
2171 | |
2172 | /* Do not install seccomp rewrite filters, as we'll use ptrace instead. */ |
2173 | if (variant->use_ptrace) |
2174 | return; |
2175 | |
2176 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); |
2177 | ASSERT_EQ(0, ret); |
2178 | } |
2179 | |
2180 | FIXTURE_TEARDOWN(TRACE_syscall) |
2181 | { |
2182 | teardown_trace_fixture(_metadata, tracer: self->tracer); |
2183 | } |
2184 | |
/*
 * Check that invoking an invalid negative syscall number fails with ENOSYS,
 * both for -1 and for another negative value. Skipped on 32-bit arm.
 */
TEST(negative_ENOSYS)
{
#if defined(__arm__)
	SKIP(return, "arm32 does not support calling syscall -1");
#endif
	/*
	 * There should be no difference between an "internal" skip
	 * and userspace asking for syscall "-1".
	 */
	errno = 0;
	EXPECT_EQ(-1, syscall(-1));
	EXPECT_EQ(errno, ENOSYS);
	/* And no difference for "still not valid but not -1". */
	errno = 0;
	EXPECT_EQ(-1, syscall(-101));
	EXPECT_EQ(errno, ENOSYS);
}
2202 | |
/* Re-run the negative_ENOSYS checks with the TRACE_syscall fixture active. */
TEST_F(TRACE_syscall, negative_ENOSYS)
{
	negative_ENOSYS(_metadata);
}
2207 | |
/* getppid is not rewritten by either tracer, so it reports the real parent. */
TEST_F(TRACE_syscall, syscall_allowed)
{
	/* getppid works as expected (no changes). */
	EXPECT_EQ(self->parent, syscall(__NR_getppid));
	EXPECT_NE(self->mypid, syscall(__NR_getppid));
}
2214 | |
/* The tracer turns getpid into getppid, so getpid reports the parent pid. */
TEST_F(TRACE_syscall, syscall_redirected)
{
	/* getpid has been redirected to getppid as expected. */
	EXPECT_EQ(self->parent, syscall(__NR_getpid));
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
2221 | |
/* The tracer replaces openat's result with -ESRCH; check what the caller sees. */
TEST_F(TRACE_syscall, syscall_errno)
{
	/* Tracer should skip the open syscall, resulting in ESRCH. */
	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}
2227 | |
/* The tracer replaces gettid's result with 45000; check what the caller sees. */
TEST_F(TRACE_syscall, syscall_faked)
{
	/* Tracer skips the gettid syscall and stores an altered return value. */
	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}
2233 | |
/*
 * Install an extra filter returning SECCOMP_RET_KILL_THREAD for mknodat and
 * then call mknodat. The test is declared with SIGSYS as its expected
 * signal.
 */
TEST_F_SIGNAL(TRACE_syscall, kill_immediate, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mknodat, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	/* Install "kill on mknodat" filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* This should immediately die with SIGSYS, regardless of tracer. */
	EXPECT_EQ(-1, syscall(__NR_mknodat, -1, NULL, 0, 0));
}
2256 | |
/*
 * Install an extra filter returning SECCOMP_RET_ERRNO|EPERM for getppid, then
 * call getpid. Because the tracer redirects getpid to getppid, the call is
 * expected to fail with EPERM.
 */
TEST_F(TRACE_syscall, skip_after)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	/* Install additional "errno on getppid" filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
	errno = 0;
	EXPECT_EQ(-1, syscall(__NR_getpid));
	EXPECT_EQ(EPERM, errno);
}
2281 | |
/*
 * Install an extra filter returning SECCOMP_RET_KILL for getppid, then call
 * getpid. Because the tracer redirects getpid to getppid, the call is
 * expected to be fatal; the test is declared with SIGSYS as its expected
 * signal.
 */
TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	/* Install additional "death on getppid" filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Tracer will redirect getpid to getppid, and we should die. */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
2304 | |
2305 | TEST(seccomp_syscall) |
2306 | { |
2307 | struct sock_filter filter[] = { |
2308 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
2309 | }; |
2310 | struct sock_fprog prog = { |
2311 | .len = (unsigned short)ARRAY_SIZE(filter), |
2312 | .filter = filter, |
2313 | }; |
2314 | long ret; |
2315 | |
2316 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
2317 | ASSERT_EQ(0, ret) { |
2318 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
2319 | } |
2320 | |
2321 | /* Reject insane operation. */ |
2322 | ret = seccomp(op: -1, flags: 0, args: &prog); |
2323 | ASSERT_NE(ENOSYS, errno) { |
2324 | TH_LOG("Kernel does not support seccomp syscall!" ); |
2325 | } |
2326 | EXPECT_EQ(EINVAL, errno) { |
2327 | TH_LOG("Did not reject crazy op value!" ); |
2328 | } |
2329 | |
2330 | /* Reject strict with flags or pointer. */ |
2331 | ret = seccomp(SECCOMP_SET_MODE_STRICT, flags: -1, NULL); |
2332 | EXPECT_EQ(EINVAL, errno) { |
2333 | TH_LOG("Did not reject mode strict with flags!" ); |
2334 | } |
2335 | ret = seccomp(SECCOMP_SET_MODE_STRICT, flags: 0, args: &prog); |
2336 | EXPECT_EQ(EINVAL, errno) { |
2337 | TH_LOG("Did not reject mode strict with uargs!" ); |
2338 | } |
2339 | |
2340 | /* Reject insane args for filter. */ |
2341 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: -1, args: &prog); |
2342 | EXPECT_EQ(EINVAL, errno) { |
2343 | TH_LOG("Did not reject crazy filter flags!" ); |
2344 | } |
2345 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: 0, NULL); |
2346 | EXPECT_EQ(EFAULT, errno) { |
2347 | TH_LOG("Did not reject NULL filter!" ); |
2348 | } |
2349 | |
2350 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: 0, args: &prog); |
2351 | EXPECT_EQ(0, errno) { |
2352 | TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s" , |
2353 | strerror(errno)); |
2354 | } |
2355 | } |
2356 | |
2357 | TEST(seccomp_syscall_mode_lock) |
2358 | { |
2359 | struct sock_filter filter[] = { |
2360 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
2361 | }; |
2362 | struct sock_fprog prog = { |
2363 | .len = (unsigned short)ARRAY_SIZE(filter), |
2364 | .filter = filter, |
2365 | }; |
2366 | long ret; |
2367 | |
2368 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); |
2369 | ASSERT_EQ(0, ret) { |
2370 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
2371 | } |
2372 | |
2373 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: 0, args: &prog); |
2374 | ASSERT_NE(ENOSYS, errno) { |
2375 | TH_LOG("Kernel does not support seccomp syscall!" ); |
2376 | } |
2377 | EXPECT_EQ(0, ret) { |
2378 | TH_LOG("Could not install filter!" ); |
2379 | } |
2380 | |
2381 | /* Make sure neither entry point will switch to strict. */ |
2382 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); |
2383 | EXPECT_EQ(EINVAL, errno) { |
2384 | TH_LOG("Switched to mode strict!" ); |
2385 | } |
2386 | |
2387 | ret = seccomp(SECCOMP_SET_MODE_STRICT, flags: 0, NULL); |
2388 | EXPECT_EQ(EINVAL, errno) { |
2389 | TH_LOG("Switched to mode strict!" ); |
2390 | } |
2391 | } |
2392 | |
2393 | /* |
2394 | * Test detection of known and unknown filter flags. Userspace needs to be able |
2395 | * to check if a filter flag is supported by the current kernel and a good way |
2396 | * of doing that is by attempting to enter filter mode, with the flag bit in |
2397 | * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates |
2398 | * that the flag is valid and EINVAL indicates that the flag is invalid. |
2399 | */ |
2400 | TEST(detect_seccomp_filter_flags) |
2401 | { |
2402 | unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, |
2403 | SECCOMP_FILTER_FLAG_LOG, |
2404 | SECCOMP_FILTER_FLAG_SPEC_ALLOW, |
2405 | SECCOMP_FILTER_FLAG_NEW_LISTENER, |
2406 | SECCOMP_FILTER_FLAG_TSYNC_ESRCH }; |
2407 | unsigned int exclusive[] = { |
2408 | SECCOMP_FILTER_FLAG_TSYNC, |
2409 | SECCOMP_FILTER_FLAG_NEW_LISTENER }; |
2410 | unsigned int flag, all_flags, exclusive_mask; |
2411 | int i; |
2412 | long ret; |
2413 | |
2414 | /* Test detection of individual known-good filter flags */ |
2415 | for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { |
2416 | int bits = 0; |
2417 | |
2418 | flag = flags[i]; |
2419 | /* Make sure the flag is a single bit! */ |
2420 | while (flag) { |
2421 | if (flag & 0x1) |
2422 | bits ++; |
2423 | flag >>= 1; |
2424 | } |
2425 | ASSERT_EQ(1, bits); |
2426 | flag = flags[i]; |
2427 | |
2428 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: flag, NULL); |
2429 | ASSERT_NE(ENOSYS, errno) { |
2430 | TH_LOG("Kernel does not support seccomp syscall!" ); |
2431 | } |
2432 | EXPECT_EQ(-1, ret); |
2433 | EXPECT_EQ(EFAULT, errno) { |
2434 | TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!" , |
2435 | flag); |
2436 | } |
2437 | |
2438 | all_flags |= flag; |
2439 | } |
2440 | |
2441 | /* |
2442 | * Test detection of all known-good filter flags combined. But |
2443 | * for the exclusive flags we need to mask them out and try them |
2444 | * individually for the "all flags" testing. |
2445 | */ |
2446 | exclusive_mask = 0; |
2447 | for (i = 0; i < ARRAY_SIZE(exclusive); i++) |
2448 | exclusive_mask |= exclusive[i]; |
2449 | for (i = 0; i < ARRAY_SIZE(exclusive); i++) { |
2450 | flag = all_flags & ~exclusive_mask; |
2451 | flag |= exclusive[i]; |
2452 | |
2453 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: flag, NULL); |
2454 | EXPECT_EQ(-1, ret); |
2455 | EXPECT_EQ(EFAULT, errno) { |
2456 | TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!" , |
2457 | flag); |
2458 | } |
2459 | } |
2460 | |
2461 | /* Test detection of an unknown filter flags, without exclusives. */ |
2462 | flag = -1; |
2463 | flag &= ~exclusive_mask; |
2464 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: flag, NULL); |
2465 | EXPECT_EQ(-1, ret); |
2466 | EXPECT_EQ(EINVAL, errno) { |
2467 | TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!" , |
2468 | flag); |
2469 | } |
2470 | |
2471 | /* |
2472 | * Test detection of an unknown filter flag that may simply need to be |
2473 | * added to this test |
2474 | */ |
2475 | flag = flags[ARRAY_SIZE(flags) - 1] << 1; |
2476 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: flag, NULL); |
2477 | EXPECT_EQ(-1, ret); |
2478 | EXPECT_EQ(EINVAL, errno) { |
2479 | TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?" , |
2480 | flag); |
2481 | } |
2482 | } |
2483 | |
2484 | TEST(TSYNC_first) |
2485 | { |
2486 | struct sock_filter filter[] = { |
2487 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
2488 | }; |
2489 | struct sock_fprog prog = { |
2490 | .len = (unsigned short)ARRAY_SIZE(filter), |
2491 | .filter = filter, |
2492 | }; |
2493 | long ret; |
2494 | |
2495 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); |
2496 | ASSERT_EQ(0, ret) { |
2497 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
2498 | } |
2499 | |
2500 | ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
2501 | args: &prog); |
2502 | ASSERT_NE(ENOSYS, errno) { |
2503 | TH_LOG("Kernel does not support seccomp syscall!" ); |
2504 | } |
2505 | EXPECT_EQ(0, ret) { |
2506 | TH_LOG("Could not install initial filter with TSYNC!" ); |
2507 | } |
2508 | } |
2509 | |
/* Number of sibling threads used by the TSYNC tests. */
#define TSYNC_SIBLINGS 2
/* Per-thread state handed to each sibling thread of the TSYNC tests. */
struct tsync_sibling {
	pthread_t tid;			/* pthread handle; 0 when not running/joined */
	pid_t system_tid;		/* gettid() value recorded by the thread */
	sem_t *started;			/* posted by the thread once it has started */
	pthread_cond_t *cond;		/* condition the thread waits on */
	pthread_mutex_t *mutex;		/* mutex paired with @cond */
	int diverge;			/* non-zero: thread installs @prog itself */
	int num_waits;			/* number of condition wake-ups to consume */
	struct sock_fprog *prog;	/* filter applied when @diverge is set */
	struct __test_metadata *metadata;	/* owning test's metadata */
};
2522 | |
/*
 * To avoid joining joined threads (which is not allowed by Bionic),
 * make sure we both successfully join and clear the tid to skip a
 * later join attempt during fixture teardown. Any remaining threads
 * will be directly killed during teardown.
 *
 * @tid must be an lvalue: it is reset to 0 after a successful join. It is
 * parenthesized where it is cast and assigned so that any lvalue expression
 * expands correctly.
 */
#define PTHREAD_JOIN(tid, status)					\
	do {								\
		int _rc = pthread_join(tid, status);			\
		if (_rc) {						\
			TH_LOG("pthread_join of tid %u failed: %d\n",	\
				(unsigned int)(tid), _rc);		\
		} else {						\
			(tid) = 0;					\
		}							\
	} while (0)
2539 | |
/* Shared state for the TSYNC (thread-synchronized filter) tests. */
FIXTURE(TSYNC) {
	/* Heap-allocated filter programs built in setup, freed in teardown. */
	struct sock_fprog root_prog, apply_prog;
	struct tsync_sibling sibling[TSYNC_SIBLINGS];
	/* Synchronization objects shared with every sibling thread. */
	sem_t started;
	pthread_cond_t cond;
	pthread_mutex_t mutex;
	/* Number of siblings that have reported in via @started. */
	int sibling_count;
};
2548 | |
2549 | FIXTURE_SETUP(TSYNC) |
2550 | { |
2551 | struct sock_filter root_filter[] = { |
2552 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
2553 | }; |
2554 | struct sock_filter apply_filter[] = { |
2555 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
2556 | offsetof(struct seccomp_data, nr)), |
2557 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), |
2558 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), |
2559 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
2560 | }; |
2561 | |
2562 | memset(&self->root_prog, 0, sizeof(self->root_prog)); |
2563 | memset(&self->apply_prog, 0, sizeof(self->apply_prog)); |
2564 | memset(&self->sibling, 0, sizeof(self->sibling)); |
2565 | self->root_prog.filter = malloc(sizeof(root_filter)); |
2566 | ASSERT_NE(NULL, self->root_prog.filter); |
2567 | memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); |
2568 | self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); |
2569 | |
2570 | self->apply_prog.filter = malloc(sizeof(apply_filter)); |
2571 | ASSERT_NE(NULL, self->apply_prog.filter); |
2572 | memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); |
2573 | self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); |
2574 | |
2575 | self->sibling_count = 0; |
2576 | pthread_mutex_init(&self->mutex, NULL); |
2577 | pthread_cond_init(&self->cond, NULL); |
2578 | sem_init(&self->started, 0, 0); |
2579 | self->sibling[0].tid = 0; |
2580 | self->sibling[0].cond = &self->cond; |
2581 | self->sibling[0].started = &self->started; |
2582 | self->sibling[0].mutex = &self->mutex; |
2583 | self->sibling[0].diverge = 0; |
2584 | self->sibling[0].num_waits = 1; |
2585 | self->sibling[0].prog = &self->root_prog; |
2586 | self->sibling[0].metadata = _metadata; |
2587 | self->sibling[1].tid = 0; |
2588 | self->sibling[1].cond = &self->cond; |
2589 | self->sibling[1].started = &self->started; |
2590 | self->sibling[1].mutex = &self->mutex; |
2591 | self->sibling[1].diverge = 0; |
2592 | self->sibling[1].prog = &self->root_prog; |
2593 | self->sibling[1].num_waits = 1; |
2594 | self->sibling[1].metadata = _metadata; |
2595 | } |
2596 | |
2597 | FIXTURE_TEARDOWN(TSYNC) |
2598 | { |
2599 | int sib = 0; |
2600 | |
2601 | if (self->root_prog.filter) |
2602 | free(self->root_prog.filter); |
2603 | if (self->apply_prog.filter) |
2604 | free(self->apply_prog.filter); |
2605 | |
2606 | for ( ; sib < self->sibling_count; ++sib) { |
2607 | struct tsync_sibling *s = &self->sibling[sib]; |
2608 | |
2609 | if (!s->tid) |
2610 | continue; |
2611 | /* |
2612 | * If a thread is still running, it may be stuck, so hit |
2613 | * it over the head really hard. |
2614 | */ |
2615 | pthread_kill(s->tid, 9); |
2616 | } |
2617 | pthread_mutex_destroy(&self->mutex); |
2618 | pthread_cond_destroy(&self->cond); |
2619 | sem_destroy(&self->started); |
2620 | } |
2621 | |
/*
 * Thread body for a TSYNC sibling. @data is the thread's struct
 * tsync_sibling.
 *
 * The thread records its tid, optionally installs its own filter (when
 * @diverge is set), posts @started, then consumes @num_waits condition
 * wake-ups before probing the policy. The returned pointer carries one of
 * the SIBLING_EXIT_* codes:
 *  - SIBLING_EXIT_FAILURE  if the diverging prctl() failed,
 *  - SIBLING_EXIT_NEWPRIVS if no_new_privs is not set after waking up,
 *  - SIBLING_EXIT_UNKILLED if the read() probe returned.
 */
void *tsync_sibling(void *data)
{
	long ret = 0;
	struct tsync_sibling *me = data;

	me->system_tid = syscall(__NR_gettid);

	pthread_mutex_lock(me->mutex);
	if (me->diverge) {
		/* Just re-apply the root prog to fork the tree */
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
				me->prog, 0, 0);
	}
	sem_post(me->started);
	/* Return outside of started so parent notices failures. */
	if (ret) {
		pthread_mutex_unlock(me->mutex);
		return (void *)SIBLING_EXIT_FAILURE;
	}
	/* Wait with the mutex held until the expected wake-ups arrived. */
	do {
		pthread_cond_wait(me->cond, me->mutex);
		me->num_waits = me->num_waits - 1;
	} while (me->num_waits);
	pthread_mutex_unlock(me->mutex);

	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
	if (!ret)
		return (void *)SIBLING_EXIT_NEWPRIVS;
	/* Probe syscall; its result is deliberately ignored. */
	read(-1, NULL, 0);
	return (void *)SIBLING_EXIT_UNKILLED;
}
2653 | |
2654 | void tsync_start_sibling(struct tsync_sibling *sibling) |
2655 | { |
2656 | pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); |
2657 | } |
2658 | |
2659 | TEST_F(TSYNC, siblings_fail_prctl) |
2660 | { |
2661 | long ret; |
2662 | void *status; |
2663 | struct sock_filter filter[] = { |
2664 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
2665 | offsetof(struct seccomp_data, nr)), |
2666 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), |
2667 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), |
2668 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
2669 | }; |
2670 | struct sock_fprog prog = { |
2671 | .len = (unsigned short)ARRAY_SIZE(filter), |
2672 | .filter = filter, |
2673 | }; |
2674 | |
2675 | ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { |
2676 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
2677 | } |
2678 | |
2679 | /* Check prctl failure detection by requesting sib 0 diverge. */ |
2680 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: 0, args: &prog); |
2681 | ASSERT_NE(ENOSYS, errno) { |
2682 | TH_LOG("Kernel does not support seccomp syscall!" ); |
2683 | } |
2684 | ASSERT_EQ(0, ret) { |
2685 | TH_LOG("setting filter failed" ); |
2686 | } |
2687 | |
2688 | self->sibling[0].diverge = 1; |
2689 | tsync_start_sibling(sibling: &self->sibling[0]); |
2690 | tsync_start_sibling(sibling: &self->sibling[1]); |
2691 | |
2692 | while (self->sibling_count < TSYNC_SIBLINGS) { |
2693 | sem_wait(&self->started); |
2694 | self->sibling_count++; |
2695 | } |
2696 | |
2697 | /* Signal the threads to clean up*/ |
2698 | pthread_mutex_lock(&self->mutex); |
2699 | ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { |
2700 | TH_LOG("cond broadcast non-zero" ); |
2701 | } |
2702 | pthread_mutex_unlock(&self->mutex); |
2703 | |
2704 | /* Ensure diverging sibling failed to call prctl. */ |
2705 | PTHREAD_JOIN(self->sibling[0].tid, &status); |
2706 | EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); |
2707 | PTHREAD_JOIN(self->sibling[1].tid, &status); |
2708 | EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); |
2709 | } |
2710 | |
2711 | TEST_F(TSYNC, two_siblings_with_ancestor) |
2712 | { |
2713 | long ret; |
2714 | void *status; |
2715 | |
2716 | ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { |
2717 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
2718 | } |
2719 | |
2720 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: 0, args: &self->root_prog); |
2721 | ASSERT_NE(ENOSYS, errno) { |
2722 | TH_LOG("Kernel does not support seccomp syscall!" ); |
2723 | } |
2724 | ASSERT_EQ(0, ret) { |
2725 | TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!" ); |
2726 | } |
2727 | tsync_start_sibling(sibling: &self->sibling[0]); |
2728 | tsync_start_sibling(sibling: &self->sibling[1]); |
2729 | |
2730 | while (self->sibling_count < TSYNC_SIBLINGS) { |
2731 | sem_wait(&self->started); |
2732 | self->sibling_count++; |
2733 | } |
2734 | |
2735 | ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
2736 | args: &self->apply_prog); |
2737 | ASSERT_EQ(0, ret) { |
2738 | TH_LOG("Could install filter on all threads!" ); |
2739 | } |
2740 | /* Tell the siblings to test the policy */ |
2741 | pthread_mutex_lock(&self->mutex); |
2742 | ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { |
2743 | TH_LOG("cond broadcast non-zero" ); |
2744 | } |
2745 | pthread_mutex_unlock(&self->mutex); |
2746 | /* Ensure they are both killed and don't exit cleanly. */ |
2747 | PTHREAD_JOIN(self->sibling[0].tid, &status); |
2748 | EXPECT_EQ(0x0, (long)status); |
2749 | PTHREAD_JOIN(self->sibling[1].tid, &status); |
2750 | EXPECT_EQ(0x0, (long)status); |
2751 | } |
2752 | |
2753 | TEST_F(TSYNC, two_sibling_want_nnp) |
2754 | { |
2755 | void *status; |
2756 | |
2757 | /* start siblings before any prctl() operations */ |
2758 | tsync_start_sibling(sibling: &self->sibling[0]); |
2759 | tsync_start_sibling(sibling: &self->sibling[1]); |
2760 | while (self->sibling_count < TSYNC_SIBLINGS) { |
2761 | sem_wait(&self->started); |
2762 | self->sibling_count++; |
2763 | } |
2764 | |
2765 | /* Tell the siblings to test no policy */ |
2766 | pthread_mutex_lock(&self->mutex); |
2767 | ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { |
2768 | TH_LOG("cond broadcast non-zero" ); |
2769 | } |
2770 | pthread_mutex_unlock(&self->mutex); |
2771 | |
2772 | /* Ensure they are both upset about lacking nnp. */ |
2773 | PTHREAD_JOIN(self->sibling[0].tid, &status); |
2774 | EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); |
2775 | PTHREAD_JOIN(self->sibling[1].tid, &status); |
2776 | EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); |
2777 | } |
2778 | |
2779 | TEST_F(TSYNC, two_siblings_with_no_filter) |
2780 | { |
2781 | long ret; |
2782 | void *status; |
2783 | |
2784 | /* start siblings before any prctl() operations */ |
2785 | tsync_start_sibling(sibling: &self->sibling[0]); |
2786 | tsync_start_sibling(sibling: &self->sibling[1]); |
2787 | while (self->sibling_count < TSYNC_SIBLINGS) { |
2788 | sem_wait(&self->started); |
2789 | self->sibling_count++; |
2790 | } |
2791 | |
2792 | ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { |
2793 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
2794 | } |
2795 | |
2796 | ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
2797 | args: &self->apply_prog); |
2798 | ASSERT_NE(ENOSYS, errno) { |
2799 | TH_LOG("Kernel does not support seccomp syscall!" ); |
2800 | } |
2801 | ASSERT_EQ(0, ret) { |
2802 | TH_LOG("Could install filter on all threads!" ); |
2803 | } |
2804 | |
2805 | /* Tell the siblings to test the policy */ |
2806 | pthread_mutex_lock(&self->mutex); |
2807 | ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { |
2808 | TH_LOG("cond broadcast non-zero" ); |
2809 | } |
2810 | pthread_mutex_unlock(&self->mutex); |
2811 | |
2812 | /* Ensure they are both killed and don't exit cleanly. */ |
2813 | PTHREAD_JOIN(self->sibling[0].tid, &status); |
2814 | EXPECT_EQ(0x0, (long)status); |
2815 | PTHREAD_JOIN(self->sibling[1].tid, &status); |
2816 | EXPECT_EQ(0x0, (long)status); |
2817 | } |
2818 | |
2819 | TEST_F(TSYNC, two_siblings_with_one_divergence) |
2820 | { |
2821 | long ret; |
2822 | void *status; |
2823 | |
2824 | ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { |
2825 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
2826 | } |
2827 | |
2828 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: 0, args: &self->root_prog); |
2829 | ASSERT_NE(ENOSYS, errno) { |
2830 | TH_LOG("Kernel does not support seccomp syscall!" ); |
2831 | } |
2832 | ASSERT_EQ(0, ret) { |
2833 | TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!" ); |
2834 | } |
2835 | self->sibling[0].diverge = 1; |
2836 | tsync_start_sibling(sibling: &self->sibling[0]); |
2837 | tsync_start_sibling(sibling: &self->sibling[1]); |
2838 | |
2839 | while (self->sibling_count < TSYNC_SIBLINGS) { |
2840 | sem_wait(&self->started); |
2841 | self->sibling_count++; |
2842 | } |
2843 | |
2844 | ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
2845 | args: &self->apply_prog); |
2846 | ASSERT_EQ(self->sibling[0].system_tid, ret) { |
2847 | TH_LOG("Did not fail on diverged sibling." ); |
2848 | } |
2849 | |
2850 | /* Wake the threads */ |
2851 | pthread_mutex_lock(&self->mutex); |
2852 | ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { |
2853 | TH_LOG("cond broadcast non-zero" ); |
2854 | } |
2855 | pthread_mutex_unlock(&self->mutex); |
2856 | |
2857 | /* Ensure they are both unkilled. */ |
2858 | PTHREAD_JOIN(self->sibling[0].tid, &status); |
2859 | EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); |
2860 | PTHREAD_JOIN(self->sibling[1].tid, &status); |
2861 | EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); |
2862 | } |
2863 | |
2864 | TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err) |
2865 | { |
2866 | long ret, flags; |
2867 | void *status; |
2868 | |
2869 | ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { |
2870 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
2871 | } |
2872 | |
2873 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: 0, args: &self->root_prog); |
2874 | ASSERT_NE(ENOSYS, errno) { |
2875 | TH_LOG("Kernel does not support seccomp syscall!" ); |
2876 | } |
2877 | ASSERT_EQ(0, ret) { |
2878 | TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!" ); |
2879 | } |
2880 | self->sibling[0].diverge = 1; |
2881 | tsync_start_sibling(sibling: &self->sibling[0]); |
2882 | tsync_start_sibling(sibling: &self->sibling[1]); |
2883 | |
2884 | while (self->sibling_count < TSYNC_SIBLINGS) { |
2885 | sem_wait(&self->started); |
2886 | self->sibling_count++; |
2887 | } |
2888 | |
2889 | flags = SECCOMP_FILTER_FLAG_TSYNC | \ |
2890 | SECCOMP_FILTER_FLAG_TSYNC_ESRCH; |
2891 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, args: &self->apply_prog); |
2892 | ASSERT_EQ(ESRCH, errno) { |
2893 | TH_LOG("Did not return ESRCH for diverged sibling." ); |
2894 | } |
2895 | ASSERT_EQ(-1, ret) { |
2896 | TH_LOG("Did not fail on diverged sibling." ); |
2897 | } |
2898 | |
2899 | /* Wake the threads */ |
2900 | pthread_mutex_lock(&self->mutex); |
2901 | ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { |
2902 | TH_LOG("cond broadcast non-zero" ); |
2903 | } |
2904 | pthread_mutex_unlock(&self->mutex); |
2905 | |
2906 | /* Ensure they are both unkilled. */ |
2907 | PTHREAD_JOIN(self->sibling[0].tid, &status); |
2908 | EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); |
2909 | PTHREAD_JOIN(self->sibling[1].tid, &status); |
2910 | EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); |
2911 | } |
2912 | |
2913 | TEST_F(TSYNC, two_siblings_not_under_filter) |
2914 | { |
2915 | long ret, sib; |
2916 | void *status; |
2917 | struct timespec delay = { .tv_nsec = 100000000 }; |
2918 | |
2919 | ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { |
2920 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
2921 | } |
2922 | |
2923 | /* |
2924 | * Sibling 0 will have its own seccomp policy |
2925 | * and Sibling 1 will not be under seccomp at |
2926 | * all. Sibling 1 will enter seccomp and 0 |
2927 | * will cause failure. |
2928 | */ |
2929 | self->sibling[0].diverge = 1; |
2930 | tsync_start_sibling(sibling: &self->sibling[0]); |
2931 | tsync_start_sibling(sibling: &self->sibling[1]); |
2932 | |
2933 | while (self->sibling_count < TSYNC_SIBLINGS) { |
2934 | sem_wait(&self->started); |
2935 | self->sibling_count++; |
2936 | } |
2937 | |
2938 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: 0, args: &self->root_prog); |
2939 | ASSERT_NE(ENOSYS, errno) { |
2940 | TH_LOG("Kernel does not support seccomp syscall!" ); |
2941 | } |
2942 | ASSERT_EQ(0, ret) { |
2943 | TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!" ); |
2944 | } |
2945 | |
2946 | ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
2947 | args: &self->apply_prog); |
2948 | ASSERT_EQ(ret, self->sibling[0].system_tid) { |
2949 | TH_LOG("Did not fail on diverged sibling." ); |
2950 | } |
2951 | sib = 1; |
2952 | if (ret == self->sibling[0].system_tid) |
2953 | sib = 0; |
2954 | |
2955 | pthread_mutex_lock(&self->mutex); |
2956 | |
2957 | /* Increment the other siblings num_waits so we can clean up |
2958 | * the one we just saw. |
2959 | */ |
2960 | self->sibling[!sib].num_waits += 1; |
2961 | |
2962 | /* Signal the thread to clean up*/ |
2963 | ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { |
2964 | TH_LOG("cond broadcast non-zero" ); |
2965 | } |
2966 | pthread_mutex_unlock(&self->mutex); |
2967 | PTHREAD_JOIN(self->sibling[sib].tid, &status); |
2968 | EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); |
2969 | /* Poll for actual task death. pthread_join doesn't guarantee it. */ |
2970 | while (!kill(self->sibling[sib].system_tid, 0)) |
2971 | nanosleep(&delay, NULL); |
2972 | /* Switch to the remaining sibling */ |
2973 | sib = !sib; |
2974 | |
2975 | ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
2976 | args: &self->apply_prog); |
2977 | ASSERT_EQ(0, ret) { |
2978 | TH_LOG("Expected the remaining sibling to sync" ); |
2979 | }; |
2980 | |
2981 | pthread_mutex_lock(&self->mutex); |
2982 | |
2983 | /* If remaining sibling didn't have a chance to wake up during |
2984 | * the first broadcast, manually reduce the num_waits now. |
2985 | */ |
2986 | if (self->sibling[sib].num_waits > 1) |
2987 | self->sibling[sib].num_waits = 1; |
2988 | ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { |
2989 | TH_LOG("cond broadcast non-zero" ); |
2990 | } |
2991 | pthread_mutex_unlock(&self->mutex); |
2992 | PTHREAD_JOIN(self->sibling[sib].tid, &status); |
2993 | EXPECT_EQ(0, (long)status); |
2994 | /* Poll for actual task death. pthread_join doesn't guarantee it. */ |
2995 | while (!kill(self->sibling[sib].system_tid, 0)) |
2996 | nanosleep(&delay, NULL); |
2997 | |
2998 | ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
2999 | args: &self->apply_prog); |
3000 | ASSERT_EQ(0, ret); /* just us chickens */ |
3001 | } |
3002 | |
3003 | /* Make sure restarted syscalls are seen directly as "restart_syscall". */ |
3004 | TEST(syscall_restart) |
3005 | { |
3006 | long ret; |
3007 | unsigned long msg; |
3008 | pid_t child_pid; |
3009 | int pipefd[2]; |
3010 | int status; |
3011 | siginfo_t info = { }; |
3012 | struct sock_filter filter[] = { |
3013 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
3014 | offsetof(struct seccomp_data, nr)), |
3015 | |
3016 | #ifdef __NR_sigreturn |
3017 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0), |
3018 | #endif |
3019 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0), |
3020 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0), |
3021 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0), |
3022 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0), |
3023 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0), |
3024 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), |
3025 | |
3026 | /* Allow __NR_write for easy logging. */ |
3027 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), |
3028 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
3029 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), |
3030 | /* The nanosleep jump target. */ |
3031 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), |
3032 | /* The restart_syscall jump target. */ |
3033 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), |
3034 | }; |
3035 | struct sock_fprog prog = { |
3036 | .len = (unsigned short)ARRAY_SIZE(filter), |
3037 | .filter = filter, |
3038 | }; |
3039 | #if defined(__arm__) |
3040 | struct utsname utsbuf; |
3041 | #endif |
3042 | |
3043 | ASSERT_EQ(0, pipe(pipefd)); |
3044 | |
3045 | child_pid = fork(); |
3046 | ASSERT_LE(0, child_pid); |
3047 | if (child_pid == 0) { |
3048 | /* Child uses EXPECT not ASSERT to deliver status correctly. */ |
3049 | char buf = ' '; |
3050 | struct timespec timeout = { }; |
3051 | |
3052 | /* Attach parent as tracer and stop. */ |
3053 | EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); |
3054 | EXPECT_EQ(0, raise(SIGSTOP)); |
3055 | |
3056 | EXPECT_EQ(0, close(pipefd[1])); |
3057 | |
3058 | EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { |
3059 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
3060 | } |
3061 | |
3062 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); |
3063 | EXPECT_EQ(0, ret) { |
3064 | TH_LOG("Failed to install filter!" ); |
3065 | } |
3066 | |
3067 | EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { |
3068 | TH_LOG("Failed to read() sync from parent" ); |
3069 | } |
3070 | EXPECT_EQ('.', buf) { |
3071 | TH_LOG("Failed to get sync data from read()" ); |
3072 | } |
3073 | |
3074 | /* Start nanosleep to be interrupted. */ |
3075 | timeout.tv_sec = 1; |
3076 | errno = 0; |
3077 | EXPECT_EQ(0, nanosleep(&timeout, NULL)) { |
3078 | TH_LOG("Call to nanosleep() failed (errno %d: %s)" , |
3079 | errno, strerror(errno)); |
3080 | } |
3081 | |
3082 | /* Read final sync from parent. */ |
3083 | EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { |
3084 | TH_LOG("Failed final read() from parent" ); |
3085 | } |
3086 | EXPECT_EQ('!', buf) { |
3087 | TH_LOG("Failed to get final data from read()" ); |
3088 | } |
3089 | |
3090 | /* Directly report the status of our test harness results. */ |
3091 | syscall(__NR_exit, _metadata->exit_code); |
3092 | } |
3093 | EXPECT_EQ(0, close(pipefd[0])); |
3094 | |
3095 | /* Attach to child, setup options, and release. */ |
3096 | ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); |
3097 | ASSERT_EQ(true, WIFSTOPPED(status)); |
3098 | ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, |
3099 | PTRACE_O_TRACESECCOMP)); |
3100 | ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); |
3101 | ASSERT_EQ(1, write(pipefd[1], "." , 1)); |
3102 | |
3103 | /* Wait for nanosleep() to start. */ |
3104 | ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); |
3105 | ASSERT_EQ(true, WIFSTOPPED(status)); |
3106 | ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); |
3107 | ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); |
3108 | ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); |
3109 | ASSERT_EQ(0x100, msg); |
3110 | ret = get_syscall(_metadata, tracee: child_pid); |
3111 | EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep); |
3112 | |
3113 | /* Might as well check siginfo for sanity while we're here. */ |
3114 | ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); |
3115 | ASSERT_EQ(SIGTRAP, info.si_signo); |
3116 | ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); |
3117 | EXPECT_EQ(0, info.si_errno); |
3118 | EXPECT_EQ(getuid(), info.si_uid); |
3119 | /* Verify signal delivery came from child (seccomp-triggered). */ |
3120 | EXPECT_EQ(child_pid, info.si_pid); |
3121 | |
3122 | /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */ |
3123 | ASSERT_EQ(0, kill(child_pid, SIGSTOP)); |
3124 | ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); |
3125 | ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); |
3126 | ASSERT_EQ(true, WIFSTOPPED(status)); |
3127 | ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); |
3128 | ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); |
3129 | /* |
3130 | * There is no siginfo on SIGSTOP any more, so we can't verify |
3131 | * signal delivery came from parent now (getpid() == info.si_pid). |
3132 | * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com |
3133 | * At least verify the SIGSTOP via PTRACE_GETSIGINFO. |
3134 | */ |
3135 | EXPECT_EQ(SIGSTOP, info.si_signo); |
3136 | |
3137 | /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */ |
3138 | ASSERT_EQ(0, kill(child_pid, SIGCONT)); |
3139 | ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); |
3140 | ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); |
3141 | ASSERT_EQ(true, WIFSTOPPED(status)); |
3142 | ASSERT_EQ(SIGCONT, WSTOPSIG(status)); |
3143 | ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); |
3144 | |
3145 | /* Wait for restart_syscall() to start. */ |
3146 | ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); |
3147 | ASSERT_EQ(true, WIFSTOPPED(status)); |
3148 | ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); |
3149 | ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); |
3150 | ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); |
3151 | |
3152 | ASSERT_EQ(0x200, msg); |
3153 | ret = get_syscall(_metadata, tracee: child_pid); |
3154 | #if defined(__arm__) |
3155 | /* |
3156 | * FIXME: |
3157 | * - native ARM registers do NOT expose true syscall. |
3158 | * - compat ARM registers on ARM64 DO expose true syscall. |
3159 | */ |
3160 | ASSERT_EQ(0, uname(&utsbuf)); |
3161 | if (strncmp(utsbuf.machine, "arm" , 3) == 0) { |
3162 | EXPECT_EQ(__NR_nanosleep, ret); |
3163 | } else |
3164 | #endif |
3165 | { |
3166 | EXPECT_EQ(__NR_restart_syscall, ret); |
3167 | } |
3168 | |
3169 | /* Write again to end test. */ |
3170 | ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); |
3171 | ASSERT_EQ(1, write(pipefd[1], "!" , 1)); |
3172 | EXPECT_EQ(0, close(pipefd[1])); |
3173 | |
3174 | ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); |
3175 | if (WIFSIGNALED(status) || WEXITSTATUS(status)) |
3176 | _metadata->exit_code = KSFT_FAIL; |
3177 | } |
3178 | |
3179 | TEST_SIGNAL(filter_flag_log, SIGSYS) |
3180 | { |
3181 | struct sock_filter allow_filter[] = { |
3182 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
3183 | }; |
3184 | struct sock_filter kill_filter[] = { |
3185 | BPF_STMT(BPF_LD|BPF_W|BPF_ABS, |
3186 | offsetof(struct seccomp_data, nr)), |
3187 | BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), |
3188 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), |
3189 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
3190 | }; |
3191 | struct sock_fprog allow_prog = { |
3192 | .len = (unsigned short)ARRAY_SIZE(allow_filter), |
3193 | .filter = allow_filter, |
3194 | }; |
3195 | struct sock_fprog kill_prog = { |
3196 | .len = (unsigned short)ARRAY_SIZE(kill_filter), |
3197 | .filter = kill_filter, |
3198 | }; |
3199 | long ret; |
3200 | pid_t parent = getppid(); |
3201 | |
3202 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
3203 | ASSERT_EQ(0, ret); |
3204 | |
3205 | /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ |
3206 | ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, |
3207 | args: &allow_prog); |
3208 | ASSERT_NE(ENOSYS, errno) { |
3209 | TH_LOG("Kernel does not support seccomp syscall!" ); |
3210 | } |
3211 | EXPECT_NE(0, ret) { |
3212 | TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!" ); |
3213 | } |
3214 | EXPECT_EQ(EINVAL, errno) { |
3215 | TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!" ); |
3216 | } |
3217 | |
3218 | /* Verify that a simple, permissive filter can be added with no flags */ |
3219 | ret = seccomp(SECCOMP_SET_MODE_FILTER, flags: 0, args: &allow_prog); |
3220 | EXPECT_EQ(0, ret); |
3221 | |
3222 | /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ |
3223 | ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, |
3224 | args: &allow_prog); |
3225 | ASSERT_NE(EINVAL, errno) { |
3226 | TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!" ); |
3227 | } |
3228 | EXPECT_EQ(0, ret); |
3229 | |
3230 | /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ |
3231 | ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, |
3232 | args: &kill_prog); |
3233 | EXPECT_EQ(0, ret); |
3234 | |
3235 | EXPECT_EQ(parent, syscall(__NR_getppid)); |
3236 | /* getpid() should never return. */ |
3237 | EXPECT_EQ(0, syscall(__NR_getpid)); |
3238 | } |
3239 | |
3240 | TEST(get_action_avail) |
3241 | { |
3242 | __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, |
3243 | SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, |
3244 | SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; |
3245 | __u32 unknown_action = 0x10000000U; |
3246 | int i; |
3247 | long ret; |
3248 | |
3249 | ret = seccomp(SECCOMP_GET_ACTION_AVAIL, flags: 0, args: &actions[0]); |
3250 | ASSERT_NE(ENOSYS, errno) { |
3251 | TH_LOG("Kernel does not support seccomp syscall!" ); |
3252 | } |
3253 | ASSERT_NE(EINVAL, errno) { |
3254 | TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!" ); |
3255 | } |
3256 | EXPECT_EQ(ret, 0); |
3257 | |
3258 | for (i = 0; i < ARRAY_SIZE(actions); i++) { |
3259 | ret = seccomp(SECCOMP_GET_ACTION_AVAIL, flags: 0, args: &actions[i]); |
3260 | EXPECT_EQ(ret, 0) { |
3261 | TH_LOG("Expected action (0x%X) not available!" , |
3262 | actions[i]); |
3263 | } |
3264 | } |
3265 | |
3266 | /* Check that an unknown action is handled properly (EOPNOTSUPP) */ |
3267 | ret = seccomp(SECCOMP_GET_ACTION_AVAIL, flags: 0, args: &unknown_action); |
3268 | EXPECT_EQ(ret, -1); |
3269 | EXPECT_EQ(errno, EOPNOTSUPP); |
3270 | } |
3271 | |
3272 | TEST(get_metadata) |
3273 | { |
3274 | pid_t pid; |
3275 | int pipefd[2]; |
3276 | char buf; |
3277 | struct seccomp_metadata md; |
3278 | long ret; |
3279 | |
3280 | /* Only real root can get metadata. */ |
3281 | if (geteuid()) { |
3282 | SKIP(return, "get_metadata requires real root" ); |
3283 | return; |
3284 | } |
3285 | |
3286 | ASSERT_EQ(0, pipe(pipefd)); |
3287 | |
3288 | pid = fork(); |
3289 | ASSERT_GE(pid, 0); |
3290 | if (pid == 0) { |
3291 | struct sock_filter filter[] = { |
3292 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
3293 | }; |
3294 | struct sock_fprog prog = { |
3295 | .len = (unsigned short)ARRAY_SIZE(filter), |
3296 | .filter = filter, |
3297 | }; |
3298 | |
3299 | /* one with log, one without */ |
3300 | EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, |
3301 | SECCOMP_FILTER_FLAG_LOG, &prog)); |
3302 | EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); |
3303 | |
3304 | EXPECT_EQ(0, close(pipefd[0])); |
3305 | ASSERT_EQ(1, write(pipefd[1], "1" , 1)); |
3306 | ASSERT_EQ(0, close(pipefd[1])); |
3307 | |
3308 | while (1) |
3309 | sleep(100); |
3310 | } |
3311 | |
3312 | ASSERT_EQ(0, close(pipefd[1])); |
3313 | ASSERT_EQ(1, read(pipefd[0], &buf, 1)); |
3314 | |
3315 | ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); |
3316 | ASSERT_EQ(pid, waitpid(pid, NULL, 0)); |
3317 | |
3318 | /* Past here must not use ASSERT or child process is never killed. */ |
3319 | |
3320 | md.filter_off = 0; |
3321 | errno = 0; |
3322 | ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); |
3323 | EXPECT_EQ(sizeof(md), ret) { |
3324 | if (errno == EINVAL) |
3325 | SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)" ); |
3326 | } |
3327 | |
3328 | EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); |
3329 | EXPECT_EQ(md.filter_off, 0); |
3330 | |
3331 | md.filter_off = 1; |
3332 | ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); |
3333 | EXPECT_EQ(sizeof(md), ret); |
3334 | EXPECT_EQ(md.flags, 0); |
3335 | EXPECT_EQ(md.filter_off, 1); |
3336 | |
3337 | skip: |
3338 | ASSERT_EQ(0, kill(pid, SIGKILL)); |
3339 | } |
3340 | |
/*
 * Install a filter that returns SECCOMP_RET_USER_NOTIF for syscall number
 * @nr and SECCOMP_RET_ALLOW for everything else.
 *
 * @nr:    syscall number to route to user notification.
 * @flags: flags handed unchanged to seccomp(SECCOMP_SET_MODE_FILTER, ...).
 *
 * Returns whatever seccomp() returns. Callers in this file that pass
 * SECCOMP_FILTER_FLAG_NEW_LISTENER treat a non-negative result as the
 * listener file descriptor.
 */
static int user_notif_syscall(int nr, unsigned int flags)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};

	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};

	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
}
3358 | |
3359 | #define USER_NOTIF_MAGIC INT_MAX |
3360 | TEST(user_notification_basic) |
3361 | { |
3362 | pid_t pid; |
3363 | long ret; |
3364 | int status, listener; |
3365 | struct seccomp_notif req = {}; |
3366 | struct seccomp_notif_resp resp = {}; |
3367 | struct pollfd pollfd; |
3368 | |
3369 | struct sock_filter filter[] = { |
3370 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
3371 | }; |
3372 | struct sock_fprog prog = { |
3373 | .len = (unsigned short)ARRAY_SIZE(filter), |
3374 | .filter = filter, |
3375 | }; |
3376 | |
3377 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
3378 | ASSERT_EQ(0, ret) { |
3379 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
3380 | } |
3381 | |
3382 | pid = fork(); |
3383 | ASSERT_GE(pid, 0); |
3384 | |
3385 | /* Check that we get -ENOSYS with no listener attached */ |
3386 | if (pid == 0) { |
3387 | if (user_notif_syscall(__NR_getppid, flags: 0) < 0) |
3388 | exit(1); |
3389 | ret = syscall(__NR_getppid); |
3390 | exit(ret >= 0 || errno != ENOSYS); |
3391 | } |
3392 | |
3393 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
3394 | EXPECT_EQ(true, WIFEXITED(status)); |
3395 | EXPECT_EQ(0, WEXITSTATUS(status)); |
3396 | |
3397 | /* Add some no-op filters for grins. */ |
3398 | EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); |
3399 | EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); |
3400 | EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); |
3401 | EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); |
3402 | |
3403 | /* Check that the basic notification machinery works */ |
3404 | listener = user_notif_syscall(__NR_getppid, |
3405 | SECCOMP_FILTER_FLAG_NEW_LISTENER); |
3406 | ASSERT_GE(listener, 0); |
3407 | |
3408 | /* Installing a second listener in the chain should EBUSY */ |
3409 | EXPECT_EQ(user_notif_syscall(__NR_getppid, |
3410 | SECCOMP_FILTER_FLAG_NEW_LISTENER), |
3411 | -1); |
3412 | EXPECT_EQ(errno, EBUSY); |
3413 | |
3414 | pid = fork(); |
3415 | ASSERT_GE(pid, 0); |
3416 | |
3417 | if (pid == 0) { |
3418 | ret = syscall(__NR_getppid); |
3419 | exit(ret != USER_NOTIF_MAGIC); |
3420 | } |
3421 | |
3422 | pollfd.fd = listener; |
3423 | pollfd.events = POLLIN | POLLOUT; |
3424 | |
3425 | EXPECT_GT(poll(&pollfd, 1, -1), 0); |
3426 | EXPECT_EQ(pollfd.revents, POLLIN); |
3427 | |
3428 | /* Test that we can't pass garbage to the kernel. */ |
3429 | memset(&req, 0, sizeof(req)); |
3430 | req.pid = -1; |
3431 | errno = 0; |
3432 | ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); |
3433 | EXPECT_EQ(-1, ret); |
3434 | EXPECT_EQ(EINVAL, errno); |
3435 | |
3436 | if (ret) { |
3437 | req.pid = 0; |
3438 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
3439 | } |
3440 | |
3441 | pollfd.fd = listener; |
3442 | pollfd.events = POLLIN | POLLOUT; |
3443 | |
3444 | EXPECT_GT(poll(&pollfd, 1, -1), 0); |
3445 | EXPECT_EQ(pollfd.revents, POLLOUT); |
3446 | |
3447 | EXPECT_EQ(req.data.nr, __NR_getppid); |
3448 | |
3449 | resp.id = req.id; |
3450 | resp.error = 0; |
3451 | resp.val = USER_NOTIF_MAGIC; |
3452 | |
3453 | /* check that we make sure flags == 0 */ |
3454 | resp.flags = 1; |
3455 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); |
3456 | EXPECT_EQ(errno, EINVAL); |
3457 | |
3458 | resp.flags = 0; |
3459 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); |
3460 | |
3461 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
3462 | EXPECT_EQ(true, WIFEXITED(status)); |
3463 | EXPECT_EQ(0, WEXITSTATUS(status)); |
3464 | } |
3465 | |
3466 | TEST(user_notification_with_tsync) |
3467 | { |
3468 | int ret; |
3469 | unsigned int flags; |
3470 | |
3471 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
3472 | ASSERT_EQ(0, ret) { |
3473 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
3474 | } |
3475 | |
3476 | /* these were exclusive */ |
3477 | flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | |
3478 | SECCOMP_FILTER_FLAG_TSYNC; |
3479 | ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags)); |
3480 | ASSERT_EQ(EINVAL, errno); |
3481 | |
3482 | /* but now they're not */ |
3483 | flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH; |
3484 | ret = user_notif_syscall(__NR_getppid, flags); |
3485 | close(ret); |
3486 | ASSERT_LE(0, ret); |
3487 | } |
3488 | |
3489 | TEST(user_notification_kill_in_middle) |
3490 | { |
3491 | pid_t pid; |
3492 | long ret; |
3493 | int listener; |
3494 | struct seccomp_notif req = {}; |
3495 | struct seccomp_notif_resp resp = {}; |
3496 | |
3497 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
3498 | ASSERT_EQ(0, ret) { |
3499 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
3500 | } |
3501 | |
3502 | listener = user_notif_syscall(__NR_getppid, |
3503 | SECCOMP_FILTER_FLAG_NEW_LISTENER); |
3504 | ASSERT_GE(listener, 0); |
3505 | |
3506 | /* |
3507 | * Check that nothing bad happens when we kill the task in the middle |
3508 | * of a syscall. |
3509 | */ |
3510 | pid = fork(); |
3511 | ASSERT_GE(pid, 0); |
3512 | |
3513 | if (pid == 0) { |
3514 | ret = syscall(__NR_getppid); |
3515 | exit(ret != USER_NOTIF_MAGIC); |
3516 | } |
3517 | |
3518 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
3519 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0); |
3520 | |
3521 | EXPECT_EQ(kill(pid, SIGKILL), 0); |
3522 | EXPECT_EQ(waitpid(pid, NULL, 0), pid); |
3523 | |
3524 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1); |
3525 | |
3526 | resp.id = req.id; |
3527 | ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp); |
3528 | EXPECT_EQ(ret, -1); |
3529 | EXPECT_EQ(errno, ENOENT); |
3530 | } |
3531 | |
/* Descriptor the signal handler writes to; -1 until a test assigns one. */
static int handled = -1;

/*
 * Signal handler: write the single byte "c" to the `handled` descriptor and
 * report anything other than a one-byte write through perror().
 */
static void signal_handler(int signal)
{
	ssize_t nwritten = write(handled, "c", 1);

	if (nwritten == 1)
		return;

	perror("write from signal");
}
3539 | |
3540 | TEST(user_notification_signal) |
3541 | { |
3542 | pid_t pid; |
3543 | long ret; |
3544 | int status, listener, sk_pair[2]; |
3545 | struct seccomp_notif req = {}; |
3546 | struct seccomp_notif_resp resp = {}; |
3547 | char c; |
3548 | |
3549 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
3550 | ASSERT_EQ(0, ret) { |
3551 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
3552 | } |
3553 | |
3554 | ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); |
3555 | |
3556 | listener = user_notif_syscall(__NR_gettid, |
3557 | SECCOMP_FILTER_FLAG_NEW_LISTENER); |
3558 | ASSERT_GE(listener, 0); |
3559 | |
3560 | pid = fork(); |
3561 | ASSERT_GE(pid, 0); |
3562 | |
3563 | if (pid == 0) { |
3564 | close(sk_pair[0]); |
3565 | handled = sk_pair[1]; |
3566 | if (signal(SIGUSR1, signal_handler) == SIG_ERR) { |
3567 | perror("signal" ); |
3568 | exit(1); |
3569 | } |
3570 | /* |
3571 | * ERESTARTSYS behavior is a bit hard to test, because we need |
3572 | * to rely on a signal that has not yet been handled. Let's at |
3573 | * least check that the error code gets propagated through, and |
3574 | * hope that it doesn't break when there is actually a signal :) |
3575 | */ |
3576 | ret = syscall(__NR_gettid); |
3577 | exit(!(ret == -1 && errno == 512)); |
3578 | } |
3579 | |
3580 | close(sk_pair[1]); |
3581 | |
3582 | memset(&req, 0, sizeof(req)); |
3583 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
3584 | |
3585 | EXPECT_EQ(kill(pid, SIGUSR1), 0); |
3586 | |
3587 | /* |
3588 | * Make sure the signal really is delivered, which means we're not |
3589 | * stuck in the user notification code any more and the notification |
3590 | * should be dead. |
3591 | */ |
3592 | EXPECT_EQ(read(sk_pair[0], &c, 1), 1); |
3593 | |
3594 | resp.id = req.id; |
3595 | resp.error = -EPERM; |
3596 | resp.val = 0; |
3597 | |
3598 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); |
3599 | EXPECT_EQ(errno, ENOENT); |
3600 | |
3601 | memset(&req, 0, sizeof(req)); |
3602 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
3603 | |
3604 | resp.id = req.id; |
3605 | resp.error = -512; /* -ERESTARTSYS */ |
3606 | resp.val = 0; |
3607 | |
3608 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); |
3609 | |
3610 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
3611 | EXPECT_EQ(true, WIFEXITED(status)); |
3612 | EXPECT_EQ(0, WEXITSTATUS(status)); |
3613 | } |
3614 | |
3615 | TEST(user_notification_closed_listener) |
3616 | { |
3617 | pid_t pid; |
3618 | long ret; |
3619 | int status, listener; |
3620 | |
3621 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
3622 | ASSERT_EQ(0, ret) { |
3623 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
3624 | } |
3625 | |
3626 | listener = user_notif_syscall(__NR_getppid, |
3627 | SECCOMP_FILTER_FLAG_NEW_LISTENER); |
3628 | ASSERT_GE(listener, 0); |
3629 | |
3630 | /* |
3631 | * Check that we get an ENOSYS when the listener is closed. |
3632 | */ |
3633 | pid = fork(); |
3634 | ASSERT_GE(pid, 0); |
3635 | if (pid == 0) { |
3636 | close(listener); |
3637 | ret = syscall(__NR_getppid); |
3638 | exit(ret != -1 && errno != ENOSYS); |
3639 | } |
3640 | |
3641 | close(listener); |
3642 | |
3643 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
3644 | EXPECT_EQ(true, WIFEXITED(status)); |
3645 | EXPECT_EQ(0, WEXITSTATUS(status)); |
3646 | } |
3647 | |
3648 | /* |
3649 | * Check that a pid in a child namespace still shows up as valid in ours. |
3650 | */ |
3651 | TEST(user_notification_child_pid_ns) |
3652 | { |
3653 | pid_t pid; |
3654 | int status, listener; |
3655 | struct seccomp_notif req = {}; |
3656 | struct seccomp_notif_resp resp = {}; |
3657 | |
3658 | ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) { |
3659 | if (errno == EINVAL) |
3660 | SKIP(return, "kernel missing CLONE_NEWUSER support" ); |
3661 | }; |
3662 | |
3663 | listener = user_notif_syscall(__NR_getppid, |
3664 | SECCOMP_FILTER_FLAG_NEW_LISTENER); |
3665 | ASSERT_GE(listener, 0); |
3666 | |
3667 | pid = fork(); |
3668 | ASSERT_GE(pid, 0); |
3669 | |
3670 | if (pid == 0) |
3671 | exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); |
3672 | |
3673 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
3674 | EXPECT_EQ(req.pid, pid); |
3675 | |
3676 | resp.id = req.id; |
3677 | resp.error = 0; |
3678 | resp.val = USER_NOTIF_MAGIC; |
3679 | |
3680 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); |
3681 | |
3682 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
3683 | EXPECT_EQ(true, WIFEXITED(status)); |
3684 | EXPECT_EQ(0, WEXITSTATUS(status)); |
3685 | close(listener); |
3686 | } |
3687 | |
3688 | /* |
3689 | * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. |
3690 | * invalid. |
3691 | */ |
3692 | TEST(user_notification_sibling_pid_ns) |
3693 | { |
3694 | pid_t pid, pid2; |
3695 | int status, listener; |
3696 | struct seccomp_notif req = {}; |
3697 | struct seccomp_notif_resp resp = {}; |
3698 | |
3699 | ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) { |
3700 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
3701 | } |
3702 | |
3703 | listener = user_notif_syscall(__NR_getppid, |
3704 | SECCOMP_FILTER_FLAG_NEW_LISTENER); |
3705 | ASSERT_GE(listener, 0); |
3706 | |
3707 | pid = fork(); |
3708 | ASSERT_GE(pid, 0); |
3709 | |
3710 | if (pid == 0) { |
3711 | ASSERT_EQ(unshare(CLONE_NEWPID), 0) { |
3712 | if (errno == EPERM) |
3713 | SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN" ); |
3714 | else if (errno == EINVAL) |
3715 | SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)" ); |
3716 | } |
3717 | |
3718 | pid2 = fork(); |
3719 | ASSERT_GE(pid2, 0); |
3720 | |
3721 | if (pid2 == 0) |
3722 | exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); |
3723 | |
3724 | EXPECT_EQ(waitpid(pid2, &status, 0), pid2); |
3725 | EXPECT_EQ(true, WIFEXITED(status)); |
3726 | EXPECT_EQ(0, WEXITSTATUS(status)); |
3727 | exit(WEXITSTATUS(status)); |
3728 | } |
3729 | |
3730 | /* Create the sibling ns, and sibling in it. */ |
3731 | ASSERT_EQ(unshare(CLONE_NEWPID), 0) { |
3732 | if (errno == EPERM) |
3733 | SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN" ); |
3734 | else if (errno == EINVAL) |
3735 | SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)" ); |
3736 | } |
3737 | ASSERT_EQ(errno, 0); |
3738 | |
3739 | pid2 = fork(); |
3740 | ASSERT_GE(pid2, 0); |
3741 | |
3742 | if (pid2 == 0) { |
3743 | ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
3744 | /* |
3745 | * The pid should be 0, i.e. the task is in some namespace that |
3746 | * we can't "see". |
3747 | */ |
3748 | EXPECT_EQ(req.pid, 0); |
3749 | |
3750 | resp.id = req.id; |
3751 | resp.error = 0; |
3752 | resp.val = USER_NOTIF_MAGIC; |
3753 | |
3754 | ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); |
3755 | exit(0); |
3756 | } |
3757 | |
3758 | close(listener); |
3759 | |
3760 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
3761 | EXPECT_EQ(true, WIFEXITED(status)); |
3762 | EXPECT_EQ(0, WEXITSTATUS(status)); |
3763 | |
3764 | EXPECT_EQ(waitpid(pid2, &status, 0), pid2); |
3765 | EXPECT_EQ(true, WIFEXITED(status)); |
3766 | EXPECT_EQ(0, WEXITSTATUS(status)); |
3767 | } |
3768 | |
3769 | TEST(user_notification_fault_recv) |
3770 | { |
3771 | pid_t pid; |
3772 | int status, listener; |
3773 | struct seccomp_notif req = {}; |
3774 | struct seccomp_notif_resp resp = {}; |
3775 | |
3776 | ASSERT_EQ(unshare(CLONE_NEWUSER), 0) { |
3777 | if (errno == EINVAL) |
3778 | SKIP(return, "kernel missing CLONE_NEWUSER support" ); |
3779 | } |
3780 | |
3781 | listener = user_notif_syscall(__NR_getppid, |
3782 | SECCOMP_FILTER_FLAG_NEW_LISTENER); |
3783 | ASSERT_GE(listener, 0); |
3784 | |
3785 | pid = fork(); |
3786 | ASSERT_GE(pid, 0); |
3787 | |
3788 | if (pid == 0) |
3789 | exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); |
3790 | |
3791 | /* Do a bad recv() */ |
3792 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); |
3793 | EXPECT_EQ(errno, EFAULT); |
3794 | |
3795 | /* We should still be able to receive this notification, though. */ |
3796 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
3797 | EXPECT_EQ(req.pid, pid); |
3798 | |
3799 | resp.id = req.id; |
3800 | resp.error = 0; |
3801 | resp.val = USER_NOTIF_MAGIC; |
3802 | |
3803 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); |
3804 | |
3805 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
3806 | EXPECT_EQ(true, WIFEXITED(status)); |
3807 | EXPECT_EQ(0, WEXITSTATUS(status)); |
3808 | } |
3809 | |
3810 | TEST(seccomp_get_notif_sizes) |
3811 | { |
3812 | struct seccomp_notif_sizes sizes; |
3813 | |
3814 | ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); |
3815 | EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); |
3816 | EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); |
3817 | } |
3818 | |
3819 | TEST(user_notification_continue) |
3820 | { |
3821 | pid_t pid; |
3822 | long ret; |
3823 | int status, listener; |
3824 | struct seccomp_notif req = {}; |
3825 | struct seccomp_notif_resp resp = {}; |
3826 | struct pollfd pollfd; |
3827 | |
3828 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
3829 | ASSERT_EQ(0, ret) { |
3830 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
3831 | } |
3832 | |
3833 | listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); |
3834 | ASSERT_GE(listener, 0); |
3835 | |
3836 | pid = fork(); |
3837 | ASSERT_GE(pid, 0); |
3838 | |
3839 | if (pid == 0) { |
3840 | int dup_fd, pipe_fds[2]; |
3841 | pid_t self; |
3842 | |
3843 | ASSERT_GE(pipe(pipe_fds), 0); |
3844 | |
3845 | dup_fd = dup(pipe_fds[0]); |
3846 | ASSERT_GE(dup_fd, 0); |
3847 | EXPECT_NE(pipe_fds[0], dup_fd); |
3848 | |
3849 | self = getpid(); |
3850 | ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0); |
3851 | exit(0); |
3852 | } |
3853 | |
3854 | pollfd.fd = listener; |
3855 | pollfd.events = POLLIN | POLLOUT; |
3856 | |
3857 | EXPECT_GT(poll(&pollfd, 1, -1), 0); |
3858 | EXPECT_EQ(pollfd.revents, POLLIN); |
3859 | |
3860 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
3861 | |
3862 | pollfd.fd = listener; |
3863 | pollfd.events = POLLIN | POLLOUT; |
3864 | |
3865 | EXPECT_GT(poll(&pollfd, 1, -1), 0); |
3866 | EXPECT_EQ(pollfd.revents, POLLOUT); |
3867 | |
3868 | EXPECT_EQ(req.data.nr, __NR_dup); |
3869 | |
3870 | resp.id = req.id; |
3871 | resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE; |
3872 | |
3873 | /* |
3874 | * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other |
3875 | * args be set to 0. |
3876 | */ |
3877 | resp.error = 0; |
3878 | resp.val = USER_NOTIF_MAGIC; |
3879 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); |
3880 | EXPECT_EQ(errno, EINVAL); |
3881 | |
3882 | resp.error = USER_NOTIF_MAGIC; |
3883 | resp.val = 0; |
3884 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); |
3885 | EXPECT_EQ(errno, EINVAL); |
3886 | |
3887 | resp.error = 0; |
3888 | resp.val = 0; |
3889 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) { |
3890 | if (errno == EINVAL) |
3891 | SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE" ); |
3892 | } |
3893 | |
3894 | skip: |
3895 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
3896 | EXPECT_EQ(true, WIFEXITED(status)); |
3897 | EXPECT_EQ(0, WEXITSTATUS(status)) { |
3898 | if (WEXITSTATUS(status) == 2) { |
3899 | SKIP(return, "Kernel does not support kcmp() syscall" ); |
3900 | return; |
3901 | } |
3902 | } |
3903 | } |
3904 | |
3905 | TEST(user_notification_filter_empty) |
3906 | { |
3907 | pid_t pid; |
3908 | long ret; |
3909 | int status; |
3910 | struct pollfd pollfd; |
3911 | struct __clone_args args = { |
3912 | .flags = CLONE_FILES, |
3913 | .exit_signal = SIGCHLD, |
3914 | }; |
3915 | |
3916 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
3917 | ASSERT_EQ(0, ret) { |
3918 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
3919 | } |
3920 | |
3921 | if (__NR_clone3 < 0) |
3922 | SKIP(return, "Test not built with clone3 support" ); |
3923 | |
3924 | pid = sys_clone3(args: &args, size: sizeof(args)); |
3925 | ASSERT_GE(pid, 0); |
3926 | |
3927 | if (pid == 0) { |
3928 | int listener; |
3929 | |
3930 | listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); |
3931 | if (listener < 0) |
3932 | _exit(EXIT_FAILURE); |
3933 | |
3934 | if (dup2(listener, 200) != 200) |
3935 | _exit(EXIT_FAILURE); |
3936 | |
3937 | close(listener); |
3938 | |
3939 | _exit(EXIT_SUCCESS); |
3940 | } |
3941 | |
3942 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
3943 | EXPECT_EQ(true, WIFEXITED(status)); |
3944 | EXPECT_EQ(0, WEXITSTATUS(status)); |
3945 | |
3946 | /* |
3947 | * The seccomp filter has become unused so we should be notified once |
3948 | * the kernel gets around to cleaning up task struct. |
3949 | */ |
3950 | pollfd.fd = 200; |
3951 | pollfd.events = POLLHUP; |
3952 | |
3953 | EXPECT_GT(poll(&pollfd, 1, 2000), 0); |
3954 | EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); |
3955 | } |
3956 | |
/* Thread entry point that does no work; it ignores its argument. */
static void *do_thread(void *data)
{
	(void)data;

	return NULL;
}
3961 | |
3962 | TEST(user_notification_filter_empty_threaded) |
3963 | { |
3964 | pid_t pid; |
3965 | long ret; |
3966 | int status; |
3967 | struct pollfd pollfd; |
3968 | struct __clone_args args = { |
3969 | .flags = CLONE_FILES, |
3970 | .exit_signal = SIGCHLD, |
3971 | }; |
3972 | |
3973 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
3974 | ASSERT_EQ(0, ret) { |
3975 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
3976 | } |
3977 | |
3978 | if (__NR_clone3 < 0) |
3979 | SKIP(return, "Test not built with clone3 support" ); |
3980 | |
3981 | pid = sys_clone3(args: &args, size: sizeof(args)); |
3982 | ASSERT_GE(pid, 0); |
3983 | |
3984 | if (pid == 0) { |
3985 | pid_t pid1, pid2; |
3986 | int listener, status; |
3987 | pthread_t thread; |
3988 | |
3989 | listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); |
3990 | if (listener < 0) |
3991 | _exit(EXIT_FAILURE); |
3992 | |
3993 | if (dup2(listener, 200) != 200) |
3994 | _exit(EXIT_FAILURE); |
3995 | |
3996 | close(listener); |
3997 | |
3998 | pid1 = fork(); |
3999 | if (pid1 < 0) |
4000 | _exit(EXIT_FAILURE); |
4001 | |
4002 | if (pid1 == 0) |
4003 | _exit(EXIT_SUCCESS); |
4004 | |
4005 | pid2 = fork(); |
4006 | if (pid2 < 0) |
4007 | _exit(EXIT_FAILURE); |
4008 | |
4009 | if (pid2 == 0) |
4010 | _exit(EXIT_SUCCESS); |
4011 | |
4012 | if (pthread_create(&thread, NULL, do_thread, NULL) || |
4013 | pthread_join(thread, NULL)) |
4014 | _exit(EXIT_FAILURE); |
4015 | |
4016 | if (pthread_create(&thread, NULL, do_thread, NULL) || |
4017 | pthread_join(thread, NULL)) |
4018 | _exit(EXIT_FAILURE); |
4019 | |
4020 | if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) || |
4021 | WEXITSTATUS(status)) |
4022 | _exit(EXIT_FAILURE); |
4023 | |
4024 | if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) || |
4025 | WEXITSTATUS(status)) |
4026 | _exit(EXIT_FAILURE); |
4027 | |
4028 | exit(EXIT_SUCCESS); |
4029 | } |
4030 | |
4031 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
4032 | EXPECT_EQ(true, WIFEXITED(status)); |
4033 | EXPECT_EQ(0, WEXITSTATUS(status)); |
4034 | |
4035 | /* |
4036 | * The seccomp filter has become unused so we should be notified once |
4037 | * the kernel gets around to cleaning up task struct. |
4038 | */ |
4039 | pollfd.fd = 200; |
4040 | pollfd.events = POLLHUP; |
4041 | |
4042 | EXPECT_GT(poll(&pollfd, 1, 2000), 0); |
4043 | EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); |
4044 | } |
4045 | |
4046 | |
/*
 * Return the lowest descriptor number above prev_fd for which
 * fcntl(F_GETFD) fails, i.e. one that is not currently open.
 *
 * Terminates the process with EXIT_FAILURE if no such number exists below
 * FD_SETSIZE.
 */
int get_next_fd(int prev_fd)
{
	int candidate = prev_fd + 1;

	while (candidate < FD_SETSIZE) {
		if (fcntl(candidate, F_GETFD) == -1)
			return candidate;
		++candidate;
	}

	_exit(EXIT_FAILURE);
}
4055 | |
4056 | TEST(user_notification_addfd) |
4057 | { |
4058 | pid_t pid; |
4059 | long ret; |
4060 | int status, listener, memfd, fd, nextfd; |
4061 | struct seccomp_notif_addfd addfd = {}; |
4062 | struct seccomp_notif_addfd_small small = {}; |
4063 | struct seccomp_notif_addfd_big big = {}; |
4064 | struct seccomp_notif req = {}; |
4065 | struct seccomp_notif_resp resp = {}; |
4066 | /* 100 ms */ |
4067 | struct timespec delay = { .tv_nsec = 100000000 }; |
4068 | |
4069 | /* There may be arbitrary already-open fds at test start. */ |
4070 | memfd = memfd_create("test" , 0); |
4071 | ASSERT_GE(memfd, 0); |
4072 | nextfd = get_next_fd(prev_fd: memfd); |
4073 | |
4074 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
4075 | ASSERT_EQ(0, ret) { |
4076 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
4077 | } |
4078 | |
4079 | /* fd: 4 */ |
4080 | /* Check that the basic notification machinery works */ |
4081 | listener = user_notif_syscall(__NR_getppid, |
4082 | SECCOMP_FILTER_FLAG_NEW_LISTENER); |
4083 | ASSERT_EQ(listener, nextfd); |
4084 | nextfd = get_next_fd(prev_fd: nextfd); |
4085 | |
4086 | pid = fork(); |
4087 | ASSERT_GE(pid, 0); |
4088 | |
4089 | if (pid == 0) { |
4090 | /* fds will be added and this value is expected */ |
4091 | if (syscall(__NR_getppid) != USER_NOTIF_MAGIC) |
4092 | exit(1); |
4093 | |
4094 | /* Atomic addfd+send is received here. Check it is a valid fd */ |
4095 | if (fcntl(syscall(__NR_getppid), F_GETFD) == -1) |
4096 | exit(1); |
4097 | |
4098 | exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); |
4099 | } |
4100 | |
4101 | ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
4102 | |
4103 | addfd.srcfd = memfd; |
4104 | addfd.newfd = 0; |
4105 | addfd.id = req.id; |
4106 | addfd.flags = 0x0; |
4107 | |
4108 | /* Verify bad newfd_flags cannot be set */ |
4109 | addfd.newfd_flags = ~O_CLOEXEC; |
4110 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); |
4111 | EXPECT_EQ(errno, EINVAL); |
4112 | addfd.newfd_flags = O_CLOEXEC; |
4113 | |
4114 | /* Verify bad flags cannot be set */ |
4115 | addfd.flags = 0xff; |
4116 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); |
4117 | EXPECT_EQ(errno, EINVAL); |
4118 | addfd.flags = 0; |
4119 | |
4120 | /* Verify that remote_fd cannot be set without setting flags */ |
4121 | addfd.newfd = 1; |
4122 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); |
4123 | EXPECT_EQ(errno, EINVAL); |
4124 | addfd.newfd = 0; |
4125 | |
4126 | /* Verify small size cannot be set */ |
4127 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1); |
4128 | EXPECT_EQ(errno, EINVAL); |
4129 | |
4130 | /* Verify we can't send bits filled in unknown buffer area */ |
4131 | memset(&big, 0xAA, sizeof(big)); |
4132 | big.addfd = addfd; |
4133 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1); |
4134 | EXPECT_EQ(errno, E2BIG); |
4135 | |
4136 | |
4137 | /* Verify we can set an arbitrary remote fd */ |
4138 | fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); |
4139 | EXPECT_EQ(fd, nextfd); |
4140 | nextfd = get_next_fd(prev_fd: nextfd); |
4141 | EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); |
4142 | |
4143 | /* Verify we can set an arbitrary remote fd with large size */ |
4144 | memset(&big, 0x0, sizeof(big)); |
4145 | big.addfd = addfd; |
4146 | fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); |
4147 | EXPECT_EQ(fd, nextfd); |
4148 | nextfd = get_next_fd(prev_fd: nextfd); |
4149 | |
4150 | /* Verify we can set a specific remote fd */ |
4151 | addfd.newfd = 42; |
4152 | addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; |
4153 | fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); |
4154 | EXPECT_EQ(fd, 42); |
4155 | EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); |
4156 | |
4157 | /* Resume syscall */ |
4158 | resp.id = req.id; |
4159 | resp.error = 0; |
4160 | resp.val = USER_NOTIF_MAGIC; |
4161 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); |
4162 | |
4163 | /* |
4164 | * This sets the ID of the ADD FD to the last request plus 1. The |
4165 | * notification ID increments 1 per notification. |
4166 | */ |
4167 | addfd.id = req.id + 1; |
4168 | |
4169 | /* This spins until the underlying notification is generated */ |
4170 | while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && |
4171 | errno != -EINPROGRESS) |
4172 | nanosleep(&delay, NULL); |
4173 | |
4174 | memset(&req, 0, sizeof(req)); |
4175 | ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
4176 | ASSERT_EQ(addfd.id, req.id); |
4177 | |
4178 | /* Verify we can do an atomic addfd and send */ |
4179 | addfd.newfd = 0; |
4180 | addfd.flags = SECCOMP_ADDFD_FLAG_SEND; |
4181 | fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); |
4182 | /* |
4183 | * Child has earlier "low" fds and now 42, so we expect the next |
4184 | * lowest available fd to be assigned here. |
4185 | */ |
4186 | EXPECT_EQ(fd, nextfd); |
4187 | nextfd = get_next_fd(prev_fd: nextfd); |
4188 | ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0); |
4189 | |
4190 | /* |
4191 | * This sets the ID of the ADD FD to the last request plus 1. The |
4192 | * notification ID increments 1 per notification. |
4193 | */ |
4194 | addfd.id = req.id + 1; |
4195 | |
4196 | /* This spins until the underlying notification is generated */ |
4197 | while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && |
4198 | errno != -EINPROGRESS) |
4199 | nanosleep(&delay, NULL); |
4200 | |
4201 | memset(&req, 0, sizeof(req)); |
4202 | ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
4203 | ASSERT_EQ(addfd.id, req.id); |
4204 | |
4205 | resp.id = req.id; |
4206 | resp.error = 0; |
4207 | resp.val = USER_NOTIF_MAGIC; |
4208 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); |
4209 | |
4210 | /* Wait for child to finish. */ |
4211 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
4212 | EXPECT_EQ(true, WIFEXITED(status)); |
4213 | EXPECT_EQ(0, WEXITSTATUS(status)); |
4214 | |
4215 | close(memfd); |
4216 | } |
4217 | |
4218 | TEST(user_notification_addfd_rlimit) |
4219 | { |
4220 | pid_t pid; |
4221 | long ret; |
4222 | int status, listener, memfd; |
4223 | struct seccomp_notif_addfd addfd = {}; |
4224 | struct seccomp_notif req = {}; |
4225 | struct seccomp_notif_resp resp = {}; |
4226 | const struct rlimit lim = { |
4227 | .rlim_cur = 0, |
4228 | .rlim_max = 0, |
4229 | }; |
4230 | |
4231 | memfd = memfd_create("test" , 0); |
4232 | ASSERT_GE(memfd, 0); |
4233 | |
4234 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
4235 | ASSERT_EQ(0, ret) { |
4236 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
4237 | } |
4238 | |
4239 | /* Check that the basic notification machinery works */ |
4240 | listener = user_notif_syscall(__NR_getppid, |
4241 | SECCOMP_FILTER_FLAG_NEW_LISTENER); |
4242 | ASSERT_GE(listener, 0); |
4243 | |
4244 | pid = fork(); |
4245 | ASSERT_GE(pid, 0); |
4246 | |
4247 | if (pid == 0) |
4248 | exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); |
4249 | |
4250 | |
4251 | ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
4252 | |
4253 | ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0); |
4254 | |
4255 | addfd.srcfd = memfd; |
4256 | addfd.newfd_flags = O_CLOEXEC; |
4257 | addfd.newfd = 0; |
4258 | addfd.id = req.id; |
4259 | addfd.flags = 0; |
4260 | |
4261 | /* Should probably spot check /proc/sys/fs/file-nr */ |
4262 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); |
4263 | EXPECT_EQ(errno, EMFILE); |
4264 | |
4265 | addfd.flags = SECCOMP_ADDFD_FLAG_SEND; |
4266 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); |
4267 | EXPECT_EQ(errno, EMFILE); |
4268 | |
4269 | addfd.newfd = 100; |
4270 | addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; |
4271 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); |
4272 | EXPECT_EQ(errno, EBADF); |
4273 | |
4274 | resp.id = req.id; |
4275 | resp.error = 0; |
4276 | resp.val = USER_NOTIF_MAGIC; |
4277 | |
4278 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); |
4279 | |
4280 | /* Wait for child to finish. */ |
4281 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
4282 | EXPECT_EQ(true, WIFEXITED(status)); |
4283 | EXPECT_EQ(0, WEXITSTATUS(status)); |
4284 | |
4285 | close(memfd); |
4286 | } |
4287 | |
/*
 * Fallback definitions, used only when the included headers do not already
 * define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP.
 */
#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
#endif
4292 | |
4293 | TEST(user_notification_sync) |
4294 | { |
4295 | struct seccomp_notif req = {}; |
4296 | struct seccomp_notif_resp resp = {}; |
4297 | int status, listener; |
4298 | pid_t pid; |
4299 | long ret; |
4300 | |
4301 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
4302 | ASSERT_EQ(0, ret) { |
4303 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
4304 | } |
4305 | |
4306 | listener = user_notif_syscall(__NR_getppid, |
4307 | SECCOMP_FILTER_FLAG_NEW_LISTENER); |
4308 | ASSERT_GE(listener, 0); |
4309 | |
4310 | /* Try to set invalid flags. */ |
4311 | EXPECT_SYSCALL_RETURN(-EINVAL, |
4312 | ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0)); |
4313 | |
4314 | ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, |
4315 | SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0); |
4316 | |
4317 | pid = fork(); |
4318 | ASSERT_GE(pid, 0); |
4319 | if (pid == 0) { |
4320 | ret = syscall(__NR_getppid); |
4321 | ASSERT_EQ(ret, USER_NOTIF_MAGIC) { |
4322 | _exit(1); |
4323 | } |
4324 | _exit(0); |
4325 | } |
4326 | |
4327 | req.pid = 0; |
4328 | ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
4329 | |
4330 | ASSERT_EQ(req.data.nr, __NR_getppid); |
4331 | |
4332 | resp.id = req.id; |
4333 | resp.error = 0; |
4334 | resp.val = USER_NOTIF_MAGIC; |
4335 | resp.flags = 0; |
4336 | ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); |
4337 | |
4338 | ASSERT_EQ(waitpid(pid, &status, 0), pid); |
4339 | ASSERT_EQ(status, 0); |
4340 | } |
4341 | |
4342 | |
4343 | /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */ |
/* Per-test state shared by the O_SUSPEND_SECCOMP setup, tests and teardown. */
FIXTURE(O_SUSPEND_SECCOMP) {
	/* Child forked by the fixture setup; reset to 0 before the fork. */
	pid_t pid;
};
4347 | |
4348 | FIXTURE_SETUP(O_SUSPEND_SECCOMP) |
4349 | { |
4350 | ERRNO_FILTER(block_read, E2BIG); |
4351 | cap_value_t cap_list[] = { CAP_SYS_ADMIN }; |
4352 | cap_t caps; |
4353 | |
4354 | self->pid = 0; |
4355 | |
4356 | /* make sure we don't have CAP_SYS_ADMIN */ |
4357 | caps = cap_get_proc(); |
4358 | ASSERT_NE(NULL, caps); |
4359 | ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR)); |
4360 | ASSERT_EQ(0, cap_set_proc(caps)); |
4361 | cap_free(caps); |
4362 | |
4363 | ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); |
4364 | ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read)); |
4365 | |
4366 | self->pid = fork(); |
4367 | ASSERT_GE(self->pid, 0); |
4368 | |
4369 | if (self->pid == 0) { |
4370 | while (1) |
4371 | pause(); |
4372 | _exit(127); |
4373 | } |
4374 | } |
4375 | |
4376 | FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP) |
4377 | { |
4378 | if (self->pid) |
4379 | kill(self->pid, SIGKILL); |
4380 | } |
4381 | |
4382 | TEST_F(O_SUSPEND_SECCOMP, setoptions) |
4383 | { |
4384 | int wstatus; |
4385 | |
4386 | ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0)); |
4387 | ASSERT_EQ(self->pid, wait(&wstatus)); |
4388 | ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP)); |
4389 | if (errno == EINVAL) |
4390 | SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)" ); |
4391 | ASSERT_EQ(EPERM, errno); |
4392 | } |
4393 | |
4394 | TEST_F(O_SUSPEND_SECCOMP, seize) |
4395 | { |
4396 | int ret; |
4397 | |
4398 | ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP); |
4399 | ASSERT_EQ(-1, ret); |
4400 | if (errno == EINVAL) |
4401 | SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)" ); |
4402 | ASSERT_EQ(EPERM, errno); |
4403 | } |
4404 | |
4405 | /* |
4406 | * get_nth - Get the nth, space separated entry in a file. |
4407 | * |
4408 | * Returns the length of the read field. |
4409 | * Throws error if field is zero-lengthed. |
4410 | */ |
4411 | static ssize_t get_nth(struct __test_metadata *_metadata, const char *path, |
4412 | const unsigned int position, char **entry) |
4413 | { |
4414 | char *line = NULL; |
4415 | unsigned int i; |
4416 | ssize_t nread; |
4417 | size_t len = 0; |
4418 | FILE *f; |
4419 | |
4420 | f = fopen(path, "r" ); |
4421 | ASSERT_NE(f, NULL) { |
4422 | TH_LOG("Could not open %s: %s" , path, strerror(errno)); |
4423 | } |
4424 | |
4425 | for (i = 0; i < position; i++) { |
4426 | nread = getdelim(&line, &len, ' ', f); |
4427 | ASSERT_GE(nread, 0) { |
4428 | TH_LOG("Failed to read %d entry in file %s" , i, path); |
4429 | } |
4430 | } |
4431 | fclose(f); |
4432 | |
4433 | ASSERT_GT(nread, 0) { |
4434 | TH_LOG("Entry in file %s had zero length" , path); |
4435 | } |
4436 | |
4437 | *entry = line; |
4438 | return nread - 1; |
4439 | } |
4440 | |
/*
 * For a given PID, get the task state (D, R, etc...)
 *
 * Reads the third space-separated entry of /proc/<pid>/stat and asserts that
 * it is exactly one character long.
 *
 * NOTE(review): splitting on spaces presumably breaks if the second entry
 * (the command name) itself contains a space - confirm against proc(5).
 */
static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid)
{
	char proc_path[100] = {0};
	char status;
	char *line;

	snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid);
	ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1);

	status = *line;
	free(line);

	return status;
}
4456 | |
4457 | TEST(user_notification_fifo) |
4458 | { |
4459 | struct seccomp_notif_resp resp = {}; |
4460 | struct seccomp_notif req = {}; |
4461 | int i, status, listener; |
4462 | pid_t pid, pids[3]; |
4463 | __u64 baseid; |
4464 | long ret; |
4465 | /* 100 ms */ |
4466 | struct timespec delay = { .tv_nsec = 100000000 }; |
4467 | |
4468 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
4469 | ASSERT_EQ(0, ret) { |
4470 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
4471 | } |
4472 | |
4473 | /* Setup a listener */ |
4474 | listener = user_notif_syscall(__NR_getppid, |
4475 | SECCOMP_FILTER_FLAG_NEW_LISTENER); |
4476 | ASSERT_GE(listener, 0); |
4477 | |
4478 | pid = fork(); |
4479 | ASSERT_GE(pid, 0); |
4480 | |
4481 | if (pid == 0) { |
4482 | ret = syscall(__NR_getppid); |
4483 | exit(ret != USER_NOTIF_MAGIC); |
4484 | } |
4485 | |
4486 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
4487 | baseid = req.id + 1; |
4488 | |
4489 | resp.id = req.id; |
4490 | resp.error = 0; |
4491 | resp.val = USER_NOTIF_MAGIC; |
4492 | |
4493 | /* check that we make sure flags == 0 */ |
4494 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); |
4495 | |
4496 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
4497 | EXPECT_EQ(true, WIFEXITED(status)); |
4498 | EXPECT_EQ(0, WEXITSTATUS(status)); |
4499 | |
4500 | /* Start children, and generate notifications */ |
4501 | for (i = 0; i < ARRAY_SIZE(pids); i++) { |
4502 | pid = fork(); |
4503 | if (pid == 0) { |
4504 | ret = syscall(__NR_getppid); |
4505 | exit(ret != USER_NOTIF_MAGIC); |
4506 | } |
4507 | pids[i] = pid; |
4508 | } |
4509 | |
4510 | /* This spins until all of the children are sleeping */ |
4511 | restart_wait: |
4512 | for (i = 0; i < ARRAY_SIZE(pids); i++) { |
4513 | if (get_proc_stat(_metadata, pid: pids[i]) != 'S') { |
4514 | nanosleep(&delay, NULL); |
4515 | goto restart_wait; |
4516 | } |
4517 | } |
4518 | |
4519 | /* Read the notifications in order (and respond) */ |
4520 | for (i = 0; i < ARRAY_SIZE(pids); i++) { |
4521 | memset(&req, 0, sizeof(req)); |
4522 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
4523 | EXPECT_EQ(req.id, baseid + i); |
4524 | resp.id = req.id; |
4525 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); |
4526 | } |
4527 | |
4528 | /* Make sure notifications were received */ |
4529 | for (i = 0; i < ARRAY_SIZE(pids); i++) { |
4530 | EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]); |
4531 | EXPECT_EQ(true, WIFEXITED(status)); |
4532 | EXPECT_EQ(0, WEXITSTATUS(status)); |
4533 | } |
4534 | } |
4535 | |
/* get_proc_syscall - Get the syscall in progress for a given pid
 *
 * Reads the first space-separated entry of /proc/<pid>/syscall.
 *
 * Returns the current syscall number for a given process
 * Returns -1 if not in syscall (running or blocked)
 */
static long get_proc_syscall(struct __test_metadata *_metadata, int pid)
{
	char proc_path[100] = {0};
	long ret = -1;
	ssize_t nread;
	char *line;

	snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid);
	nread = get_nth(_metadata, proc_path, 1, &line);
	ASSERT_GT(nread, 0);

	/*
	 * Anything other than the literal "running" starts with the syscall
	 * number, which the kernel prints in decimal ("-1" when the task is
	 * blocked outside of a syscall).
	 */
	if (strncmp(line, "running", strlen("running")))
		ret = strtol(line, NULL, 10);

	free(line);
	return ret;
}
4558 | |
4559 | /* Ensure non-fatal signals prior to receive are unmodified */ |
4560 | TEST(user_notification_wait_killable_pre_notification) |
4561 | { |
4562 | struct sigaction new_action = { |
4563 | .sa_handler = signal_handler, |
4564 | }; |
4565 | int listener, status, sk_pair[2]; |
4566 | pid_t pid; |
4567 | long ret; |
4568 | char c; |
4569 | /* 100 ms */ |
4570 | struct timespec delay = { .tv_nsec = 100000000 }; |
4571 | |
4572 | ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); |
4573 | |
4574 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
4575 | ASSERT_EQ(0, ret) |
4576 | { |
4577 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
4578 | } |
4579 | |
4580 | ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); |
4581 | |
4582 | listener = user_notif_syscall( |
4583 | __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | |
4584 | SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); |
4585 | ASSERT_GE(listener, 0); |
4586 | |
4587 | /* |
4588 | * Check that we can kill the process with SIGUSR1 prior to receiving |
4589 | * the notification. SIGUSR1 is wired up to a custom signal handler, |
4590 | * and make sure it gets called. |
4591 | */ |
4592 | pid = fork(); |
4593 | ASSERT_GE(pid, 0); |
4594 | |
4595 | if (pid == 0) { |
4596 | close(sk_pair[0]); |
4597 | handled = sk_pair[1]; |
4598 | |
4599 | /* Setup the non-fatal sigaction without SA_RESTART */ |
4600 | if (sigaction(SIGUSR1, &new_action, NULL)) { |
4601 | perror("sigaction" ); |
4602 | exit(1); |
4603 | } |
4604 | |
4605 | ret = syscall(__NR_getppid); |
4606 | /* Make sure we got a return from a signal interruption */ |
4607 | exit(ret != -1 || errno != EINTR); |
4608 | } |
4609 | |
4610 | /* |
4611 | * Make sure we've gotten to the seccomp user notification wait |
4612 | * from getppid prior to sending any signals |
4613 | */ |
4614 | while (get_proc_syscall(_metadata, pid) != __NR_getppid && |
4615 | get_proc_stat(_metadata, pid) != 'S') |
4616 | nanosleep(&delay, NULL); |
4617 | |
4618 | /* Send non-fatal kill signal */ |
4619 | EXPECT_EQ(kill(pid, SIGUSR1), 0); |
4620 | |
4621 | /* wait for process to exit (exit checks for EINTR) */ |
4622 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
4623 | EXPECT_EQ(true, WIFEXITED(status)); |
4624 | EXPECT_EQ(0, WEXITSTATUS(status)); |
4625 | |
4626 | EXPECT_EQ(read(sk_pair[0], &c, 1), 1); |
4627 | } |
4628 | |
4629 | /* Ensure non-fatal signals after receive are blocked */ |
4630 | TEST(user_notification_wait_killable) |
4631 | { |
4632 | struct sigaction new_action = { |
4633 | .sa_handler = signal_handler, |
4634 | }; |
4635 | struct seccomp_notif_resp resp = {}; |
4636 | struct seccomp_notif req = {}; |
4637 | int listener, status, sk_pair[2]; |
4638 | pid_t pid; |
4639 | long ret; |
4640 | char c; |
4641 | /* 100 ms */ |
4642 | struct timespec delay = { .tv_nsec = 100000000 }; |
4643 | |
4644 | ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); |
4645 | |
4646 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
4647 | ASSERT_EQ(0, ret) |
4648 | { |
4649 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
4650 | } |
4651 | |
4652 | ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); |
4653 | |
4654 | listener = user_notif_syscall( |
4655 | __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | |
4656 | SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); |
4657 | ASSERT_GE(listener, 0); |
4658 | |
4659 | pid = fork(); |
4660 | ASSERT_GE(pid, 0); |
4661 | |
4662 | if (pid == 0) { |
4663 | close(sk_pair[0]); |
4664 | handled = sk_pair[1]; |
4665 | |
4666 | /* Setup the sigaction without SA_RESTART */ |
4667 | if (sigaction(SIGUSR1, &new_action, NULL)) { |
4668 | perror("sigaction" ); |
4669 | exit(1); |
4670 | } |
4671 | |
4672 | /* Make sure that the syscall is completed (no EINTR) */ |
4673 | ret = syscall(__NR_getppid); |
4674 | exit(ret != USER_NOTIF_MAGIC); |
4675 | } |
4676 | |
4677 | /* |
4678 | * Get the notification, to make move the notifying process into a |
4679 | * non-preemptible (TASK_KILLABLE) state. |
4680 | */ |
4681 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
4682 | /* Send non-fatal kill signal */ |
4683 | EXPECT_EQ(kill(pid, SIGUSR1), 0); |
4684 | |
4685 | /* |
4686 | * Make sure the task enters moves to TASK_KILLABLE by waiting for |
4687 | * D (Disk Sleep) state after receiving non-fatal signal. |
4688 | */ |
4689 | while (get_proc_stat(_metadata, pid) != 'D') |
4690 | nanosleep(&delay, NULL); |
4691 | |
4692 | resp.id = req.id; |
4693 | resp.val = USER_NOTIF_MAGIC; |
4694 | /* Make sure the notification is found and able to be replied to */ |
4695 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); |
4696 | |
4697 | /* |
4698 | * Make sure that the signal handler does get called once we're back in |
4699 | * userspace. |
4700 | */ |
4701 | EXPECT_EQ(read(sk_pair[0], &c, 1), 1); |
4702 | /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */ |
4703 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
4704 | EXPECT_EQ(true, WIFEXITED(status)); |
4705 | EXPECT_EQ(0, WEXITSTATUS(status)); |
4706 | } |
4707 | |
4708 | /* Ensure fatal signals after receive are not blocked */ |
4709 | TEST(user_notification_wait_killable_fatal) |
4710 | { |
4711 | struct seccomp_notif req = {}; |
4712 | int listener, status; |
4713 | pid_t pid; |
4714 | long ret; |
4715 | /* 100 ms */ |
4716 | struct timespec delay = { .tv_nsec = 100000000 }; |
4717 | |
4718 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
4719 | ASSERT_EQ(0, ret) |
4720 | { |
4721 | TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!" ); |
4722 | } |
4723 | |
4724 | listener = user_notif_syscall( |
4725 | __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | |
4726 | SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); |
4727 | ASSERT_GE(listener, 0); |
4728 | |
4729 | pid = fork(); |
4730 | ASSERT_GE(pid, 0); |
4731 | |
4732 | if (pid == 0) { |
4733 | /* This should never complete as it should get a SIGTERM */ |
4734 | syscall(__NR_getppid); |
4735 | exit(1); |
4736 | } |
4737 | |
4738 | while (get_proc_stat(_metadata, pid) != 'S') |
4739 | nanosleep(&delay, NULL); |
4740 | |
4741 | /* |
4742 | * Get the notification, to make move the notifying process into a |
4743 | * non-preemptible (TASK_KILLABLE) state. |
4744 | */ |
4745 | EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); |
4746 | /* Kill the process with a fatal signal */ |
4747 | EXPECT_EQ(kill(pid, SIGTERM), 0); |
4748 | |
4749 | /* |
4750 | * Wait for the process to exit, and make sure the process terminated |
4751 | * due to the SIGTERM signal. |
4752 | */ |
4753 | EXPECT_EQ(waitpid(pid, &status, 0), pid); |
4754 | EXPECT_EQ(true, WIFSIGNALED(status)); |
4755 | EXPECT_EQ(SIGTERM, WTERMSIG(status)); |
4756 | } |
4757 | |
4758 | /* |
4759 | * TODO: |
4760 | * - expand NNP testing |
4761 | * - better arch-specific TRACE and TRAP handlers. |
4762 | * - endianness checking when appropriate |
4763 | * - 64-bit arg prodding |
4764 | * - arch value testing (x86 modes especially) |
4765 | * - verify that FILTER_FLAG_LOG filters generate log messages |
4766 | * - verify that RET_LOG generates log messages |
4767 | */ |
4768 | |
/*
 * Macro from ../kselftest_harness.h; presumably expands to the program's
 * main() that runs the tests declared above - confirm in the harness header.
 */
TEST_HARNESS_MAIN
4770 | |