1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace |
4 | * Copyright (c) 2014-2015 Andrew Lutomirski |
5 | * |
6 | * This is a series of tests that exercises the sigreturn(2) syscall and |
7 | * the IRET / SYSRET paths in the kernel. |
8 | * |
9 | * For now, this focuses on the effects of unusual CS and SS values, |
10 | * and it has a bunch of tests to make sure that ESP/RSP is restored |
11 | * properly. |
12 | * |
13 | * The basic idea behind these tests is to raise(SIGUSR1) to create a |
14 | * sigcontext frame, plug in the values to be tested, and then return, |
15 | * which implicitly invokes sigreturn(2) and programs the user context |
16 | * as desired. |
17 | * |
18 | * For tests for which we expect sigreturn and the subsequent return to |
19 | * user mode to succeed, we return to a short trampoline that generates |
20 | * SIGTRAP so that the meat of the tests can be ordinary C code in a |
21 | * SIGTRAP handler. |
22 | * |
 * The inner workings of each test are documented below.
24 | * |
25 | * Do not run on outdated, unpatched kernels at risk of nasty crashes. |
26 | */ |
27 | |
28 | #define _GNU_SOURCE |
29 | |
30 | #include <sys/time.h> |
31 | #include <time.h> |
32 | #include <stdlib.h> |
33 | #include <sys/syscall.h> |
34 | #include <unistd.h> |
35 | #include <stdio.h> |
36 | #include <string.h> |
37 | #include <inttypes.h> |
38 | #include <sys/mman.h> |
39 | #include <sys/signal.h> |
40 | #include <sys/ucontext.h> |
41 | #include <asm/ldt.h> |
42 | #include <err.h> |
43 | #include <setjmp.h> |
44 | #include <stddef.h> |
45 | #include <stdbool.h> |
46 | #include <sys/ptrace.h> |
47 | #include <sys/user.h> |
48 | |
49 | /* Pull in AR_xyz defines. */ |
50 | typedef unsigned int u32; |
51 | typedef unsigned short u16; |
52 | #include "../../../../arch/x86/include/asm/desc_defs.h" |
53 | |
54 | /* |
55 | * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc |
56 | * headers. |
57 | */ |
58 | #ifdef __x86_64__ |
59 | /* |
60 | * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on |
61 | * kernels that save SS in the sigcontext. All kernels that set |
62 | * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp |
63 | * regardless of SS (i.e. they implement espfix). |
64 | * |
65 | * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS |
66 | * when delivering a signal that came from 64-bit code. |
67 | * |
68 | * Sigreturn restores SS as follows: |
69 | * |
70 | * if (saved SS is valid || UC_STRICT_RESTORE_SS is set || |
71 | * saved CS is not 64-bit) |
72 | * new SS = saved SS (will fail IRET and signal if invalid) |
73 | * else |
74 | * new SS = a flat 32-bit data segment |
75 | */ |
76 | #define UC_SIGCONTEXT_SS 0x2 |
77 | #define UC_STRICT_RESTORE_SS 0x4 |
78 | #endif |
79 | |
80 | /* |
81 | * In principle, this test can run on Linux emulation layers (e.g. |
82 | * Illumos "LX branded zones"). Solaris-based kernels reserve LDT |
83 | * entries 0-5 for their own internal purposes, so start our LDT |
84 | * allocations above that reservation. (The tests don't pass on LX |
85 | * branded zones, but at least this lets them run.) |
86 | */ |
87 | #define LDT_OFFSET 6 |
88 | |
89 | /* An aligned stack accessible through some of our segments. */ |
90 | static unsigned char stack16[65536] __attribute__((aligned(4096))); |
91 | |
/*
 * An aligned int3 instruction used as a trampoline.  Some of the tests
 * want to fish out their ss values, so this trampoline copies ss to ecx
 * before the int3.
 */
97 | asm (".pushsection .text\n\t" |
98 | ".type int3, @function\n\t" |
99 | ".align 4096\n\t" |
100 | "int3:\n\t" |
101 | "mov %ss,%ecx\n\t" |
102 | "int3\n\t" |
103 | ".size int3, . - int3\n\t" |
104 | ".align 4096, 0xcc\n\t" |
105 | ".popsection" ); |
106 | extern char int3[4096]; |
107 | |
108 | /* |
 * At startup, we prepare:
110 | * |
111 | * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero |
112 | * descriptor or out of bounds). |
113 | * - code16_sel: A 16-bit LDT code segment pointing to int3. |
114 | * - data16_sel: A 16-bit LDT data segment pointing to stack16. |
115 | * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3. |
116 | * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16. |
117 | * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16. |
118 | * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to |
119 | * stack16. |
120 | * |
121 | * For no particularly good reason, xyz_sel is a selector value with the |
122 | * RPL and LDT bits filled in, whereas xyz_idx is just an index into the |
123 | * descriptor table. These variables will be zero if their respective |
124 | * segments could not be allocated. |
125 | */ |
126 | static unsigned short ldt_nonexistent_sel; |
127 | static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel; |
128 | |
129 | static unsigned short gdt_data16_idx, gdt_npdata32_idx; |
130 | |
/* Turn a GDT descriptor index into a selector: index in bits 15:3, RPL 3. */
static unsigned short GDT3(int idx)
{
	const int rpl3 = 0x3;	/* table-indicator bit (bit 2) stays clear */

	return (unsigned short)((idx << 3) | rpl3);
}
135 | |
/* Turn an LDT descriptor index into a selector: index in bits 15:3, TI = 1, RPL 3. */
static unsigned short LDT3(int idx)
{
	const int ldt_rpl3 = 0x4 | 0x3;	/* table-indicator bit plus RPL 3 */

	return (unsigned short)((idx << 3) | ldt_rpl3);
}
140 | |
/*
 * Install @handler as the SA_SIGINFO-style handler for @sig.  @flags is
 * OR-ed into sa_flags; sa_mask is left empty.  Exits via err() if
 * sigaction() fails.
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction act;

	memset(&act, 0, sizeof(act));
	sigemptyset(&act.sa_mask);
	act.sa_flags = flags | SA_SIGINFO;
	act.sa_sigaction = handler;

	if (sigaction(sig, &act, NULL) != 0)
		err(1, "sigaction");
}
152 | |
/*
 * Reset the disposition of @sig to SIG_DFL (no flags, empty mask).
 * Exits via err() if sigaction() fails.
 */
static void clearhandler(int sig)
{
	struct sigaction act;

	memset(&act, 0, sizeof(act));
	sigemptyset(&act.sa_mask);
	act.sa_handler = SIG_DFL;

	if (sigaction(sig, &act, NULL) != 0)
		err(1, "sigaction");
}
162 | |
/*
 * Ask the kernel to install @desc via modify_ldt (function code 1).
 *
 * On success, *@var receives the RPL-3 LDT selector for desc->entry_number.
 * On failure a note mentioning @name is printed and *@var is set to 0, so
 * callers can treat a zero selector as "segment unavailable".
 */
static void add_ldt(const struct user_desc *desc, unsigned short *var,
		    const char *name)
{
	long rc = syscall(SYS_modify_ldt, 1, desc, sizeof(*desc));

	if (rc != 0) {
		printf("[NOTE]\tFailed to create %s segment\n", name);
		*var = 0;
		return;
	}

	*var = LDT3(desc->entry_number);
}
173 | |
174 | static void setup_ldt(void) |
175 | { |
176 | if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16)) |
177 | errx(1, "stack16 is too high\n" ); |
178 | if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3)) |
179 | errx(1, "int3 is too high\n" ); |
180 | |
181 | ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2); |
182 | |
183 | const struct user_desc code16_desc = { |
184 | .entry_number = LDT_OFFSET + 0, |
185 | .base_addr = (unsigned long)int3, |
186 | .limit = 4095, |
187 | .seg_32bit = 0, |
188 | .contents = 2, /* Code, not conforming */ |
189 | .read_exec_only = 0, |
190 | .limit_in_pages = 0, |
191 | .seg_not_present = 0, |
192 | .useable = 0 |
193 | }; |
194 | add_ldt(desc: &code16_desc, var: &code16_sel, name: "code16" ); |
195 | |
196 | const struct user_desc data16_desc = { |
197 | .entry_number = LDT_OFFSET + 1, |
198 | .base_addr = (unsigned long)stack16, |
199 | .limit = 0xffff, |
200 | .seg_32bit = 0, |
201 | .contents = 0, /* Data, grow-up */ |
202 | .read_exec_only = 0, |
203 | .limit_in_pages = 0, |
204 | .seg_not_present = 0, |
205 | .useable = 0 |
206 | }; |
207 | add_ldt(desc: &data16_desc, var: &data16_sel, name: "data16" ); |
208 | |
209 | const struct user_desc npcode32_desc = { |
210 | .entry_number = LDT_OFFSET + 3, |
211 | .base_addr = (unsigned long)int3, |
212 | .limit = 4095, |
213 | .seg_32bit = 1, |
214 | .contents = 2, /* Code, not conforming */ |
215 | .read_exec_only = 0, |
216 | .limit_in_pages = 0, |
217 | .seg_not_present = 1, |
218 | .useable = 0 |
219 | }; |
220 | add_ldt(desc: &npcode32_desc, var: &npcode32_sel, name: "npcode32" ); |
221 | |
222 | const struct user_desc npdata32_desc = { |
223 | .entry_number = LDT_OFFSET + 4, |
224 | .base_addr = (unsigned long)stack16, |
225 | .limit = 0xffff, |
226 | .seg_32bit = 1, |
227 | .contents = 0, /* Data, grow-up */ |
228 | .read_exec_only = 0, |
229 | .limit_in_pages = 0, |
230 | .seg_not_present = 1, |
231 | .useable = 0 |
232 | }; |
233 | add_ldt(desc: &npdata32_desc, var: &npdata32_sel, name: "npdata32" ); |
234 | |
235 | struct user_desc gdt_data16_desc = { |
236 | .entry_number = -1, |
237 | .base_addr = (unsigned long)stack16, |
238 | .limit = 0xffff, |
239 | .seg_32bit = 0, |
240 | .contents = 0, /* Data, grow-up */ |
241 | .read_exec_only = 0, |
242 | .limit_in_pages = 0, |
243 | .seg_not_present = 0, |
244 | .useable = 0 |
245 | }; |
246 | |
247 | if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) { |
248 | /* |
249 | * This probably indicates vulnerability to CVE-2014-8133. |
250 | * Merely getting here isn't definitive, though, and we'll |
251 | * diagnose the problem for real later on. |
252 | */ |
253 | printf("[WARN]\tset_thread_area allocated data16 at index %d\n" , |
254 | gdt_data16_desc.entry_number); |
255 | gdt_data16_idx = gdt_data16_desc.entry_number; |
256 | } else { |
257 | printf("[OK]\tset_thread_area refused 16-bit data\n" ); |
258 | } |
259 | |
260 | struct user_desc gdt_npdata32_desc = { |
261 | .entry_number = -1, |
262 | .base_addr = (unsigned long)stack16, |
263 | .limit = 0xffff, |
264 | .seg_32bit = 1, |
265 | .contents = 0, /* Data, grow-up */ |
266 | .read_exec_only = 0, |
267 | .limit_in_pages = 0, |
268 | .seg_not_present = 1, |
269 | .useable = 0 |
270 | }; |
271 | |
272 | if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) { |
273 | /* |
274 | * As a hardening measure, newer kernels don't allow this. |
275 | */ |
276 | printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n" , |
277 | gdt_npdata32_desc.entry_number); |
278 | gdt_npdata32_idx = gdt_npdata32_desc.entry_number; |
279 | } else { |
280 | printf("[OK]\tset_thread_area refused 16-bit data\n" ); |
281 | } |
282 | } |
283 | |
284 | /* State used by our signal handlers. */ |
285 | static gregset_t initial_regs, requested_regs, resulting_regs; |
286 | |
287 | /* Instructions for the SIGUSR1 handler. */ |
288 | static volatile unsigned short sig_cs, sig_ss; |
289 | static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; |
290 | #ifdef __x86_64__ |
291 | static volatile sig_atomic_t sig_corrupt_final_ss; |
292 | #endif |
293 | |
294 | /* Abstractions for some 32-bit vs 64-bit differences. */ |
295 | #ifdef __x86_64__ |
296 | # define REG_IP REG_RIP |
297 | # define REG_SP REG_RSP |
298 | # define REG_CX REG_RCX |
299 | |
/*
 * View of the gregs[REG_CSGSFS] slot as four 16-bit selectors; ssptr()
 * and csptr() cast the address of that slot to this type.
 */
struct selectors {
	unsigned short cs;
	unsigned short gs;
	unsigned short fs;
	unsigned short ss;
};
303 | |
304 | static unsigned short *ssptr(ucontext_t *ctx) |
305 | { |
306 | struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; |
307 | return &sels->ss; |
308 | } |
309 | |
310 | static unsigned short *csptr(ucontext_t *ctx) |
311 | { |
312 | struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; |
313 | return &sels->cs; |
314 | } |
315 | #else |
316 | # define REG_IP REG_EIP |
317 | # define REG_SP REG_ESP |
318 | # define REG_CX REG_ECX |
319 | |
320 | static greg_t *ssptr(ucontext_t *ctx) |
321 | { |
322 | return &ctx->uc_mcontext.gregs[REG_SS]; |
323 | } |
324 | |
325 | static greg_t *csptr(ucontext_t *ctx) |
326 | { |
327 | return &ctx->uc_mcontext.gregs[REG_CS]; |
328 | } |
329 | #endif |
330 | |
/*
 * Classify a selector as a 16-, 32- or 64-bit code segment.
 *
 * Uses LAR to fetch the access rights.  Returns 16, 32 or 64 according to
 * the L (bit 21) and D/B (bit 22) flags, or -1 if LAR rejects the
 * selector, the segment is not code (bit 11 clear), or L and D/B are both
 * set.
 */
int cs_bitness(unsigned short cs)
{
	uint32_t valid = 0, ar;

	/* LAR sets ZF on success; "valid" is only written in that case. */
	__asm__ ("lar %[cs], %[ar]\n\t"
		 "jnz 1f\n\t"
		 "mov $1, %[valid]\n\t"
		 "1:"
		 : [ar] "=r" (ar), [valid] "+rm" (valid)
		 : [cs] "r" (cs));

	if (!valid)
		return -1;

	if ((ar & (1 << 11)) == 0)
		return -1;	/* Not code. */

	/* Pack the two mode flags: bit 0 = L, bit 1 = D/B. */
	switch ((ar >> 21) & 0x3) {
	case 0x1:		/* L set, D/B clear */
		return 64;
	case 0x2:		/* L clear, D/B set */
		return 32;
	case 0x0:		/* neither set */
		return 16;
	default:
		return -1;	/* Unknown bitness. */
	}
}
363 | |
364 | /* |
365 | * Checks a given selector for its code bitness or returns -1 if it's not |
366 | * a usable code segment selector. |
367 | */ |
368 | bool is_valid_ss(unsigned short cs) |
369 | { |
370 | uint32_t valid = 0, ar; |
371 | asm ("lar %[cs], %[ar]\n\t" |
372 | "jnz 1f\n\t" |
373 | "mov $1, %[valid]\n\t" |
374 | "1:" |
375 | : [ar] "=r" (ar), [valid] "+rm" (valid) |
376 | : [cs] "r" (cs)); |
377 | |
378 | if (!valid) |
379 | return false; |
380 | |
381 | if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA && |
382 | (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN) |
383 | return false; |
384 | |
385 | return (ar & AR_P); |
386 | } |
387 | |
388 | /* Number of errors in the current test case. */ |
389 | static volatile sig_atomic_t nerrs; |
390 | |
391 | static void validate_signal_ss(int sig, ucontext_t *ctx) |
392 | { |
393 | #ifdef __x86_64__ |
394 | bool was_64bit = (cs_bitness(cs: *csptr(ctx)) == 64); |
395 | |
396 | if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) { |
397 | printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n" ); |
398 | nerrs++; |
399 | |
400 | /* |
401 | * This happens on Linux 4.1. The rest will fail, too, so |
402 | * return now to reduce the noise. |
403 | */ |
404 | return; |
405 | } |
406 | |
407 | /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */ |
408 | if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) { |
409 | printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n" , |
410 | sig); |
411 | nerrs++; |
412 | } |
413 | |
414 | if (is_valid_ss(cs: *ssptr(ctx))) { |
415 | /* |
416 | * DOSEMU was written before 64-bit sigcontext had SS, and |
417 | * it tries to figure out the signal source SS by looking at |
418 | * the physical register. Make sure that keeps working. |
419 | */ |
420 | unsigned short hw_ss; |
421 | asm ("mov %%ss, %0" : "=rm" (hw_ss)); |
422 | if (hw_ss != *ssptr(ctx)) { |
423 | printf("[FAIL]\tHW SS didn't match saved SS\n" ); |
424 | nerrs++; |
425 | } |
426 | } |
427 | #endif |
428 | } |
429 | |
430 | /* |
431 | * SIGUSR1 handler. Sets CS and SS as requested and points IP to the |
432 | * int3 trampoline. Sets SP to a large known value so that we can see |
433 | * whether the value round-trips back to user mode correctly. |
434 | */ |
435 | static void sigusr1(int sig, siginfo_t *info, void *ctx_void) |
436 | { |
437 | ucontext_t *ctx = (ucontext_t*)ctx_void; |
438 | |
439 | validate_signal_ss(sig, ctx); |
440 | |
441 | memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); |
442 | |
443 | *csptr(ctx) = sig_cs; |
444 | *ssptr(ctx) = sig_ss; |
445 | |
446 | ctx->uc_mcontext.gregs[REG_IP] = |
447 | sig_cs == code16_sel ? 0 : (unsigned long)&int3; |
448 | ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; |
449 | ctx->uc_mcontext.gregs[REG_CX] = 0; |
450 | |
451 | #ifdef __i386__ |
452 | /* |
453 | * Make sure the kernel doesn't inadvertently use DS or ES-relative |
454 | * accesses in a region where user DS or ES is loaded. |
455 | * |
456 | * Skip this for 64-bit builds because long mode doesn't care about |
457 | * DS and ES and skipping it increases test coverage a little bit, |
458 | * since 64-bit kernels can still run the 32-bit build. |
459 | */ |
460 | ctx->uc_mcontext.gregs[REG_DS] = 0; |
461 | ctx->uc_mcontext.gregs[REG_ES] = 0; |
462 | #endif |
463 | |
464 | memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); |
465 | requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */ |
466 | |
467 | return; |
468 | } |
469 | |
470 | /* |
471 | * Called after a successful sigreturn (via int3) or from a failed |
472 | * sigreturn (directly by kernel). Restores our state so that the |
473 | * original raise(SIGUSR1) returns. |
474 | */ |
475 | static void sigtrap(int sig, siginfo_t *info, void *ctx_void) |
476 | { |
477 | ucontext_t *ctx = (ucontext_t*)ctx_void; |
478 | |
479 | validate_signal_ss(sig, ctx); |
480 | |
481 | sig_err = ctx->uc_mcontext.gregs[REG_ERR]; |
482 | sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; |
483 | |
484 | unsigned short ss; |
485 | asm ("mov %%ss,%0" : "=r" (ss)); |
486 | |
487 | greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX]; |
488 | if (asm_ss != sig_ss && sig == SIGTRAP) { |
489 | /* Sanity check failure. */ |
490 | printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n" , |
491 | ss, *ssptr(ctx), (unsigned long long)asm_ss); |
492 | nerrs++; |
493 | } |
494 | |
495 | memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); |
496 | memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); |
497 | |
498 | #ifdef __x86_64__ |
499 | if (sig_corrupt_final_ss) { |
500 | if (ctx->uc_flags & UC_STRICT_RESTORE_SS) { |
501 | printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n" ); |
502 | nerrs++; |
503 | } else { |
504 | /* |
505 | * DOSEMU transitions from 32-bit to 64-bit mode by |
506 | * adjusting sigcontext, and it requires that this work |
507 | * even if the saved SS is bogus. |
508 | */ |
509 | printf("\tCorrupting SS on return to 64-bit mode\n" ); |
510 | *ssptr(ctx) = 0; |
511 | } |
512 | } |
513 | #endif |
514 | |
515 | sig_trapped = sig; |
516 | } |
517 | |
518 | #ifdef __x86_64__ |
519 | /* Tests recovery if !UC_STRICT_RESTORE_SS */ |
520 | static void sigusr2(int sig, siginfo_t *info, void *ctx_void) |
521 | { |
522 | ucontext_t *ctx = (ucontext_t*)ctx_void; |
523 | |
524 | if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) { |
525 | printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n" ); |
526 | nerrs++; |
527 | return; /* We can't do the rest. */ |
528 | } |
529 | |
530 | ctx->uc_flags &= ~UC_STRICT_RESTORE_SS; |
531 | *ssptr(ctx) = 0; |
532 | |
533 | /* Return. The kernel should recover without sending another signal. */ |
534 | } |
535 | |
536 | static int test_nonstrict_ss(void) |
537 | { |
538 | clearhandler(SIGUSR1); |
539 | clearhandler(SIGTRAP); |
540 | clearhandler(SIGSEGV); |
541 | clearhandler(SIGILL); |
542 | sethandler(SIGUSR2, sigusr2, 0); |
543 | |
544 | nerrs = 0; |
545 | |
546 | printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n" ); |
547 | raise(SIGUSR2); |
548 | if (!nerrs) |
549 | printf("[OK]\tIt worked\n" ); |
550 | |
551 | return nerrs; |
552 | } |
553 | #endif |
554 | |
555 | /* Finds a usable code segment of the requested bitness. */ |
556 | int find_cs(int bitness) |
557 | { |
558 | unsigned short my_cs; |
559 | |
560 | asm ("mov %%cs,%0" : "=r" (my_cs)); |
561 | |
562 | if (cs_bitness(cs: my_cs) == bitness) |
563 | return my_cs; |
564 | if (cs_bitness(cs: my_cs + (2 << 3)) == bitness) |
565 | return my_cs + (2 << 3); |
566 | if (my_cs > (2<<3) && cs_bitness(cs: my_cs - (2 << 3)) == bitness) |
567 | return my_cs - (2 << 3); |
568 | if (cs_bitness(cs: code16_sel) == bitness) |
569 | return code16_sel; |
570 | |
571 | printf("[WARN]\tCould not find %d-bit CS\n" , bitness); |
572 | return -1; |
573 | } |
574 | |
575 | static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) |
576 | { |
577 | int cs = find_cs(bitness: cs_bits); |
578 | if (cs == -1) { |
579 | printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n" , |
580 | cs_bits, use_16bit_ss ? 16 : 32); |
581 | return 0; |
582 | } |
583 | |
584 | if (force_ss != -1) { |
585 | sig_ss = force_ss; |
586 | } else { |
587 | if (use_16bit_ss) { |
588 | if (!data16_sel) { |
589 | printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n" , |
590 | cs_bits); |
591 | return 0; |
592 | } |
593 | sig_ss = data16_sel; |
594 | } else { |
595 | asm volatile ("mov %%ss,%0" : "=r" (sig_ss)); |
596 | } |
597 | } |
598 | |
599 | sig_cs = cs; |
600 | |
601 | printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n" , |
602 | cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss, |
603 | (sig_ss & 4) ? "" : ", GDT" ); |
604 | |
605 | raise(SIGUSR1); |
606 | |
607 | nerrs = 0; |
608 | |
609 | /* |
610 | * Check that each register had an acceptable value when the |
611 | * int3 trampoline was invoked. |
612 | */ |
613 | for (int i = 0; i < NGREG; i++) { |
614 | greg_t req = requested_regs[i], res = resulting_regs[i]; |
615 | |
616 | if (i == REG_TRAPNO || i == REG_IP) |
617 | continue; /* don't care */ |
618 | |
619 | if (i == REG_SP) { |
620 | /* |
621 | * If we were using a 16-bit stack segment, then |
622 | * the kernel is a bit stuck: IRET only restores |
623 | * the low 16 bits of ESP/RSP if SS is 16-bit. |
624 | * The kernel uses a hack to restore bits 31:16, |
625 | * but that hack doesn't help with bits 63:32. |
626 | * On Intel CPUs, bits 63:32 end up zeroed, and, on |
627 | * AMD CPUs, they leak the high bits of the kernel |
628 | * espfix64 stack pointer. There's very little that |
629 | * the kernel can do about it. |
630 | * |
631 | * Similarly, if we are returning to a 32-bit context, |
632 | * the CPU will often lose the high 32 bits of RSP. |
633 | */ |
634 | |
635 | if (res == req) |
636 | continue; |
637 | |
638 | if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) { |
639 | printf("[NOTE]\tSP: %llx -> %llx\n" , |
640 | (unsigned long long)req, |
641 | (unsigned long long)res); |
642 | continue; |
643 | } |
644 | |
645 | printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n" , |
646 | (unsigned long long)requested_regs[i], |
647 | (unsigned long long)resulting_regs[i]); |
648 | nerrs++; |
649 | continue; |
650 | } |
651 | |
652 | bool ignore_reg = false; |
653 | #if __i386__ |
654 | if (i == REG_UESP) |
655 | ignore_reg = true; |
656 | #else |
657 | if (i == REG_CSGSFS) { |
658 | struct selectors *req_sels = |
659 | (void *)&requested_regs[REG_CSGSFS]; |
660 | struct selectors *res_sels = |
661 | (void *)&resulting_regs[REG_CSGSFS]; |
662 | if (req_sels->cs != res_sels->cs) { |
663 | printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n" , |
664 | req_sels->cs, res_sels->cs); |
665 | nerrs++; |
666 | } |
667 | |
668 | if (req_sels->ss != res_sels->ss) { |
669 | printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n" , |
670 | req_sels->ss, res_sels->ss); |
671 | nerrs++; |
672 | } |
673 | |
674 | continue; |
675 | } |
676 | #endif |
677 | |
678 | /* Sanity check on the kernel */ |
679 | if (i == REG_CX && req != res) { |
680 | printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n" , |
681 | (unsigned long long)req, |
682 | (unsigned long long)res); |
683 | nerrs++; |
684 | continue; |
685 | } |
686 | |
687 | if (req != res && !ignore_reg) { |
688 | printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n" , |
689 | i, (unsigned long long)req, |
690 | (unsigned long long)res); |
691 | nerrs++; |
692 | } |
693 | } |
694 | |
695 | if (nerrs == 0) |
696 | printf("[OK]\tall registers okay\n" ); |
697 | |
698 | return nerrs; |
699 | } |
700 | |
701 | static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) |
702 | { |
703 | int cs = force_cs == -1 ? find_cs(bitness: cs_bits) : force_cs; |
704 | if (cs == -1) |
705 | return 0; |
706 | |
707 | sig_cs = cs; |
708 | sig_ss = ss; |
709 | |
710 | printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n" , |
711 | cs_bits, sig_cs, sig_ss); |
712 | |
713 | sig_trapped = 0; |
714 | raise(SIGUSR1); |
715 | if (sig_trapped) { |
716 | char errdesc[32] = "" ; |
717 | if (sig_err) { |
718 | const char *src = (sig_err & 1) ? " EXT" : "" ; |
719 | const char *table; |
720 | if ((sig_err & 0x6) == 0x0) |
721 | table = "GDT" ; |
722 | else if ((sig_err & 0x6) == 0x4) |
723 | table = "LDT" ; |
724 | else if ((sig_err & 0x6) == 0x2) |
725 | table = "IDT" ; |
726 | else |
727 | table = "???" ; |
728 | |
729 | sprintf(errdesc, "%s%s index %d, " , |
730 | table, src, sig_err >> 3); |
731 | } |
732 | |
733 | char trapname[32]; |
734 | if (sig_trapno == 13) |
735 | strcpy(trapname, "GP" ); |
736 | else if (sig_trapno == 11) |
737 | strcpy(trapname, "NP" ); |
738 | else if (sig_trapno == 12) |
739 | strcpy(trapname, "SS" ); |
740 | else if (sig_trapno == 32) |
741 | strcpy(trapname, "IRET" ); /* X86_TRAP_IRET */ |
742 | else |
743 | sprintf(trapname, "%d" , sig_trapno); |
744 | |
745 | printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n" , |
746 | trapname, (unsigned long)sig_err, |
747 | errdesc, strsignal(sig_trapped)); |
748 | return 0; |
749 | } else { |
750 | /* |
751 | * This also implicitly tests UC_STRICT_RESTORE_SS: |
752 | * We check that these signals set UC_STRICT_RESTORE_SS and, |
753 | * if UC_STRICT_RESTORE_SS doesn't cause strict behavior, |
754 | * then we won't get SIGSEGV. |
755 | */ |
756 | printf("[FAIL]\tDid not get SIGSEGV\n" ); |
757 | return 1; |
758 | } |
759 | } |
760 | |
761 | int main() |
762 | { |
763 | int total_nerrs = 0; |
764 | unsigned short my_cs, my_ss; |
765 | |
766 | asm volatile ("mov %%cs,%0" : "=r" (my_cs)); |
767 | asm volatile ("mov %%ss,%0" : "=r" (my_ss)); |
768 | setup_ldt(); |
769 | |
770 | stack_t stack = { |
771 | /* Our sigaltstack scratch space. */ |
772 | .ss_sp = malloc(sizeof(char) * SIGSTKSZ), |
773 | .ss_size = SIGSTKSZ, |
774 | }; |
775 | if (sigaltstack(&stack, NULL) != 0) |
776 | err(1, "sigaltstack" ); |
777 | |
778 | sethandler(SIGUSR1, sigusr1, 0); |
779 | sethandler(SIGTRAP, sigtrap, SA_ONSTACK); |
780 | |
781 | /* Easy cases: return to a 32-bit SS in each possible CS bitness. */ |
782 | total_nerrs += test_valid_sigreturn(cs_bits: 64, use_16bit_ss: false, force_ss: -1); |
783 | total_nerrs += test_valid_sigreturn(cs_bits: 32, use_16bit_ss: false, force_ss: -1); |
784 | total_nerrs += test_valid_sigreturn(cs_bits: 16, use_16bit_ss: false, force_ss: -1); |
785 | |
786 | /* |
787 | * Test easy espfix cases: return to a 16-bit LDT SS in each possible |
788 | * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant. |
789 | * |
790 | * This catches the original missing-espfix-on-64-bit-kernels issue |
791 | * as well as CVE-2014-8134. |
792 | */ |
793 | total_nerrs += test_valid_sigreturn(cs_bits: 64, use_16bit_ss: true, force_ss: -1); |
794 | total_nerrs += test_valid_sigreturn(cs_bits: 32, use_16bit_ss: true, force_ss: -1); |
795 | total_nerrs += test_valid_sigreturn(cs_bits: 16, use_16bit_ss: true, force_ss: -1); |
796 | |
797 | if (gdt_data16_idx) { |
798 | /* |
799 | * For performance reasons, Linux skips espfix if SS points |
800 | * to the GDT. If we were able to allocate a 16-bit SS in |
801 | * the GDT, see if it leaks parts of the kernel stack pointer. |
802 | * |
803 | * This tests for CVE-2014-8133. |
804 | */ |
805 | total_nerrs += test_valid_sigreturn(cs_bits: 64, use_16bit_ss: true, |
806 | force_ss: GDT3(idx: gdt_data16_idx)); |
807 | total_nerrs += test_valid_sigreturn(cs_bits: 32, use_16bit_ss: true, |
808 | force_ss: GDT3(idx: gdt_data16_idx)); |
809 | total_nerrs += test_valid_sigreturn(cs_bits: 16, use_16bit_ss: true, |
810 | force_ss: GDT3(idx: gdt_data16_idx)); |
811 | } |
812 | |
813 | #ifdef __x86_64__ |
814 | /* Nasty ABI case: check SS corruption handling. */ |
815 | sig_corrupt_final_ss = 1; |
816 | total_nerrs += test_valid_sigreturn(cs_bits: 32, use_16bit_ss: false, force_ss: -1); |
817 | total_nerrs += test_valid_sigreturn(cs_bits: 32, use_16bit_ss: true, force_ss: -1); |
818 | sig_corrupt_final_ss = 0; |
819 | #endif |
820 | |
821 | /* |
822 | * We're done testing valid sigreturn cases. Now we test states |
823 | * for which sigreturn itself will succeed but the subsequent |
824 | * entry to user mode will fail. |
825 | * |
826 | * Depending on the failure mode and the kernel bitness, these |
827 | * entry failures can generate SIGSEGV, SIGBUS, or SIGILL. |
828 | */ |
829 | clearhandler(SIGTRAP); |
830 | sethandler(SIGSEGV, sigtrap, SA_ONSTACK); |
831 | sethandler(SIGBUS, sigtrap, SA_ONSTACK); |
832 | sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */ |
833 | |
834 | /* Easy failures: invalid SS, resulting in #GP(0) */ |
835 | test_bad_iret(cs_bits: 64, ss: ldt_nonexistent_sel, force_cs: -1); |
836 | test_bad_iret(cs_bits: 32, ss: ldt_nonexistent_sel, force_cs: -1); |
837 | test_bad_iret(cs_bits: 16, ss: ldt_nonexistent_sel, force_cs: -1); |
838 | |
839 | /* These fail because SS isn't a data segment, resulting in #GP(SS) */ |
840 | test_bad_iret(cs_bits: 64, ss: my_cs, force_cs: -1); |
841 | test_bad_iret(cs_bits: 32, ss: my_cs, force_cs: -1); |
842 | test_bad_iret(cs_bits: 16, ss: my_cs, force_cs: -1); |
843 | |
844 | /* Try to return to a not-present code segment, triggering #NP(SS). */ |
845 | test_bad_iret(cs_bits: 32, ss: my_ss, force_cs: npcode32_sel); |
846 | |
847 | /* |
848 | * Try to return to a not-present but otherwise valid data segment. |
849 | * This will cause IRET to fail with #SS on the espfix stack. This |
850 | * exercises CVE-2014-9322. |
851 | * |
852 | * Note that, if espfix is enabled, 64-bit Linux will lose track |
853 | * of the actual cause of failure and report #GP(0) instead. |
854 | * This would be very difficult for Linux to avoid, because |
855 | * espfix64 causes IRET failures to be promoted to #DF, so the |
856 | * original exception frame is never pushed onto the stack. |
857 | */ |
858 | test_bad_iret(cs_bits: 32, ss: npdata32_sel, force_cs: -1); |
859 | |
860 | /* |
861 | * Try to return to a not-present but otherwise valid data |
862 | * segment without invoking espfix. Newer kernels don't allow |
863 | * this to happen in the first place. On older kernels, though, |
864 | * this can trigger CVE-2014-9322. |
865 | */ |
866 | if (gdt_npdata32_idx) |
867 | test_bad_iret(cs_bits: 32, ss: GDT3(idx: gdt_npdata32_idx), force_cs: -1); |
868 | |
869 | #ifdef __x86_64__ |
870 | total_nerrs += test_nonstrict_ss(); |
871 | #endif |
872 | |
873 | free(stack.ss_sp); |
874 | return total_nerrs ? 1 : 0; |
875 | } |
876 | |