sigreturn.c source code [linux/tools/testing/selftests/x86/sigreturn.c]

// SPDX-License-Identifier: GPL-2.0-only
/*
 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
 * Copyright (c) 2014-2015 Andrew Lutomirski
 *
 * This is a series of tests that exercises the sigreturn(2) syscall and
 * the IRET / SYSRET paths in the kernel.
 *
 * For now, this focuses on the effects of unusual CS and SS values,
 * and it has a bunch of tests to make sure that ESP/RSP is restored
 * properly.
 *
 * The basic idea behind these tests is to raise(SIGUSR1) to create a
 * sigcontext frame, plug in the values to be tested, and then return,
 * which implicitly invokes sigreturn(2) and programs the user context
 * as desired.
 *
 * For tests for which we expect sigreturn and the subsequent return to
 * user mode to succeed, we return to a short trampoline that generates
 * SIGTRAP so that the meat of the tests can be ordinary C code in a
 * SIGTRAP handler.
 *
 * The inner workings of each test are documented below.
 *
 * Do not run on outdated, unpatched kernels at risk of nasty crashes.
 */
27
28	#define _GNU_SOURCE
29
30	#include <sys/time.h>
31	#include <time.h>
32	#include <stdlib.h>
33	#include <sys/syscall.h>
34	#include <unistd.h>
35	#include <stdio.h>
36	#include <string.h>
37	#include <inttypes.h>
38	#include <sys/mman.h>
39	#include <sys/signal.h>
40	#include <sys/ucontext.h>
41	#include <asm/ldt.h>
42	#include <err.h>
43	#include <setjmp.h>
44	#include <stddef.h>
45	#include <stdbool.h>
46	#include <sys/ptrace.h>
47	#include <sys/user.h>
48
49	/ Pull in AR_xyz defines. /
50	typedef unsigned int u32;
51	typedef unsigned short u16;
52	#include "../../../../arch/x86/include/asm/desc_defs.h"
53
/*
 * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
 * headers.
 */
#ifdef __x86_64__
/*
 * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
 * kernels that save SS in the sigcontext.  All kernels that set
 * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
 * regardless of SS (i.e. they implement espfix).
 *
 * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
 * when delivering a signal that came from 64-bit code.
 *
 * Sigreturn restores SS as follows:
 *
 * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
 *     saved CS is not 64-bit)
 *         new SS = saved SS  (will fail IRET and signal if invalid)
 * else
 *         new SS = a flat 32-bit data segment
 */
#define UC_SIGCONTEXT_SS       0x2
#define UC_STRICT_RESTORE_SS   0x4
#endif
79
/*
 * In principle, this test can run on Linux emulation layers (e.g.
 * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
 * entries 0-5 for their own internal purposes, so start our LDT
 * allocations above that reservation.  (The tests don't pass on LX
 * branded zones, but at least this lets them run.)
 */
#define LDT_OFFSET 6
88
/*
 * An aligned stack accessible through some of our segments.
 *
 * It is exactly 64 KiB, which matches the 0xffff limit used by the data
 * segment descriptors that are based at this buffer.
 */
static unsigned char stack16[65536] __attribute__((aligned(4096)));
91
92	/*
93	* An aligned int3 instruction used as a trampoline. Some of the tests
94	* want to fish out their ss values, so this trampoline copies ss to eax
95	* before the int3.
96	*/
97	asm (".pushsection .text\n\t"
98	".type int3, @function\n\t"
99	".align 4096\n\t"
100	"int3:\n\t"
101	"mov %ss,%ecx\n\t"
102	"int3\n\t"
103	".size int3, . - int3\n\t"
104	".align 4096, 0xcc\n\t"
105	".popsection");
106	extern char int3[`4096`];
107
/*
 * At startup, we prepare:
 *
 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
 *   descriptor or out of bounds).
 * - code16_sel: A 16-bit LDT code segment pointing to int3.
 * - data16_sel: A 16-bit LDT data segment pointing to stack16.
 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
 *   stack16.
 *
 * For no particularly good reason, xyz_sel is a selector value with the
 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
 * descriptor table.  These variables will be zero if their respective
 * segments could not be allocated.
 */
static unsigned short ldt_nonexistent_sel;
static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;

static unsigned short gdt_data16_idx, gdt_npdata32_idx;
130
/*
 * Builds a selector for GDT entry @idx with RPL 3: the index goes in
 * bits 15:3, the table-indicator bit (bit 2) stays clear and the low
 * two bits are set.
 */
static unsigned short GDT3(int idx)
{
	return (idx << 3) | 3;
}
135
/*
 * Builds a selector for LDT entry @idx with RPL 3: the index goes in
 * bits 15:3, and the table-indicator bit (bit 2) plus the low two bits
 * are all set.
 */
static unsigned short LDT3(int idx)
{
	return (idx << 3) | 7;
}
140
141	static void sethandler(int sig, void (handler)(int, siginfo_t , void *),
142	int flags)
143	{
144	struct sigaction sa;
145	memset(&sa, `0`, sizeof(sa));
146	sa.sa_sigaction = handler;
147	sa.sa_flags = SA_SIGINFO \| flags;
148	sigemptyset(&sa.sa_mask);
149	if (sigaction(sig, &sa, `0`))
150	err(`1`, "sigaction");
151	}
152
153	static void clearhandler(int sig)
154	{
155	struct sigaction sa;
156	memset(&sa, `0`, sizeof(sa));
157	sa.sa_handler = SIG_DFL;
158	sigemptyset(&sa.sa_mask);
159	if (sigaction(sig, &sa, `0`))
160	err(`1`, "sigaction");
161	}
162
163	static void add_ldt(const struct user_desc desc, unsigned* short *var,
164	const char *name)
165	{
166	if (syscall(SYS_modify_ldt, `1`, desc, sizeof(*desc)) == `0`) {
167	*var = LDT3(idx: desc->entry_number);
168	} else {
169	printf("[NOTE]\tFailed to create %s segment\n", name);
170	*var = `0`;
171	}
172	}
173
174	static void setup_ldt(void)
175	{
176	if ((unsigned long)stack16 > (`1ULL` << `32`) - sizeof(stack16))
177	errx(`1`, "stack16 is too high\n");
178	if ((unsigned long)int3 > (`1ULL` << `32`) - sizeof(int3))
179	errx(`1`, "int3 is too high\n");
180
181	ldt_nonexistent_sel = LDT3(LDT_OFFSET + `2`);
182
183	const struct user_desc code16_desc = {
184	.entry_number = LDT_OFFSET + `0`,
185	.base_addr = (unsigned long)int3,
186	.limit = `4095`,
187	.seg_32bit = `0`,
188	.contents = `2`, / Code, not conforming /
189	.read_exec_only = `0`,
190	.limit_in_pages = `0`,
191	.seg_not_present = `0`,
192	.useable = `0`
193	};
194	add_ldt(desc: &code16_desc, var: &code16_sel, name: "code16");
195
196	const struct user_desc data16_desc = {
197	.entry_number = LDT_OFFSET + `1`,
198	.base_addr = (unsigned long)stack16,
199	.limit = `0xffff`,
200	.seg_32bit = `0`,
201	.contents = `0`, / Data, grow-up /
202	.read_exec_only = `0`,
203	.limit_in_pages = `0`,
204	.seg_not_present = `0`,
205	.useable = `0`
206	};
207	add_ldt(desc: &data16_desc, var: &data16_sel, name: "data16");
208
209	const struct user_desc npcode32_desc = {
210	.entry_number = LDT_OFFSET + `3`,
211	.base_addr = (unsigned long)int3,
212	.limit = `4095`,
213	.seg_32bit = `1`,
214	.contents = `2`, / Code, not conforming /
215	.read_exec_only = `0`,
216	.limit_in_pages = `0`,
217	.seg_not_present = `1`,
218	.useable = `0`
219	};
220	add_ldt(desc: &npcode32_desc, var: &npcode32_sel, name: "npcode32");
221
222	const struct user_desc npdata32_desc = {
223	.entry_number = LDT_OFFSET + `4`,
224	.base_addr = (unsigned long)stack16,
225	.limit = `0xffff`,
226	.seg_32bit = `1`,
227	.contents = `0`, / Data, grow-up /
228	.read_exec_only = `0`,
229	.limit_in_pages = `0`,
230	.seg_not_present = `1`,
231	.useable = `0`
232	};
233	add_ldt(desc: &npdata32_desc, var: &npdata32_sel, name: "npdata32");
234
235	struct user_desc gdt_data16_desc = {
236	.entry_number = -`1`,
237	.base_addr = (unsigned long)stack16,
238	.limit = `0xffff`,
239	.seg_32bit = `0`,
240	.contents = `0`, / Data, grow-up /
241	.read_exec_only = `0`,
242	.limit_in_pages = `0`,
243	.seg_not_present = `0`,
244	.useable = `0`
245	};
246
247	if (syscall(SYS_set_thread_area, &gdt_data16_desc) == `0`) {
248	/*
249	* This probably indicates vulnerability to CVE-2014-8133.
250	* Merely getting here isn't definitive, though, and we'll
251	* diagnose the problem for real later on.
252	*/
253	printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
254	gdt_data16_desc.entry_number);
255	gdt_data16_idx = gdt_data16_desc.entry_number;
256	} else {
257	printf("[OK]\tset_thread_area refused 16-bit data\n");
258	}
259
260	struct user_desc gdt_npdata32_desc = {
261	.entry_number = -`1`,
262	.base_addr = (unsigned long)stack16,
263	.limit = `0xffff`,
264	.seg_32bit = `1`,
265	.contents = `0`, / Data, grow-up /
266	.read_exec_only = `0`,
267	.limit_in_pages = `0`,
268	.seg_not_present = `1`,
269	.useable = `0`
270	};
271
272	if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == `0`) {
273	/*
274	* As a hardening measure, newer kernels don't allow this.
275	*/
276	printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
277	gdt_npdata32_desc.entry_number);
278	gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
279	} else {
280	printf("[OK]\tset_thread_area refused 16-bit data\n");
281	}
282	}
283
/*
 * State used by our signal handlers.
 *
 * initial_regs:   registers saved by sigusr1 before it edits the frame.
 * requested_regs: registers sigusr1 asked sigreturn to load.
 * resulting_regs: registers captured by sigtrap after the return.
 */
static gregset_t initial_regs, requested_regs, resulting_regs;

/* Instructions for the SIGUSR1 handler. */
static volatile unsigned short sig_cs, sig_ss;
/* Written by sigtrap: the delivered signal and its ERR/TRAPNO values. */
static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
#ifdef __x86_64__
/* When nonzero, sigtrap zeroes the saved SS before returning. */
static volatile sig_atomic_t sig_corrupt_final_ss;
#endif
293
294	/ Abstractions for some 32-bit vs 64-bit differences. /
295	#ifdef __x86_64__
296	# define REG_IP REG_RIP
297	# define REG_SP REG_RSP
298	# define REG_CX REG_RCX
299
300	struct selectors {
301	unsigned short cs, gs, fs, ss;
302	};
303
304	static unsigned short ssptr(ucontext_t ctx)
305	{
306	struct selectors sels = (void* *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
307	return &sels->ss;
308	}
309
310	static unsigned short csptr(ucontext_t ctx)
311	{
312	struct selectors sels = (void* *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
313	return &sels->cs;
314	}
315	#else
316	# define REG_IP REG_EIP
317	# define REG_SP REG_ESP
318	# define REG_CX REG_ECX
319
320	static greg_t ssptr(ucontext_t ctx)
321	{
322	return &ctx->uc_mcontext.gregs[REG_SS];
323	}
324
325	static greg_t csptr(ucontext_t ctx)
326	{
327	return &ctx->uc_mcontext.gregs[REG_CS];
328	}
329	#endif
330
331	/*
332	* Checks a given selector for its code bitness or returns -1 if it's not
333	* a usable code segment selector.
334	*/
335	int cs_bitness(unsigned short cs)
336	{
337	uint32_t valid = `0`, ar;
338	asm ("lar %[cs], %[ar]\n\t"
339	"jnz 1f\n\t"
340	"mov $1, %[valid]\n\t"
341	"1:"
342	: [ar] "=r" (ar), [valid] "+rm" (valid)
343	: [cs] "r" (cs));
344
345	if (!valid)
346	return -`1`;
347
348	bool db = (ar & (`1` << `22`));
349	bool l = (ar & (`1` << `21`));
350
351	if (!(ar & (`1`<<`11`)))
352	return -`1`; / Not code. /
353
354	if (l && !db)
355	return `64`;
356	else if (!l && db)
357	return `32`;
358	else if (!l && !db)
359	return `16`;
360	else
361	return -`1`; / Unknown bitness. /
362	}
363
364	/*
365	* Checks a given selector for its code bitness or returns -1 if it's not
366	* a usable code segment selector.
367	*/
368	bool is_valid_ss(unsigned short cs)
369	{
370	uint32_t valid = `0`, ar;
371	asm ("lar %[cs], %[ar]\n\t"
372	"jnz 1f\n\t"
373	"mov $1, %[valid]\n\t"
374	"1:"
375	: [ar] "=r" (ar), [valid] "+rm" (valid)
376	: [cs] "r" (cs));
377
378	if (!valid)
379	return false;
380
381	if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
382	(ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
383	return false;
384
385	return (ar & AR_P);
386	}
387
/*
 * Number of errors in the current test case.  It is incremented both
 * from the test functions and from the signal handlers.
 */
static volatile sig_atomic_t nerrs;
390
391	static void validate_signal_ss(int sig, ucontext_t *ctx)
392	{
393	#ifdef __x86_64__
394	bool was_64bit = (cs_bitness(cs: *csptr(ctx)) == `64`);
395
396	if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
397	printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
398	nerrs++;
399
400	/*
401	* This happens on Linux 4.1. The rest will fail, too, so
402	* return now to reduce the noise.
403	*/
404	return;
405	}
406
407	/ UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. /
408	if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
409	printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
410	sig);
411	nerrs++;
412	}
413
414	if (is_valid_ss(cs: *ssptr(ctx))) {
415	/*
416	* DOSEMU was written before 64-bit sigcontext had SS, and
417	* it tries to figure out the signal source SS by looking at
418	* the physical register. Make sure that keeps working.
419	*/
420	unsigned short hw_ss;
421	asm ("mov %%ss, %0" : "=rm" (hw_ss));
422	if (hw_ss != *ssptr(ctx)) {
423	printf("[FAIL]\tHW SS didn't match saved SS\n");
424	nerrs++;
425	}
426	}
427	#endif
428	}
429
430	/*
431	* SIGUSR1 handler. Sets CS and SS as requested and points IP to the
432	* int3 trampoline. Sets SP to a large known value so that we can see
433	* whether the value round-trips back to user mode correctly.
434	*/
435	static void sigusr1(int sig, siginfo_t info, void* *ctx_void)
436	{
437	ucontext_t ctx = (ucontext_t)ctx_void;
438
439	validate_signal_ss(sig, ctx);
440
441	memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
442
443	*csptr(ctx) = sig_cs;
444	*ssptr(ctx) = sig_ss;
445
446	ctx->uc_mcontext.gregs[REG_IP] =
447	sig_cs == code16_sel ? `0` : (unsigned long)&int3;
448	ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)`0x8badf00d5aadc0deULL`;
449	ctx->uc_mcontext.gregs[REG_CX] = `0`;
450
451	#ifdef __i386__
452	/*
453	* Make sure the kernel doesn't inadvertently use DS or ES-relative
454	* accesses in a region where user DS or ES is loaded.
455	*
456	* Skip this for 64-bit builds because long mode doesn't care about
457	* DS and ES and skipping it increases test coverage a little bit,
458	* since 64-bit kernels can still run the 32-bit build.
459	*/
460	ctx->uc_mcontext.gregs[REG_DS] = `0`;
461	ctx->uc_mcontext.gregs[REG_ES] = `0`;
462	#endif
463
464	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
465	requested_regs[REG_CX] = ssptr(ctx); /* The asm code does this. /
466
467	return;
468	}
469
470	/*
471	* Called after a successful sigreturn (via int3) or from a failed
472	* sigreturn (directly by kernel). Restores our state so that the
473	* original raise(SIGUSR1) returns.
474	*/
475	static void sigtrap(int sig, siginfo_t info, void* *ctx_void)
476	{
477	ucontext_t ctx = (ucontext_t)ctx_void;
478
479	validate_signal_ss(sig, ctx);
480
481	sig_err = ctx->uc_mcontext.gregs[REG_ERR];
482	sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
483
484	unsigned short ss;
485	asm ("mov %%ss,%0" : "=r" (ss));
486
487	greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
488	if (asm_ss != sig_ss && sig == SIGTRAP) {
489	/ Sanity check failure. /
490	printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
491	ss, ssptr(ctx), (unsigned* long long)asm_ss);
492	nerrs++;
493	}
494
495	memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
496	memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
497
498	#ifdef __x86_64__
499	if (sig_corrupt_final_ss) {
500	if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
501	printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
502	nerrs++;
503	} else {
504	/*
505	* DOSEMU transitions from 32-bit to 64-bit mode by
506	* adjusting sigcontext, and it requires that this work
507	* even if the saved SS is bogus.
508	*/
509	printf("\tCorrupting SS on return to 64-bit mode\n");
510	*ssptr(ctx) = `0`;
511	}
512	}
513	#endif
514
515	sig_trapped = sig;
516	}
517
518	#ifdef __x86_64__
519	/ Tests recovery if !UC_STRICT_RESTORE_SS /
520	static void sigusr2(int sig, siginfo_t info, void* *ctx_void)
521	{
522	ucontext_t ctx = (ucontext_t)ctx_void;
523
524	if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
525	printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
526	nerrs++;
527	return; / We can't do the rest. /
528	}
529
530	ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
531	*ssptr(ctx) = `0`;
532
533	/ Return. The kernel should recover without sending another signal. /
534	}
535
536	static int test_nonstrict_ss(void)
537	{
538	clearhandler(SIGUSR1);
539	clearhandler(SIGTRAP);
540	clearhandler(SIGSEGV);
541	clearhandler(SIGILL);
542	sethandler(SIGUSR2, sigusr2, `0`);
543
544	nerrs = `0`;
545
546	printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
547	raise(SIGUSR2);
548	if (!nerrs)
549	printf("[OK]\tIt worked\n");
550
551	return nerrs;
552	}
553	#endif
554
555	/ Finds a usable code segment of the requested bitness. /
556	int find_cs(int bitness)
557	{
558	unsigned short my_cs;
559
560	asm ("mov %%cs,%0" : "=r" (my_cs));
561
562	if (cs_bitness(cs: my_cs) == bitness)
563	return my_cs;
564	if (cs_bitness(cs: my_cs + (`2` << `3`)) == bitness)
565	return my_cs + (`2` << `3`);
566	if (my_cs > (`2`<<`3`) && cs_bitness(cs: my_cs - (`2` << `3`)) == bitness)
567	return my_cs - (`2` << `3`);
568	if (cs_bitness(cs: code16_sel) == bitness)
569	return code16_sel;
570
571	printf("[WARN]\tCould not find %d-bit CS\n", bitness);
572	return -`1`;
573	}
574
575	static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
576	{
577	int cs = find_cs(bitness: cs_bits);
578	if (cs == -`1`) {
579	printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
580	cs_bits, use_16bit_ss ? `16` : `32`);
581	return `0`;
582	}
583
584	if (force_ss != -`1`) {
585	sig_ss = force_ss;
586	} else {
587	if (use_16bit_ss) {
588	if (!data16_sel) {
589	printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
590	cs_bits);
591	return `0`;
592	}
593	sig_ss = data16_sel;
594	} else {
595	asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
596	}
597	}
598
599	sig_cs = cs;
600
601	printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
602	cs_bits, sig_cs, use_16bit_ss ? `16` : `32`, sig_ss,
603	(sig_ss & `4`) ? "" : ", GDT");
604
605	raise(SIGUSR1);
606
607	nerrs = `0`;
608
609	/*
610	* Check that each register had an acceptable value when the
611	* int3 trampoline was invoked.
612	*/
613	for (int i = `0`; i < NGREG; i++) {
614	greg_t req = requested_regs[i], res = resulting_regs[i];
615
616	if (i == REG_TRAPNO \|\| i == REG_IP)
617	continue; / don't care /
618
619	if (i == REG_SP) {
620	/*
621	* If we were using a 16-bit stack segment, then
622	* the kernel is a bit stuck: IRET only restores
623	* the low 16 bits of ESP/RSP if SS is 16-bit.
624	* The kernel uses a hack to restore bits 31:16,
625	* but that hack doesn't help with bits 63:32.
626	* On Intel CPUs, bits 63:32 end up zeroed, and, on
627	* AMD CPUs, they leak the high bits of the kernel
628	* espfix64 stack pointer. There's very little that
629	* the kernel can do about it.
630	*
631	* Similarly, if we are returning to a 32-bit context,
632	* the CPU will often lose the high 32 bits of RSP.
633	*/
634
635	if (res == req)
636	continue;
637
638	if (cs_bits != `64` && ((res ^ req) & `0xFFFFFFFF`) == `0`) {
639	printf("[NOTE]\tSP: %llx -> %llx\n",
640	(unsigned long long)req,
641	(unsigned long long)res);
642	continue;
643	}
644
645	printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
646	(unsigned long long)requested_regs[i],
647	(unsigned long long)resulting_regs[i]);
648	nerrs++;
649	continue;
650	}
651
652	bool ignore_reg = false;
653	#if __i386__
654	if (i == REG_UESP)
655	ignore_reg = true;
656	#else
657	if (i == REG_CSGSFS) {
658	struct selectors *req_sels =
659	(void *)&requested_regs[REG_CSGSFS];
660	struct selectors *res_sels =
661	(void *)&resulting_regs[REG_CSGSFS];
662	if (req_sels->cs != res_sels->cs) {
663	printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
664	req_sels->cs, res_sels->cs);
665	nerrs++;
666	}
667
668	if (req_sels->ss != res_sels->ss) {
669	printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
670	req_sels->ss, res_sels->ss);
671	nerrs++;
672	}
673
674	continue;
675	}
676	#endif
677
678	/ Sanity check on the kernel /
679	if (i == REG_CX && req != res) {
680	printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
681	(unsigned long long)req,
682	(unsigned long long)res);
683	nerrs++;
684	continue;
685	}
686
687	if (req != res && !ignore_reg) {
688	printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
689	i, (unsigned long long)req,
690	(unsigned long long)res);
691	nerrs++;
692	}
693	}
694
695	if (nerrs == `0`)
696	printf("[OK]\tall registers okay\n");
697
698	return nerrs;
699	}
700
701	static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
702	{
703	int cs = force_cs == -`1` ? find_cs(bitness: cs_bits) : force_cs;
704	if (cs == -`1`)
705	return `0`;
706
707	sig_cs = cs;
708	sig_ss = ss;
709
710	printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
711	cs_bits, sig_cs, sig_ss);
712
713	sig_trapped = `0`;
714	raise(SIGUSR1);
715	if (sig_trapped) {
716	char errdesc[`32`] = "";
717	if (sig_err) {
718	const char *src = (sig_err & `1`) ? " EXT" : "";
719	const char *table;
720	if ((sig_err & `0x6`) == `0x0`)
721	table = "GDT";
722	else if ((sig_err & `0x6`) == `0x4`)
723	table = "LDT";
724	else if ((sig_err & `0x6`) == `0x2`)
725	table = "IDT";
726	else
727	table = "???";
728
729	sprintf(errdesc, "%s%s index %d, ",
730	table, src, sig_err >> `3`);
731	}
732
733	char trapname[`32`];
734	if (sig_trapno == `13`)
735	strcpy(trapname, "GP");
736	else if (sig_trapno == `11`)
737	strcpy(trapname, "NP");
738	else if (sig_trapno == `12`)
739	strcpy(trapname, "SS");
740	else if (sig_trapno == `32`)
741	strcpy(trapname, "IRET"); / X86_TRAP_IRET /
742	else
743	sprintf(trapname, "%d", sig_trapno);
744
745	printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
746	trapname, (unsigned long)sig_err,
747	errdesc, strsignal(sig_trapped));
748	return `0`;
749	} else {
750	/*
751	* This also implicitly tests UC_STRICT_RESTORE_SS:
752	* We check that these signals set UC_STRICT_RESTORE_SS and,
753	* if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
754	* then we won't get SIGSEGV.
755	*/
756	printf("[FAIL]\tDid not get SIGSEGV\n");
757	return `1`;
758	}
759	}
760
761	int main()
762	{
763	int total_nerrs = `0`;
764	unsigned short my_cs, my_ss;
765
766	asm volatile ("mov %%cs,%0" : "=r" (my_cs));
767	asm volatile ("mov %%ss,%0" : "=r" (my_ss));
768	setup_ldt();
769
770	stack_t stack = {
771	/ Our sigaltstack scratch space. /
772	.ss_sp = malloc(sizeof(char) * SIGSTKSZ),
773	.ss_size = SIGSTKSZ,
774	};
775	if (sigaltstack(&stack, NULL) != `0`)
776	err(`1`, "sigaltstack");
777
778	sethandler(SIGUSR1, sigusr1, `0`);
779	sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
780
781	/ Easy cases: return to a 32-bit SS in each possible CS bitness. /
782	total_nerrs += test_valid_sigreturn(cs_bits: `64`, use_16bit_ss: false, force_ss: -`1`);
783	total_nerrs += test_valid_sigreturn(cs_bits: `32`, use_16bit_ss: false, force_ss: -`1`);
784	total_nerrs += test_valid_sigreturn(cs_bits: `16`, use_16bit_ss: false, force_ss: -`1`);
785
786	/*
787	* Test easy espfix cases: return to a 16-bit LDT SS in each possible
788	* CS bitness. NB: with a long mode CS, the SS bitness is irrelevant.
789	*
790	* This catches the original missing-espfix-on-64-bit-kernels issue
791	* as well as CVE-2014-8134.
792	*/
793	total_nerrs += test_valid_sigreturn(cs_bits: `64`, use_16bit_ss: true, force_ss: -`1`);
794	total_nerrs += test_valid_sigreturn(cs_bits: `32`, use_16bit_ss: true, force_ss: -`1`);
795	total_nerrs += test_valid_sigreturn(cs_bits: `16`, use_16bit_ss: true, force_ss: -`1`);
796
797	if (gdt_data16_idx) {
798	/*
799	* For performance reasons, Linux skips espfix if SS points
800	* to the GDT. If we were able to allocate a 16-bit SS in
801	* the GDT, see if it leaks parts of the kernel stack pointer.
802	*
803	* This tests for CVE-2014-8133.
804	*/
805	total_nerrs += test_valid_sigreturn(cs_bits: `64`, use_16bit_ss: true,
806	force_ss: GDT3(idx: gdt_data16_idx));
807	total_nerrs += test_valid_sigreturn(cs_bits: `32`, use_16bit_ss: true,
808	force_ss: GDT3(idx: gdt_data16_idx));
809	total_nerrs += test_valid_sigreturn(cs_bits: `16`, use_16bit_ss: true,
810	force_ss: GDT3(idx: gdt_data16_idx));
811	}
812
813	#ifdef __x86_64__
814	/ Nasty ABI case: check SS corruption handling. /
815	sig_corrupt_final_ss = `1`;
816	total_nerrs += test_valid_sigreturn(cs_bits: `32`, use_16bit_ss: false, force_ss: -`1`);
817	total_nerrs += test_valid_sigreturn(cs_bits: `32`, use_16bit_ss: true, force_ss: -`1`);
818	sig_corrupt_final_ss = `0`;
819	#endif
820
821	/*
822	* We're done testing valid sigreturn cases. Now we test states
823	* for which sigreturn itself will succeed but the subsequent
824	* entry to user mode will fail.
825	*
826	* Depending on the failure mode and the kernel bitness, these
827	* entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
828	*/
829	clearhandler(SIGTRAP);
830	sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
831	sethandler(SIGBUS, sigtrap, SA_ONSTACK);
832	sethandler(SIGILL, sigtrap, SA_ONSTACK); / 32-bit kernels do this /
833
834	/ Easy failures: invalid SS, resulting in #GP(0) /
835	test_bad_iret(cs_bits: `64`, ss: ldt_nonexistent_sel, force_cs: -`1`);
836	test_bad_iret(cs_bits: `32`, ss: ldt_nonexistent_sel, force_cs: -`1`);
837	test_bad_iret(cs_bits: `16`, ss: ldt_nonexistent_sel, force_cs: -`1`);
838
839	/ These fail because SS isn't a data segment, resulting in #GP(SS) /
840	test_bad_iret(cs_bits: `64`, ss: my_cs, force_cs: -`1`);
841	test_bad_iret(cs_bits: `32`, ss: my_cs, force_cs: -`1`);
842	test_bad_iret(cs_bits: `16`, ss: my_cs, force_cs: -`1`);
843
844	/ Try to return to a not-present code segment, triggering #NP(SS). /
845	test_bad_iret(cs_bits: `32`, ss: my_ss, force_cs: npcode32_sel);
846
847	/*
848	* Try to return to a not-present but otherwise valid data segment.
849	* This will cause IRET to fail with #SS on the espfix stack. This
850	* exercises CVE-2014-9322.
851	*
852	* Note that, if espfix is enabled, 64-bit Linux will lose track
853	* of the actual cause of failure and report #GP(0) instead.
854	* This would be very difficult for Linux to avoid, because
855	* espfix64 causes IRET failures to be promoted to #DF, so the
856	* original exception frame is never pushed onto the stack.
857	*/
858	test_bad_iret(cs_bits: `32`, ss: npdata32_sel, force_cs: -`1`);
859
860	/*
861	* Try to return to a not-present but otherwise valid data
862	* segment without invoking espfix. Newer kernels don't allow
863	* this to happen in the first place. On older kernels, though,
864	* this can trigger CVE-2014-9322.
865	*/
866	if (gdt_npdata32_idx)
867	test_bad_iret(cs_bits: `32`, ss: GDT3(idx: gdt_npdata32_idx), force_cs: -`1`);
868
869	#ifdef __x86_64__
870	total_nerrs += test_nonstrict_ss();
871	#endif
872
873	free(stack.ss_sp);
874	return total_nerrs ? `1` : `0`;
875	}
876

source code of linux/tools/testing/selftests/x86/sigreturn.c