1 | /* |
2 | * umip.c Emulation for instruction protected by the User-Mode Instruction |
3 | * Prevention feature |
4 | * |
5 | * Copyright (c) 2017, Intel Corporation. |
6 | * Ricardo Neri <ricardo.neri-calderon@linux.intel.com> |
7 | */ |
8 | |
9 | #include <linux/uaccess.h> |
10 | #include <asm/umip.h> |
11 | #include <asm/traps.h> |
12 | #include <asm/insn.h> |
13 | #include <asm/insn-eval.h> |
14 | #include <linux/ratelimit.h> |
15 | |
16 | #undef pr_fmt |
17 | #define pr_fmt(fmt) "umip: " fmt |
18 | |
19 | /** DOC: Emulation for User-Mode Instruction Prevention (UMIP) |
20 | * |
21 | * User-Mode Instruction Prevention is a security feature present in recent |
22 | * x86 processors that, when enabled, prevents a group of instructions (SGDT, |
23 | * SIDT, SLDT, SMSW and STR) from being run in user mode by issuing a general |
24 | * protection fault if the instruction is executed with CPL > 0. |
25 | * |
 * Rather than relaying to user space the general protection fault caused by
 * the UMIP-protected instructions (in the form of a SIGSEGV signal), the fault
 * can be trapped and the result of such instructions emulated with dummy
 * values. This both preserves the current kernel behavior and avoids revealing
 * the system resources that UMIP intends to protect (i.e., the locations of the
 * global descriptor and interrupt descriptor tables, the segment selectors of
 * the local descriptor table, the value of the task state register and the
 * contents of the CR0 register).
34 | * |
35 | * This emulation is needed because certain applications (e.g., WineHQ and |
36 | * DOSEMU2) rely on this subset of instructions to function. |
37 | * |
38 | * The instructions protected by UMIP can be split in two groups. Those which |
39 | * return a kernel memory address (SGDT and SIDT) and those which return a |
40 | * value (SLDT, STR and SMSW). |
41 | * |
42 | * For the instructions that return a kernel memory address, applications |
43 | * such as WineHQ rely on the result being located in the kernel memory space, |
 * not the actual location of the table. The result is emulated as a hard-coded
 * value that lies close to the top of the kernel memory. The limits for the
 * GDT and the IDT are set to zero.
47 | * |
48 | * The instruction SMSW is emulated to return the value that the register CR0 |
49 | * has at boot time as set in the head_32. |
50 | * SLDT and STR are emulated to return the values that the kernel programmatically |
51 | * assigns: |
52 | * - SLDT returns (GDT_ENTRY_LDT * 8) if an LDT has been set, 0 if not. |
53 | * - STR returns (GDT_ENTRY_TSS * 8). |
54 | * |
55 | * Emulation is provided for both 32-bit and 64-bit processes. |
56 | * |
57 | * Care is taken to appropriately emulate the results when segmentation is |
58 | * used. That is, rather than relying on USER_DS and USER_CS, the function |
59 | * insn_get_addr_ref() inspects the segment descriptor pointed by the |
60 | * registers in pt_regs. This ensures that we correctly obtain the segment |
61 | * base address and the address and operand sizes even if the user space |
62 | * application uses a local descriptor table. |
63 | */ |
64 | |
/*
 * Hard-coded table base addresses stored by the emulated SGDT and SIDT in
 * place of the real locations of the GDT and the IDT.
 */
#define UMIP_DUMMY_GDT_BASE		0xfffffffffffe0000ULL
#define UMIP_DUMMY_IDT_BASE		0xffffffffffff0000ULL

/*
 * SGDT and SIDT store the contents of the global descriptor table register
 * and of the interrupt descriptor table register, respectively, into a
 * memory operand of X + 2 bytes: 2 bytes hold the table limit and X bytes
 * hold the table base address. X is 4 in 32-bit processes and 8 in 64-bit
 * processes.
 */
#define UMIP_GDT_IDT_BASE_SIZE_64BIT	8
#define UMIP_GDT_IDT_BASE_SIZE_32BIT	4
#define UMIP_GDT_IDT_LIMIT_SIZE		2
78 | |
/*
 * Identifiers for the UMIP-protected instructions that can be emulated. The
 * values are also used as indices into umip_insns[].
 */
enum {
	UMIP_INST_SGDT = 0,	/* 0F 01 /0 */
	UMIP_INST_SIDT = 1,	/* 0F 01 /1 */
	UMIP_INST_SMSW = 2,	/* 0F 01 /4 */
	UMIP_INST_SLDT = 3,	/* 0F 00 /0 */
	UMIP_INST_STR  = 4,	/* 0F 00 /1 */
};

/* Instruction mnemonics for log messages, indexed by UMIP_INST_* value. */
static const char * const umip_insns[] = {
	[UMIP_INST_SGDT] = "SGDT",
	[UMIP_INST_SIDT] = "SIDT",
	[UMIP_INST_SMSW] = "SMSW",
	[UMIP_INST_SLDT] = "SLDT",
	[UMIP_INST_STR]  = "STR",
};
92 | |
93 | #define umip_pr_err(regs, fmt, ...) \ |
94 | umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__) |
95 | #define umip_pr_debug(regs, fmt, ...) \ |
96 | umip_printk(regs, KERN_DEBUG, fmt, ##__VA_ARGS__) |
97 | |
98 | /** |
99 | * umip_printk() - Print a rate-limited message |
100 | * @regs: Register set with the context in which the warning is printed |
101 | * @log_level: Kernel log level to print the message |
102 | * @fmt: The text string to print |
103 | * |
104 | * Print the text contained in @fmt. The print rate is limited to bursts of 5 |
105 | * messages every two minutes. The purpose of this customized version of |
106 | * printk() is to print messages when user space processes use any of the |
107 | * UMIP-protected instructions. Thus, the printed text is prepended with the |
108 | * task name and process ID number of the current task as well as the |
109 | * instruction and stack pointers in @regs as seen when entering kernel mode. |
110 | * |
111 | * Returns: |
112 | * |
113 | * None. |
114 | */ |
115 | static __printf(3, 4) |
116 | void umip_printk(const struct pt_regs *regs, const char *log_level, |
117 | const char *fmt, ...) |
118 | { |
119 | /* Bursts of 5 messages every two minutes */ |
120 | static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5); |
121 | struct task_struct *tsk = current; |
122 | struct va_format vaf; |
123 | va_list args; |
124 | |
125 | if (!__ratelimit(&ratelimit)) |
126 | return; |
127 | |
128 | va_start(args, fmt); |
129 | vaf.fmt = fmt; |
130 | vaf.va = &args; |
131 | printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV" ), log_level, tsk->comm, |
132 | task_pid_nr(tsk), regs->ip, regs->sp, &vaf); |
133 | va_end(args); |
134 | } |
135 | |
136 | /** |
137 | * identify_insn() - Identify a UMIP-protected instruction |
138 | * @insn: Instruction structure with opcode and ModRM byte. |
139 | * |
140 | * From the opcode and ModRM.reg in @insn identify, if any, a UMIP-protected |
141 | * instruction that can be emulated. |
142 | * |
143 | * Returns: |
144 | * |
145 | * On success, a constant identifying a specific UMIP-protected instruction that |
146 | * can be emulated. |
147 | * |
148 | * -EINVAL on error or when not an UMIP-protected instruction that can be |
149 | * emulated. |
150 | */ |
151 | static int identify_insn(struct insn *insn) |
152 | { |
153 | /* By getting modrm we also get the opcode. */ |
154 | insn_get_modrm(insn); |
155 | |
156 | if (!insn->modrm.nbytes) |
157 | return -EINVAL; |
158 | |
159 | /* All the instructions of interest start with 0x0f. */ |
160 | if (insn->opcode.bytes[0] != 0xf) |
161 | return -EINVAL; |
162 | |
163 | if (insn->opcode.bytes[1] == 0x1) { |
164 | switch (X86_MODRM_REG(insn->modrm.value)) { |
165 | case 0: |
166 | return UMIP_INST_SGDT; |
167 | case 1: |
168 | return UMIP_INST_SIDT; |
169 | case 4: |
170 | return UMIP_INST_SMSW; |
171 | default: |
172 | return -EINVAL; |
173 | } |
174 | } else if (insn->opcode.bytes[1] == 0x0) { |
175 | if (X86_MODRM_REG(insn->modrm.value) == 0) |
176 | return UMIP_INST_SLDT; |
177 | else if (X86_MODRM_REG(insn->modrm.value) == 1) |
178 | return UMIP_INST_STR; |
179 | else |
180 | return -EINVAL; |
181 | } else { |
182 | return -EINVAL; |
183 | } |
184 | } |
185 | |
186 | /** |
187 | * emulate_umip_insn() - Emulate UMIP instructions and return dummy values |
188 | * @insn: Instruction structure with operands |
189 | * @umip_inst: A constant indicating the instruction to emulate |
190 | * @data: Buffer into which the dummy result is stored |
191 | * @data_size: Size of the emulated result |
192 | * @x86_64: true if process is 64-bit, false otherwise |
193 | * |
194 | * Emulate an instruction protected by UMIP and provide a dummy result. The |
195 | * result of the emulation is saved in @data. The size of the results depends |
196 | * on both the instruction and type of operand (register vs memory address). |
197 | * The size of the result is updated in @data_size. Caller is responsible |
198 | * of providing a @data buffer of at least UMIP_GDT_IDT_BASE_SIZE + |
199 | * UMIP_GDT_IDT_LIMIT_SIZE bytes. |
200 | * |
201 | * Returns: |
202 | * |
203 | * 0 on success, -EINVAL on error while emulating. |
204 | */ |
205 | static int emulate_umip_insn(struct insn *insn, int umip_inst, |
206 | unsigned char *data, int *data_size, bool x86_64) |
207 | { |
208 | if (!data || !data_size || !insn) |
209 | return -EINVAL; |
210 | /* |
211 | * These two instructions return the base address and limit of the |
212 | * global and interrupt descriptor table, respectively. According to the |
213 | * Intel Software Development manual, the base address can be 24-bit, |
214 | * 32-bit or 64-bit. Limit is always 16-bit. If the operand size is |
215 | * 16-bit, the returned value of the base address is supposed to be a |
216 | * zero-extended 24-byte number. However, it seems that a 32-byte number |
217 | * is always returned irrespective of the operand size. |
218 | */ |
219 | if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) { |
220 | u64 dummy_base_addr; |
221 | u16 dummy_limit = 0; |
222 | |
223 | /* SGDT and SIDT do not use registers operands. */ |
224 | if (X86_MODRM_MOD(insn->modrm.value) == 3) |
225 | return -EINVAL; |
226 | |
227 | if (umip_inst == UMIP_INST_SGDT) |
228 | dummy_base_addr = UMIP_DUMMY_GDT_BASE; |
229 | else |
230 | dummy_base_addr = UMIP_DUMMY_IDT_BASE; |
231 | |
232 | /* |
233 | * 64-bit processes use the entire dummy base address. |
234 | * 32-bit processes use the lower 32 bits of the base address. |
235 | * dummy_base_addr is always 64 bits, but we memcpy the correct |
236 | * number of bytes from it to the destination. |
237 | */ |
238 | if (x86_64) |
239 | *data_size = UMIP_GDT_IDT_BASE_SIZE_64BIT; |
240 | else |
241 | *data_size = UMIP_GDT_IDT_BASE_SIZE_32BIT; |
242 | |
243 | memcpy(data + 2, &dummy_base_addr, *data_size); |
244 | |
245 | *data_size += UMIP_GDT_IDT_LIMIT_SIZE; |
246 | memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE); |
247 | |
248 | } else if (umip_inst == UMIP_INST_SMSW || umip_inst == UMIP_INST_SLDT || |
249 | umip_inst == UMIP_INST_STR) { |
250 | unsigned long dummy_value; |
251 | |
252 | if (umip_inst == UMIP_INST_SMSW) { |
253 | dummy_value = CR0_STATE; |
254 | } else if (umip_inst == UMIP_INST_STR) { |
255 | dummy_value = GDT_ENTRY_TSS * 8; |
256 | } else if (umip_inst == UMIP_INST_SLDT) { |
257 | #ifdef CONFIG_MODIFY_LDT_SYSCALL |
258 | down_read(sem: ¤t->mm->context.ldt_usr_sem); |
259 | if (current->mm->context.ldt) |
260 | dummy_value = GDT_ENTRY_LDT * 8; |
261 | else |
262 | dummy_value = 0; |
263 | up_read(sem: ¤t->mm->context.ldt_usr_sem); |
264 | #else |
265 | dummy_value = 0; |
266 | #endif |
267 | } |
268 | |
269 | /* |
270 | * For these 3 instructions, the number |
271 | * of bytes to be copied in the result buffer is determined |
272 | * by whether the operand is a register or a memory location. |
273 | * If operand is a register, return as many bytes as the operand |
274 | * size. If operand is memory, return only the two least |
275 | * significant bytes. |
276 | */ |
277 | if (X86_MODRM_MOD(insn->modrm.value) == 3) |
278 | *data_size = insn->opnd_bytes; |
279 | else |
280 | *data_size = 2; |
281 | |
282 | memcpy(data, &dummy_value, *data_size); |
283 | } else { |
284 | return -EINVAL; |
285 | } |
286 | |
287 | return 0; |
288 | } |
289 | |
290 | /** |
291 | * force_sig_info_umip_fault() - Force a SIGSEGV with SEGV_MAPERR |
292 | * @addr: Address that caused the signal |
293 | * @regs: Register set containing the instruction pointer |
294 | * |
295 | * Force a SIGSEGV signal with SEGV_MAPERR as the error code. This function is |
296 | * intended to be used to provide a segmentation fault when the result of the |
297 | * UMIP emulation could not be copied to the user space memory. |
298 | * |
299 | * Returns: none |
300 | */ |
301 | static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) |
302 | { |
303 | struct task_struct *tsk = current; |
304 | |
305 | tsk->thread.cr2 = (unsigned long)addr; |
306 | tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE; |
307 | tsk->thread.trap_nr = X86_TRAP_PF; |
308 | |
309 | force_sig_fault(SIGSEGV, SEGV_MAPERR, addr); |
310 | |
311 | if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))) |
312 | return; |
313 | |
314 | umip_pr_err(regs, "segfault in emulation. error%x\n" , |
315 | X86_PF_USER | X86_PF_WRITE); |
316 | } |
317 | |
318 | /** |
319 | * fixup_umip_exception() - Fixup a general protection fault caused by UMIP |
320 | * @regs: Registers as saved when entering the #GP handler |
321 | * |
322 | * The instructions SGDT, SIDT, STR, SMSW and SLDT cause a general protection |
323 | * fault if executed with CPL > 0 (i.e., from user space). This function fixes |
324 | * the exception up and provides dummy results for SGDT, SIDT and SMSW; STR |
325 | * and SLDT are not fixed up. |
326 | * |
327 | * If operands are memory addresses, results are copied to user-space memory as |
328 | * indicated by the instruction pointed by eIP using the registers indicated in |
329 | * the instruction operands. If operands are registers, results are copied into |
330 | * the context that was saved when entering kernel mode. |
331 | * |
332 | * Returns: |
333 | * |
334 | * True if emulation was successful; false if not. |
335 | */ |
336 | bool fixup_umip_exception(struct pt_regs *regs) |
337 | { |
338 | int nr_copied, reg_offset, dummy_data_size, umip_inst; |
339 | /* 10 bytes is the maximum size of the result of UMIP instructions */ |
340 | unsigned char dummy_data[10] = { 0 }; |
341 | unsigned char buf[MAX_INSN_SIZE]; |
342 | unsigned long *reg_addr; |
343 | void __user *uaddr; |
344 | struct insn insn; |
345 | |
346 | if (!regs) |
347 | return false; |
348 | |
349 | /* |
350 | * Give up on emulation if fetching the instruction failed. Should a |
351 | * page fault or a #GP be issued? |
352 | */ |
353 | nr_copied = insn_fetch_from_user(regs, buf); |
354 | if (nr_copied <= 0) |
355 | return false; |
356 | |
357 | if (!insn_decode_from_regs(insn: &insn, regs, buf, buf_size: nr_copied)) |
358 | return false; |
359 | |
360 | umip_inst = identify_insn(insn: &insn); |
361 | if (umip_inst < 0) |
362 | return false; |
363 | |
364 | umip_pr_debug(regs, "%s instruction cannot be used by applications.\n" , |
365 | umip_insns[umip_inst]); |
366 | |
367 | umip_pr_debug(regs, "For now, expensive software emulation returns the result.\n" ); |
368 | |
369 | if (emulate_umip_insn(insn: &insn, umip_inst, data: dummy_data, data_size: &dummy_data_size, |
370 | x86_64: user_64bit_mode(regs))) |
371 | return false; |
372 | |
373 | /* |
374 | * If operand is a register, write result to the copy of the register |
375 | * value that was pushed to the stack when entering into kernel mode. |
376 | * Upon exit, the value we write will be restored to the actual hardware |
377 | * register. |
378 | */ |
379 | if (X86_MODRM_MOD(insn.modrm.value) == 3) { |
380 | reg_offset = insn_get_modrm_rm_off(insn: &insn, regs); |
381 | |
382 | /* |
383 | * Negative values are usually errors. In memory addressing, |
384 | * the exception is -EDOM. Since we expect a register operand, |
385 | * all negative values are errors. |
386 | */ |
387 | if (reg_offset < 0) |
388 | return false; |
389 | |
390 | reg_addr = (unsigned long *)((unsigned long)regs + reg_offset); |
391 | memcpy(reg_addr, dummy_data, dummy_data_size); |
392 | } else { |
393 | uaddr = insn_get_addr_ref(insn: &insn, regs); |
394 | if ((unsigned long)uaddr == -1L) |
395 | return false; |
396 | |
397 | nr_copied = copy_to_user(to: uaddr, from: dummy_data, n: dummy_data_size); |
398 | if (nr_copied > 0) { |
399 | /* |
400 | * If copy fails, send a signal and tell caller that |
401 | * fault was fixed up. |
402 | */ |
403 | force_sig_info_umip_fault(addr: uaddr, regs); |
404 | return true; |
405 | } |
406 | } |
407 | |
408 | /* increase IP to let the program keep going */ |
409 | regs->ip += insn.length; |
410 | return true; |
411 | } |
412 | |