/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _ASM_X86_NOSPEC_BRANCH_H_
#define _ASM_X86_NOSPEC_BRANCH_H_

#include <linux/static_key.h>
#include <linux/objtool.h>
#include <linux/linkage.h>

#include <asm/alternative.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
#include <asm/current.h>

/*
 * Call depth tracking for Intel SKL CPUs to address the RSB underflow
 * issue in software.
 *
 * The tracking does not use a counter. It uses arithmetic shift
 * right on call entry and logical shift left on return.
 *
 * The depth tracking variable is initialized to 0x8000.... when the call
 * depth is zero. The arithmetic shift right sign extends the MSB and
 * saturates after the 12th call. The shift count is 5 for both directions
 * so the tracking covers 12 nested calls.
 *
 *  Call
 *  0: 0x8000000000000000	0x0000000000000000
 *  1: 0xfc00000000000000	0xf000000000000000
 * ...
 * 11: 0xfffffffffffffff8	0xfffffffffffffc00
 * 12: 0xffffffffffffffff	0xffffffffffffffe0
 *
 * After a return buffer fill the depth is credited with 12 calls before
 * the next stuffing has to take place.
 *
 * There is an inaccuracy for situations like this:
 *
 *  10 calls
 *   5 returns
 *   3 calls
 *   4 returns
 *   3 calls
 *   ....
 *
 * The shift count might cause this to be off by one in either direction,
 * but there is still a cushion vs. the RSB depth. The algorithm does not
 * claim to be perfect and it can be speculated around by the CPU, but it
 * is considered to obfuscate the problem enough to make exploitation
 * extremely difficult.
 */
#define RET_DEPTH_SHIFT			5
#define RSB_RET_STUFF_LOOPS		16
#define RET_DEPTH_INIT			0x8000000000000000ULL
#define RET_DEPTH_INIT_FROM_CALL	0xfc00000000000000ULL
#define RET_DEPTH_CREDIT		0xffffffffffffffffULL
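
/*
 * A minimal sketch of the mechanism in plain C (illustrative only, not
 * kernel code); it merely restates what the macros below do with the
 * per-CPU depth word:
 *
 *	u64 depth = RET_DEPTH_INIT;			// call depth 0
 *	depth = (u64)((s64)depth >> RET_DEPTH_SHIFT);	// on call entry
 *	depth <<= RET_DEPTH_SHIFT;			// on return
 *	depth = RET_DEPTH_CREDIT;			// after an RSB fill
 */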

#ifdef CONFIG_CALL_THUNKS_DEBUG
# define CALL_THUNKS_DEBUG_INC_CALLS \
	incq PER_CPU_VAR(__x86_call_count);
# define CALL_THUNKS_DEBUG_INC_RETS \
	incq PER_CPU_VAR(__x86_ret_count);
# define CALL_THUNKS_DEBUG_INC_STUFFS \
	incq PER_CPU_VAR(__x86_stuffs_count);
# define CALL_THUNKS_DEBUG_INC_CTXSW \
	incq PER_CPU_VAR(__x86_ctxsw_count);
#else
# define CALL_THUNKS_DEBUG_INC_CALLS
# define CALL_THUNKS_DEBUG_INC_RETS
# define CALL_THUNKS_DEBUG_INC_STUFFS
# define CALL_THUNKS_DEBUG_INC_CTXSW
#endif

#if defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)

#include <asm/asm-offsets.h>

#define CREDIT_CALL_DEPTH \
	movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);

#define RESET_CALL_DEPTH \
	xor %eax, %eax; \
	bts $63, %rax; \
	movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);

#define RESET_CALL_DEPTH_FROM_CALL \
	movb $0xfc, %al; \
	shl $56, %rax; \
	movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
	CALL_THUNKS_DEBUG_INC_CALLS

#define INCREMENT_CALL_DEPTH \
	sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
	CALL_THUNKS_DEBUG_INC_CALLS

#else
#define CREDIT_CALL_DEPTH
#define RESET_CALL_DEPTH
#define RESET_CALL_DEPTH_FROM_CALL
#define INCREMENT_CALL_DEPTH
#endif

/*
 * Fill the CPU return stack buffer.
 *
 * Each entry in the RSB, if used for a speculative 'ret', contains an
 * infinite 'pause; lfence; jmp' loop to capture speculative execution.
 *
 * This is required in various cases for retpoline and IBRS-based
 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
 * eliminate potentially bogus entries from the RSB, and sometimes
 * purely to ensure that it doesn't get empty, which on some CPUs would
 * allow predictions from other (unwanted!) sources to be used.
 *
 * We define a CPP macro such that it can be used from both .S files and
 * inline assembly. It's possible to do a .macro and then include that
 * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
 */

#define RETPOLINE_THUNK_SIZE	32
#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */

/*
 * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
 */
#define __FILL_RETURN_SLOT \
	ANNOTATE_INTRA_FUNCTION_CALL; \
	call 772f; \
	int3; \
772:

/*
 * Stuff the entire RSB.
 *
 * Google experimented with loop-unrolling and this turned out to be
 * the optimal version: two calls per loop iteration, each with its own
 * speculation trap should its return address end up getting used.
 */
#ifdef CONFIG_X86_64
#define __FILL_RETURN_BUFFER(reg, nr) \
	mov $(nr/2), reg; \
771: \
	__FILL_RETURN_SLOT \
	__FILL_RETURN_SLOT \
	add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \
	dec reg; \
	jnz 771b; \
	/* barrier for jnz misprediction */ \
	lfence; \
	CREDIT_CALL_DEPTH \
	CALL_THUNKS_DEBUG_INC_CTXSW
#else
/*
 * i386 doesn't unconditionally have LFENCE, as such it can't
 * do a loop.
 */
#define __FILL_RETURN_BUFFER(reg, nr) \
	.rept nr; \
	__FILL_RETURN_SLOT; \
	.endr; \
	add $(BITS_PER_LONG/8) * nr, %_ASM_SP;
#endif
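
/*
 * Worked example for the 64-bit loop above, assuming nr = RSB_CLEAR_LOOPS
 * = 32: the loop body runs nr/2 = 16 times; each iteration issues two
 * calls (pushing two 8-byte return addresses and creating two RSB
 * entries) and then discards both return addresses with a single 16-byte
 * stack adjustment, so the RSB receives 32 fresh entries while the stack
 * ends up unchanged.
 */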

/*
 * Stuff a single RSB slot.
 *
 * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
 * forced to retire before letting a RET instruction execute.
 *
 * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
 * before this point.
 */
#define __FILL_ONE_RETURN \
	__FILL_RETURN_SLOT \
	add $(BITS_PER_LONG/8), %_ASM_SP; \
	lfence;

#ifdef __ASSEMBLY__

/*
 * This should be used immediately before an indirect jump/call. It tells
 * objtool the subsequent indirect jump/call is vouched safe for retpoline
 * builds.
 */
.macro ANNOTATE_RETPOLINE_SAFE
.Lhere_\@:
	.pushsection .discard.retpoline_safe
	.long .Lhere_\@
	.popsection
.endm
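
/*
 * Illustrative use from assembly (hypothetical snippet, register choice
 * arbitrary):
 *
 *	ANNOTATE_RETPOLINE_SAFE
 *	call	*%rdi
 */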

/*
 * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
 * vs RETBleed validation.
 */
#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE

/*
 * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
 * eventually turn into its own annotation.
 */
.macro VALIDATE_UNRET_END
#if defined(CONFIG_NOINSTR_VALIDATION) && \
	(defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO))
	ANNOTATE_RETPOLINE_SAFE
	nop
#endif
.endm

/*
 * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call
 * to the retpoline thunk with a CS prefix when the register requires
 * a REX prefix byte to encode. Also see apply_retpolines().
 */
.macro __CS_PREFIX reg:req
	.irp rs,r8,r9,r10,r11,r12,r13,r14,r15
	.ifc \reg,\rs
	.byte 0x2e
	.endif
	.endr
.endm
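
/*
 * For example, "__CS_PREFIX r11" emits a single CS override byte (0x2e)
 * while "__CS_PREFIX rax" emits nothing, so the following 5-byte direct
 * jmp/call to the thunk becomes 6 bytes for r8-r15 -- the same size as an
 * inline "lfence; jmp/call *%reg" sequence, which needs a REX prefix for
 * those registers. (Illustrative reading of the macro; apply_retpolines()
 * does the actual rewriting.)
 */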

/*
 * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
 * indirect jmp/call which may be susceptible to the Spectre variant 2
 * attack.
 *
 * NOTE: these do not take kCFI into account and are thus not comparable
 * to C indirect calls; take care when using them. The target of these
 * should be an ENDBR instruction irrespective of kCFI.
 */
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_MITIGATION_RETPOLINE
	__CS_PREFIX \reg
	jmp __x86_indirect_thunk_\reg
#else
	jmp *%\reg
	int3
#endif
.endm

.macro CALL_NOSPEC reg:req
#ifdef CONFIG_MITIGATION_RETPOLINE
	__CS_PREFIX \reg
	call __x86_indirect_thunk_\reg
#else
	call *%\reg
#endif
.endm
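
/*
 * Illustrative use (hypothetical snippet; some_func_ptr is a made-up
 * symbol):
 *
 *	movq	some_func_ptr(%rip), %r11
 *	CALL_NOSPEC r11
 */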

/*
 * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
 * monstrosity above, manually.
 */
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
	ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
		__stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
		__stringify(nop;nop;__FILL_ONE_RETURN), \ftr2
.Lskip_rsb_\@:
.endm
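
/*
 * Illustrative use (hypothetical; the scratch register and feature bit
 * are example choices only):
 *
 *	FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 */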

/*
 * The CALL to srso_alias_untrain_ret() must be patched in directly at
 * the spot where untraining must be done, i.e., srso_alias_untrain_ret()
 * must be the target of a CALL instruction instead of indirectly
 * jumping to a wrapper which then calls it. Therefore, this macro is
 * called outside of __UNTRAIN_RET below, for the time being, before the
 * kernel can support nested alternatives with arbitrary nesting.
 */
.macro CALL_UNTRAIN_RET
#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
	ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \
		"call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
#endif
.endm

/*
 * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
 * return thunk isn't mapped into the userspace tables (then again, AMD
 * typically has NO_MELTDOWN).
 *
 * While retbleed_untrain_ret() doesn't clobber anything but requires stack,
 * entry_ibpb() will clobber AX, CX, DX.
 *
 * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
 * where we have a stack but before any RET instruction.
 */
.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
#if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY)
	VALIDATE_UNRET_END
	CALL_UNTRAIN_RET
	ALTERNATIVE_2 "", \
		"call entry_ibpb", \ibpb_feature, \
		__stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
#endif
.endm

#define UNTRAIN_RET \
	__UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH)

#define UNTRAIN_RET_VM \
	__UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH)

#define UNTRAIN_RET_FROM_CALL \
	__UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL)

.macro CALL_DEPTH_ACCOUNT
#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
	ALTERNATIVE "", \
		__stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm

/*
 * Macro to execute the VERW instruction, which mitigates transient data
 * sampling attacks such as MDS. On affected systems a microcode update
 * overloads the VERW instruction to also clear the CPU buffers. VERW
 * clobbers CFLAGS.ZF.
 *
 * Note: Only the memory operand variant of VERW clears the CPU buffers.
 */
.macro CLEAR_CPU_BUFFERS
	ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
.endm
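
/*
 * Illustrative placement (hypothetical): typically the last thing run
 * before transitioning to a less privileged context, e.g.
 *
 *	CLEAR_CPU_BUFFERS
 *	iretq
 */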

#ifdef CONFIG_X86_64
.macro CLEAR_BRANCH_HISTORY
	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
.endm

.macro CLEAR_BRANCH_HISTORY_VMEXIT
	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
.endm
#else
#define CLEAR_BRANCH_HISTORY
#define CLEAR_BRANCH_HISTORY_VMEXIT
#endif

#else /* __ASSEMBLY__ */

#define ANNOTATE_RETPOLINE_SAFE \
	"999:\n\t" \
	".pushsection .discard.retpoline_safe\n\t" \
	".long 999b\n\t" \
	".popsection\n\t"
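
/*
 * Illustrative inline-asm use (hypothetical function pointer 'fn'):
 *
 *	asm volatile(ANNOTATE_RETPOLINE_SAFE
 *		     "call *%[func]\n"
 *		     : : [func] "r" (fn) : "memory");
 */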

typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];

#ifdef CONFIG_MITIGATION_RETHUNK
extern void __x86_return_thunk(void);
#else
static inline void __x86_return_thunk(void) {}
#endif

#ifdef CONFIG_MITIGATION_UNRET_ENTRY
extern void retbleed_return_thunk(void);
#else
static inline void retbleed_return_thunk(void) {}
#endif

extern void srso_alias_untrain_ret(void);

#ifdef CONFIG_MITIGATION_SRSO
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
#else
static inline void srso_return_thunk(void) {}
static inline void srso_alias_return_thunk(void) {}
#endif

extern void entry_untrain_ret(void);
extern void entry_ibpb(void);

#ifdef CONFIG_X86_64
extern void clear_bhb_loop(void);
#endif

extern void (*x86_return_thunk)(void);

extern void __warn_thunk(void);

#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
extern void call_depth_return_thunk(void);

#define CALL_DEPTH_ACCOUNT \
	ALTERNATIVE("", \
		    __stringify(INCREMENT_CALL_DEPTH), \
		    X86_FEATURE_CALL_DEPTH)

#ifdef CONFIG_CALL_THUNKS_DEBUG
DECLARE_PER_CPU(u64, __x86_call_count);
DECLARE_PER_CPU(u64, __x86_ret_count);
DECLARE_PER_CPU(u64, __x86_stuffs_count);
DECLARE_PER_CPU(u64, __x86_ctxsw_count);
#endif
#else /* !CONFIG_MITIGATION_CALL_DEPTH_TRACKING */

static inline void call_depth_return_thunk(void) {}
#define CALL_DEPTH_ACCOUNT ""

#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */

#ifdef CONFIG_MITIGATION_RETPOLINE

#define GEN(reg) \
	extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#define GEN(reg) \
	extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#define GEN(reg) \
	extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#ifdef CONFIG_X86_64

/*
 * Inline asm uses the %V modifier, which is only available in newer GCC
 * versions; that is ensured when CONFIG_MITIGATION_RETPOLINE is defined.
 */
# define CALL_NOSPEC \
	ALTERNATIVE_2( \
	ANNOTATE_RETPOLINE_SAFE \
	"call *%[thunk_target]\n", \
	"call __x86_indirect_thunk_%V[thunk_target]\n", \
	X86_FEATURE_RETPOLINE, \
	"lfence;\n" \
	ANNOTATE_RETPOLINE_SAFE \
	"call *%[thunk_target]\n", \
	X86_FEATURE_RETPOLINE_LFENCE)

# define THUNK_TARGET(addr) [thunk_target] "r" (addr)

#else /* CONFIG_X86_32 */
/*
 * For i386 we use the original ret-equivalent retpoline, because
 * otherwise we'll run out of registers. We don't care about CET
 * here, anyway.
 */
# define CALL_NOSPEC \
	ALTERNATIVE_2( \
	ANNOTATE_RETPOLINE_SAFE \
	"call *%[thunk_target]\n", \
	" jmp 904f;\n" \
	" .align 16\n" \
	"901: call 903f;\n" \
	"902: pause;\n" \
	" lfence;\n" \
	" jmp 902b;\n" \
	" .align 16\n" \
	"903: lea 4(%%esp), %%esp;\n" \
	" pushl %[thunk_target];\n" \
	" ret;\n" \
	" .align 16\n" \
	"904: call 901b;\n", \
	X86_FEATURE_RETPOLINE, \
	"lfence;\n" \
	ANNOTATE_RETPOLINE_SAFE \
	"call *%[thunk_target]\n", \
	X86_FEATURE_RETPOLINE_LFENCE)

# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
#else /* No retpoline for C / inline asm */
# define CALL_NOSPEC "call *%[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
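
/*
 * Illustrative use of CALL_NOSPEC/THUNK_TARGET from C (hypothetical
 * function pointer 'fn'; real call sites also describe register
 * arguments, outputs and clobbers as needed):
 *
 *	asm volatile(CALL_NOSPEC : : THUNK_TARGET(fn) : "memory");
 */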

/* The Spectre V2 mitigation variants */
enum spectre_v2_mitigation {
	SPECTRE_V2_NONE,
	SPECTRE_V2_RETPOLINE,
	SPECTRE_V2_LFENCE,
	SPECTRE_V2_EIBRS,
	SPECTRE_V2_EIBRS_RETPOLINE,
	SPECTRE_V2_EIBRS_LFENCE,
	SPECTRE_V2_IBRS,
};

/* The indirect branch speculation control variants */
enum spectre_v2_user_mitigation {
	SPECTRE_V2_USER_NONE,
	SPECTRE_V2_USER_STRICT,
	SPECTRE_V2_USER_STRICT_PREFERRED,
	SPECTRE_V2_USER_PRCTL,
	SPECTRE_V2_USER_SECCOMP,
};

/* The Speculative Store Bypass disable variants */
enum ssb_mitigation {
	SPEC_STORE_BYPASS_NONE,
	SPEC_STORE_BYPASS_DISABLE,
	SPEC_STORE_BYPASS_PRCTL,
	SPEC_STORE_BYPASS_SECCOMP,
};

static __always_inline
void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
{
	asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
		: : "c" (msr),
		    "a" ((u32)val),
		    "d" ((u32)(val >> 32)),
		    [feature] "i" (feature)
		: "memory");
}

extern u64 x86_pred_cmd;

static inline void indirect_branch_prediction_barrier(void)
{
	alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB);
}

/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
extern void update_spec_ctrl_cond(u64 val);
extern u64 spec_ctrl_current(void);

/*
 * With retpoline, we must use IBRS to restrict branch prediction
 * before calling into firmware.
 *
 * (Implemented as CPP macros due to header hell.)
 */
#define firmware_restrict_branch_speculation_start() \
do { \
	preempt_disable(); \
	alternative_msr_write(MSR_IA32_SPEC_CTRL, \
			      spec_ctrl_current() | SPEC_CTRL_IBRS, \
			      X86_FEATURE_USE_IBRS_FW); \
	alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \
			      X86_FEATURE_USE_IBPB_FW); \
} while (0)

#define firmware_restrict_branch_speculation_end() \
do { \
	alternative_msr_write(MSR_IA32_SPEC_CTRL, \
			      spec_ctrl_current(), \
			      X86_FEATURE_USE_IBRS_FW); \
	preempt_enable(); \
} while (0)
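
/*
 * Illustrative use around a firmware call (hypothetical callee):
 *
 *	firmware_restrict_branch_speculation_start();
 *	status = some_firmware_service(args);
 *	firmware_restrict_branch_speculation_end();
 */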

DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);

DECLARE_STATIC_KEY_FALSE(mds_idle_clear);

DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);

DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);

extern u16 mds_verw_sel;

#include <asm/segment.h>

/**
 * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
 *
 * This uses the otherwise unused and obsolete VERW instruction in
 * combination with microcode which triggers a CPU buffer flush when the
 * instruction is executed.
 */
static __always_inline void mds_clear_cpu_buffers(void)
{
	static const u16 ds = __KERNEL_DS;

	/*
	 * Has to be the memory-operand variant because only that
	 * guarantees the CPU buffer flush functionality according to
	 * documentation. The register-operand variant does not.
	 * Works with any segment selector, but a valid writable
	 * data segment is the fastest variant.
	 *
	 * "cc" clobber is required because VERW modifies ZF.
	 */
	asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
}

/**
 * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
 *
 * Clear CPU buffers if the corresponding static key is enabled
 */
static __always_inline void mds_idle_clear_cpu_buffers(void)
{
	if (static_branch_likely(&mds_idle_clear))
		mds_clear_cpu_buffers();
}

#endif /* __ASSEMBLY__ */

#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */