1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | |
3 | #include <linux/export.h> |
4 | #include <linux/stringify.h> |
5 | #include <linux/linkage.h> |
6 | #include <asm/dwarf2.h> |
7 | #include <asm/cpufeatures.h> |
8 | #include <asm/alternative.h> |
9 | #include <asm/asm-offsets.h> |
10 | #include <asm/nospec-branch.h> |
11 | #include <asm/unwind_hints.h> |
12 | #include <asm/percpu.h> |
13 | #include <asm/frame.h> |
14 | #include <asm/nops.h> |
15 | |
16 | .section .text..__x86.indirect_thunk |
17 | |
18 | |
/*
 * POLINE reg - core of the retpoline sequence, without the final return.
 *
 * The CALL targets a label local to this expansion (\@ makes the label
 * unique per expansion) and thereby pushes the address of the INT3 that
 * follows it.  At the label, that freshly pushed return address is
 * overwritten with the value of %\reg.  No return instruction is emitted
 * here: the user of the macro appends one, and that return then
 * transfers control to the address that was held in %\reg.
 *
 * The INT3 is never reached architecturally, since the return address
 * pointing at it is replaced before any return executes.
 */
19 | .macro POLINE reg |
20 | ANNOTATE_INTRA_FUNCTION_CALL |
21 | call .Ldo_rop_\@ |
22 | int3 |
/* Top of stack holds the return address of the CALL above; replace it. */
23 | .Ldo_rop_\@: |
24 | mov %\reg, (%_ASM_SP) |
25 | UNWIND_HINT_FUNC |
26 | .endm |
27 | |
/*
 * RETPOLINE reg - complete retpoline: POLINE \reg followed by RET.
 *
 * RET (upper case) is a macro defined outside this file; the
 * call-depth/jump thunk macros in this file use a plain "ret; int3"
 * instead.
 */
28 | .macro RETPOLINE reg |
29 | POLINE \reg |
30 | RET |
31 | .endm |
32 | |
/*
 * THUNK reg - emit the global indirect-branch thunk for one register.
 *
 * Aligns to RETPOLINE_THUNK_SIZE, defines the global inner label
 * __x86_indirect_thunk_<reg> and emits one of three bodies through
 * ALTERNATIVE_2:
 *
 *   - as assembled:                        RETPOLINE \reg
 *   - X86_FEATURE_RETPOLINE_LFENCE:        lfence; jmp *%\reg; int3
 *   - ALT_NOT(X86_FEATURE_RETPOLINE):      jmp *%\reg
 *
 * NOTE(review): which replacement wins when both conditions hold is
 * decided by ALTERNATIVE_2, which is defined outside this file.
 */
33 | .macro THUNK reg |
34 | |
35 | .align RETPOLINE_THUNK_SIZE |
36 | SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) |
37 | UNWIND_HINT_UNDEFINED |
38 | ANNOTATE_NOENDBR |
39 | |
40 | ALTERNATIVE_2 __stringify(RETPOLINE \reg), \ |
41 | __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \ |
42 | __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE) |
43 | |
44 | .endm |
45 | |
46 | /* |
47 | * Despite being an assembler file we can't just use .irp here |
48 | * because __KSYM_DEPS__ only uses the C preprocessor and would |
49 | * only see one instance of "__x86_indirect_thunk_\reg" rather |
50 | * than one per register with the correct names. So we do it |
51 | * the simple and nasty way... |
52 | * |
53 | * Worse, you can only have a single EXPORT_SYMBOL per line, |
54 | * and CPP can't insert newlines, so we have to repeat everything |
55 | * at least twice. |
56 | */ |
57 | |
/*
 * __EXPORT_THUNK(sym) - apply _ASM_NOKPROBE() to @sym and export it with
 * EXPORT_SYMBOL().  Used once per thunk symbol in this file.
 * NOTE(review): _ASM_NOKPROBE() presumably excludes @sym from kprobes;
 * it is defined outside this file.
 */
58 | #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) |
59 | |
/*
 * __x86_indirect_thunk_array - the per-register indirect thunks laid out
 * back to back.  Both the start and the end of the array are aligned to
 * RETPOLINE_THUNK_SIZE, and every THUNK expansion aligns itself to the
 * same value.
 */
60 | .align RETPOLINE_THUNK_SIZE |
61 | SYM_CODE_START(__x86_indirect_thunk_array) |
62 | |
/* GEN-for-each-reg.h invokes GEN() for each register: one THUNK each. */
63 | #define GEN(reg) THUNK reg |
64 | #include <asm/GEN-for-each-reg.h> |
65 | #undef GEN |
66 | |
67 | .align RETPOLINE_THUNK_SIZE |
68 | SYM_CODE_END(__x86_indirect_thunk_array) |
69 | |
/* Second pass over the register list: export each __x86_indirect_thunk_<reg>. */
70 | #define GEN(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) |
71 | #include <asm/GEN-for-each-reg.h> |
72 | #undef GEN |
73 | |
74 | #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING |
/*
 * CALL_THUNK reg - indirect *call* thunk used with call depth tracking.
 *
 * Defines the global inner label __x86_indirect_call_thunk_<reg>, runs
 * CALL_DEPTH_ACCOUNT (defined outside this file; presumably records the
 * call in the per-CPU call depth - confirm) and then performs the POLINE
 * sequence followed by a plain "ret; int3".
 */
75 | .macro CALL_THUNK reg |
76 | .align RETPOLINE_THUNK_SIZE |
77 | |
78 | SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL) |
79 | UNWIND_HINT_UNDEFINED |
80 | ANNOTATE_NOENDBR |
81 | |
82 | CALL_DEPTH_ACCOUNT |
83 | POLINE \reg |
/* POLINE left the target from %\reg on top of the stack; "return" to it. */
84 | ANNOTATE_UNRET_SAFE |
85 | ret |
86 | int3 |
87 | .endm |
88 | |
/*
 * __x86_indirect_call_thunk_array - the per-register CALL_THUNK bodies
 * laid out back to back, aligned to RETPOLINE_THUNK_SIZE at both ends.
 */
89 | .align RETPOLINE_THUNK_SIZE |
90 | SYM_CODE_START(__x86_indirect_call_thunk_array) |
91 | |
/* GEN-for-each-reg.h invokes GEN() for each register: one CALL_THUNK each. */
92 | #define GEN(reg) CALL_THUNK reg |
93 | #include <asm/GEN-for-each-reg.h> |
94 | #undef GEN |
95 | |
96 | .align RETPOLINE_THUNK_SIZE |
97 | SYM_CODE_END(__x86_indirect_call_thunk_array) |
98 | |
/* Second pass: export each __x86_indirect_call_thunk_<reg>. */
99 | #define GEN(reg) __EXPORT_THUNK(__x86_indirect_call_thunk_ ## reg) |
100 | #include <asm/GEN-for-each-reg.h> |
101 | #undef GEN |
102 | |
/*
 * JUMP_THUNK reg - indirect *jump* thunk used with call depth tracking.
 *
 * Same as CALL_THUNK except that CALL_DEPTH_ACCOUNT is not invoked:
 * defines the global inner label __x86_indirect_jump_thunk_<reg> and
 * performs the POLINE sequence followed by a plain "ret; int3".
 */
103 | .macro JUMP_THUNK reg |
104 | .align RETPOLINE_THUNK_SIZE |
105 | |
106 | SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL) |
107 | UNWIND_HINT_UNDEFINED |
108 | ANNOTATE_NOENDBR |
109 | POLINE \reg |
/* POLINE left the target from %\reg on top of the stack; "return" to it. */
110 | ANNOTATE_UNRET_SAFE |
111 | ret |
112 | int3 |
113 | .endm |
114 | |
/*
 * __x86_indirect_jump_thunk_array - the per-register JUMP_THUNK bodies
 * laid out back to back, aligned to RETPOLINE_THUNK_SIZE at both ends.
 */
115 | .align RETPOLINE_THUNK_SIZE |
116 | SYM_CODE_START(__x86_indirect_jump_thunk_array) |
117 | |
/* GEN-for-each-reg.h invokes GEN() for each register: one JUMP_THUNK each. */
118 | #define GEN(reg) JUMP_THUNK reg |
119 | #include <asm/GEN-for-each-reg.h> |
120 | #undef GEN |
121 | |
122 | .align RETPOLINE_THUNK_SIZE |
123 | SYM_CODE_END(__x86_indirect_jump_thunk_array) |
124 | |
/* Second pass: export each __x86_indirect_jump_thunk_<reg>. */
125 | #define GEN(reg) __EXPORT_THUNK(__x86_indirect_jump_thunk_ ## reg) |
126 | #include <asm/GEN-for-each-reg.h> |
127 | #undef GEN |
128 | #endif |
129 | |
130 | #ifdef CONFIG_MITIGATION_RETHUNK |
131 | |
132 | /* |
133 | * Be careful here: that label cannot really be removed because in |
134 | * some configurations and toolchains, the JMP __x86_return_thunk the |
135 | * compiler issues is either a short one or the compiler doesn't use |
136 | * relocations for same-section JMPs and that breaks the returns |
137 | * detection logic in apply_returns() and in objtool. |
138 | */ |
139 | .section .text..__x86.return_thunk |
140 | |
141 | #ifdef CONFIG_MITIGATION_SRSO |
142 | |
143 | /* |
144 | * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at |
145 | * special addresses: |
146 | * |
147 | * - srso_alias_untrain_ret() is 2M aligned |
148 | * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14 |
149 | * and 20 in its virtual address are set (while those bits in the |
150 | * srso_alias_untrain_ret() function are cleared). |
151 | * |
152 | * This guarantees that those two addresses will alias in the branch |
153 | * target buffer of Zen3/4 generations, leading to any potential |
154 | * poisoned entries at that BTB slot to get evicted. |
155 | * |
156 | * As a result, srso_alias_safe_ret() becomes a safe return. |
157 | */ |
/*
 * srso_alias_untrain_ret - untraining entry point for the SRSO "alias"
 * variant.  It lives in its own section, .text..__x86.rethunk_untrain,
 * so that its address can be controlled independently of the rest of
 * this file (see the placement rules in the comment above).
 *
 * Body: a two-byte NOP, an LFENCE, then a JMP to
 * srso_alias_return_thunk, which performs the actual safe return.
 *
 * NOTE(review): the symbol is opened with SYM_CODE_START_NOALIGN but
 * closed with SYM_FUNC_END.
 */
158 | .pushsection .text..__x86.rethunk_untrain |
159 | SYM_CODE_START_NOALIGN(srso_alias_untrain_ret) |
160 | UNWIND_HINT_FUNC |
161 | ANNOTATE_NOENDBR |
162 | ASM_NOP2 |
163 | lfence |
164 | jmp srso_alias_return_thunk |
165 | SYM_FUNC_END(srso_alias_untrain_ret) |
166 | __EXPORT_THUNK(srso_alias_untrain_ret) |
167 | .popsection |
168 | |
/*
 * srso_alias_safe_ret / srso_alias_return_thunk - both are placed in
 * section .text..__x86.rethunk_safe.
 *
 * srso_alias_return_thunk CALLs srso_alias_safe_ret, which pushes the
 * address of the UD2 that follows the CALL.  srso_alias_safe_ret drops
 * that return address again by adding 8 to the stack pointer (LEA does
 * not modify the flags) and then executes RET, which returns to whatever
 * return address was on the stack before the thunk was entered.  The UD2
 * is therefore never returned to architecturally.
 *
 * NOTE(review): the "8" is the size of one 64-bit stack slot; this
 * assumes the code is only built for 64-bit - confirm.
 */
169 | .pushsection .text..__x86.rethunk_safe |
170 | SYM_CODE_START_NOALIGN(srso_alias_safe_ret) |
/* Discard the return address pushed by the CALL in srso_alias_return_thunk. */
171 | lea 8(%_ASM_SP), %_ASM_SP |
172 | UNWIND_HINT_FUNC |
173 | ANNOTATE_UNRET_SAFE |
174 | ret |
175 | int3 |
176 | SYM_FUNC_END(srso_alias_safe_ret) |
177 | |
178 | SYM_CODE_START_NOALIGN(srso_alias_return_thunk) |
179 | UNWIND_HINT_FUNC |
180 | ANNOTATE_NOENDBR |
181 | call srso_alias_safe_ret |
182 | ud2 |
183 | SYM_CODE_END(srso_alias_return_thunk) |
184 | .popsection |
185 | |
186 | /* |
187 | * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() |
188 | * below. On kernel entry, srso_untrain_ret() is executed which is a |
189 | * |
190 | * movabs $0xccccc30824648d48,%rax |
191 | * |
192 | * and when the return thunk executes the inner label srso_safe_ret() |
193 | * later, it is a stack manipulation and a RET which is mispredicted and |
194 | * thus a "safe" one to use. |
195 | */ |
/*
 * Layout: after aligning to 64 bytes, pad with 0xcc (INT3) bytes so that
 * srso_untrain_ret starts (srso_safe_ret - srso_untrain_ret) bytes
 * before the next 64-byte boundary.  That puts the inner label
 * srso_safe_ret exactly on a 64-byte boundary.
 *
 * The two bytes 0x48 0xb8 are the opcode of "movabs $imm64, %rax".  When
 * entered at srso_untrain_ret, the eight bytes that follow (the LEA, the
 * RET and the two INT3s at srso_safe_ret) are consumed as that
 * instruction's immediate; execution then continues at the LFENCE.
 * When entered at srso_safe_ret, the same bytes execute as
 * "lea 8(%_ASM_SP), %_ASM_SP; ret".
 */
196 | .align 64 |
197 | .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc |
198 | SYM_CODE_START_LOCAL_NOALIGN(srso_untrain_ret) |
199 | ANNOTATE_NOENDBR |
200 | .byte 0x48, 0xb8 |
201 | |
202 | /* |
203 | * This forces the function return instruction to speculate into a trap |
204 | * (UD2 in srso_return_thunk() below). This RET will then mispredict |
205 | * and execution will continue at the return site read from the top of |
206 | * the stack. |
207 | */ |
208 | SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) |
209 | lea 8(%_ASM_SP), %_ASM_SP |
210 | ret |
211 | int3 |
212 | int3 |
213 | /* end of movabs */ |
214 | lfence |
/* Reached only via srso_untrain_ret: perform the safe return from here. */
215 | call srso_safe_ret |
216 | ud2 |
217 | SYM_CODE_END(srso_safe_ret) |
218 | SYM_FUNC_END(srso_untrain_ret) |
219 | |
/*
 * srso_return_thunk - return thunk for the SRSO (Zen1/2) case.
 *
 * CALLs srso_safe_ret, which discards the return address pushed by this
 * CALL and executes the RET on behalf of the thunk's caller.  The UD2
 * after the CALL is never returned to architecturally.
 */
220 | SYM_CODE_START(srso_return_thunk) |
221 | UNWIND_HINT_FUNC |
222 | ANNOTATE_NOENDBR |
223 | call srso_safe_ret |
224 | ud2 |
225 | SYM_CODE_END(srso_return_thunk) |
226 | |
/* Instruction string used as the SRSO replacement in entry_untrain_ret. */
227 | #define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret" |
228 | #else /* !CONFIG_MITIGATION_SRSO */ |
229 | /* Dummy for the alternative in CALL_UNTRAIN_RET. */ |
/*
 * Without CONFIG_MITIGATION_SRSO the symbol still has to exist; here it
 * is nothing but "ret; int3".
 */
230 | SYM_CODE_START(srso_alias_untrain_ret) |
231 | ANNOTATE_UNRET_SAFE |
232 | ANNOTATE_NOENDBR |
233 | ret |
234 | int3 |
235 | SYM_FUNC_END(srso_alias_untrain_ret) |
236 | __EXPORT_THUNK(srso_alias_untrain_ret) |
/* No srso_untrain_ret in this configuration: the SRSO replacement is a UD2. */
237 | #define JMP_SRSO_UNTRAIN_RET "ud2" |
238 | #endif /* CONFIG_MITIGATION_SRSO */ |
239 | |
240 | #ifdef CONFIG_MITIGATION_UNRET_ENTRY |
241 | |
242 | /* |
243 | * Some generic notes on the untraining sequences: |
244 | * |
245 | * They are interchangeable when it comes to flushing potentially wrong |
246 | * RET predictions from the BTB. |
247 | * |
248 | * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the |
249 | * Retbleed sequence because the return sequence done there |
250 | * (srso_safe_ret()) is longer and the return sequence must fully nest |
251 | * (end before) the untraining sequence. Therefore, the untraining |
252 | * sequence must fully overlap the return sequence. |
253 | * |
254 | * Regarding alignment - the instructions which need to be untrained, |
255 | * must all start at a cacheline boundary for Zen1/2 generations. That |
256 | * is, instruction sequences starting at srso_safe_ret() and |
257 | * the respective instruction sequences at retbleed_return_thunk() |
258 | * must start at a cacheline boundary. |
259 | */ |
260 | |
261 | /* |
262 | * Safety details here pertain to the AMD Zen{1,2} microarchitecture: |
263 | * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for |
264 | * alignment within the BTB. |
265 | * 2) The instruction at retbleed_untrain_ret must contain, and not |
266 | * end with, the 0xc3 byte of the RET. |
267 | * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread |
268 | * from re-poisoning the BTB prediction. |
269 | */ |
/*
 * Layout: after aligning to 64 bytes, pad with 0xcc (INT3) bytes so that
 * retbleed_untrain_ret starts
 * (retbleed_return_thunk - retbleed_untrain_ret) bytes before the next
 * 64-byte boundary.  That puts the inner label retbleed_return_thunk,
 * i.e. the RET itself, exactly on a 64-byte boundary.
 */
270 | .align 64 |
271 | .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc |
272 | SYM_CODE_START_LOCAL_NOALIGN(retbleed_untrain_ret) |
273 | ANNOTATE_NOENDBR |
274 | /* |
275 | * As executed from retbleed_untrain_ret, this is: |
276 | * |
277 | * TEST $0xcc, %bl |
278 | * LFENCE |
279 | * JMP retbleed_return_thunk |
280 | * |
281 | * Executing the TEST instruction has a side effect of evicting any BTB |
282 | * prediction (potentially attacker controlled) attached to the RET, as |
283 | * retbleed_return_thunk + 1 isn't an instruction boundary at the moment. |
284 | */ |
285 | .byte 0xf6 |
286 | |
287 | /* |
288 | * As executed from retbleed_return_thunk, this is a plain RET. |
289 | * |
290 | * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. |
291 | * |
292 | * We subsequently jump backwards and architecturally execute the RET. |
293 | * This creates a correct BTB prediction (type=ret), but in the |
294 | * meantime we suffer Straight Line Speculation (because the type was |
295 | * no branch) which is halted by the INT3. |
296 | * |
297 | * With SMT enabled and STIBP active, a sibling thread cannot poison |
298 | * RET's prediction to a type of its choice, but can evict the |
299 | * prediction due to competitive sharing. If the prediction is |
300 | * evicted, retbleed_return_thunk will suffer Straight Line Speculation |
301 | * which will be contained safely by the INT3. |
302 | */ |
303 | SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL) |
304 | ret |
305 | int3 |
306 | SYM_CODE_END(retbleed_return_thunk) |
307 | |
308 | /* |
309 | * Ensure the TEST decoding / BTB invalidation is complete. |
310 | */ |
311 | lfence |
312 | |
313 | /* |
314 | * Jump back and execute the RET in the middle of the TEST instruction. |
315 | * INT3 is for SLS protection. |
316 | */ |
317 | jmp retbleed_return_thunk |
318 | int3 |
319 | SYM_FUNC_END(retbleed_untrain_ret) |
320 | |
/* Instruction string used as the first ALTERNATIVE argument in entry_untrain_ret. */
321 | #define JMP_RETBLEED_UNTRAIN_RET "jmp retbleed_untrain_ret" |
322 | #else /* !CONFIG_MITIGATION_UNRET_ENTRY */ |
/* No retbleed_untrain_ret in this configuration: entry_untrain_ret gets a UD2 here. */
323 | #define JMP_RETBLEED_UNTRAIN_RET "ud2" |
324 | #endif /* CONFIG_MITIGATION_UNRET_ENTRY */ |
325 | |
326 | #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) |
327 | |
/*
 * entry_untrain_ret - single entry point that dispatches to one of the
 * untraining sequences above.
 *
 * The body is one ALTERNATIVE: JMP_RETBLEED_UNTRAIN_RET as assembled,
 * replaced by JMP_SRSO_UNTRAIN_RET when X86_FEATURE_SRSO is set.  Each
 * of the two strings is either a JMP to the respective untraining
 * sequence or "ud2" when that mitigation is not configured.  There is no
 * RET here; the untraining sequence that is jumped to ends in a RET.
 */
328 | SYM_FUNC_START(entry_untrain_ret) |
329 | ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO |
330 | SYM_FUNC_END(entry_untrain_ret) |
331 | __EXPORT_THUNK(entry_untrain_ret) |
332 | |
333 | #endif /* CONFIG_MITIGATION_UNRET_ENTRY || CONFIG_MITIGATION_SRSO */ |
334 | |
335 | #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING |
336 | |
/*
 * call_depth_return_thunk - return thunk used with call depth tracking.
 *
 * Shifts the per-CPU value at pcpu_hot + X86_call_depth left by 5 bits.
 * If the result is non-zero, the thunk simply returns.  If the result is
 * zero, it first executes 16 CALLs (see below), invokes
 * CREDIT_CALL_DEPTH and only then returns.
 *
 * NOTE(review): the 16 CALLs presumably refill the CPU's return stack
 * buffer ("stuffing", going by CALL_THUNKS_DEBUG_INC_STUFFS) - confirm.
 * CALL_THUNKS_DEBUG_INC_RETS, CALL_THUNKS_DEBUG_INC_STUFFS and
 * CREDIT_CALL_DEPTH are defined outside this file.
 */
337 | .align 64 |
338 | SYM_FUNC_START(call_depth_return_thunk) |
339 | ANNOTATE_NOENDBR |
340 | /* |
341 | * Keep the hotpath in a 16byte I-fetch for the non-debug |
342 | * case. |
343 | */ |
344 | CALL_THUNKS_DEBUG_INC_RETS |
345 | shlq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth) |
/* ZF set: the shifted value became zero, take the slow path at 1:. */
346 | jz 1f |
347 | ANNOTATE_UNRET_SAFE |
348 | ret |
349 | int3 |
350 | 1: |
351 | CALL_THUNKS_DEBUG_INC_STUFFS |
/*
 * 16 times: CALL the label right behind the following INT3.  Each CALL
 * pushes one 8-byte return address and execution falls through into the
 * next repetition.
 */
352 | .rept 16 |
353 | ANNOTATE_INTRA_FUNCTION_CALL |
354 | call 2f |
355 | int3 |
356 | 2: |
357 | .endr |
/* Drop the 16 return addresses pushed above (16 * 8 bytes). */
358 | add $(8*16), %rsp |
359 | |
360 | CREDIT_CALL_DEPTH |
361 | |
362 | ANNOTATE_UNRET_SAFE |
363 | ret |
364 | int3 |
365 | SYM_FUNC_END(call_depth_return_thunk) |
366 | |
367 | #endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ |
368 | |
369 | /* |
370 | * This function name is magical and is used by -mfunction-return=thunk-extern |
371 | * for the compiler to generate JMPs to it. |
372 | * |
373 | * This code is only used during kernel boot or module init. All |
374 | * 'JMP __x86_return_thunk' sites are changed to something else by |
375 | * apply_returns(). |
376 | * |
377 | * The ALTERNATIVE below adds a really loud warning to catch the case |
378 | * where the insufficient default return thunk ends up getting used for |
379 | * whatever reason like miscompilation or failure of |
380 | * objtool/alternatives/etc to patch all the return sites. |
381 | */ |
382 | SYM_CODE_START(__x86_return_thunk) |
383 | UNWIND_HINT_FUNC |
384 | ANNOTATE_NOENDBR |
/*
 * If any of the mitigations below is configured, the as-assembled RET is
 * wrapped in an ALTERNATIVE whose replacement, keyed on
 * X86_FEATURE_ALWAYS, is a JMP to warn_thunk_thunk (defined outside this
 * file).  Otherwise the thunk is a plain RET.  In both cases it is
 * followed by an INT3.
 */
385 | #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || \ |
386 | defined(CONFIG_MITIGATION_SRSO) || \ |
387 | defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) |
388 | ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \ |
389 | "jmp warn_thunk_thunk" , X86_FEATURE_ALWAYS |
390 | #else |
391 | ANNOTATE_UNRET_SAFE |
392 | ret |
393 | #endif |
394 | int3 |
395 | SYM_CODE_END(__x86_return_thunk) |
/* Exported with plain EXPORT_SYMBOL(), i.e. without the _ASM_NOKPROBE() of __EXPORT_THUNK(). */
396 | EXPORT_SYMBOL(__x86_return_thunk) |
397 | |
398 | #endif /* CONFIG_MITIGATION_RETHUNK */ |
399 | |