// SPDX-License-Identifier: GPL-2.0
/*
 * AMD Encrypted Register State Support
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 *
 * This file is not compiled stand-alone. It contains code shared
 * between the pre-decompression boot code and the running Linux kernel
 * and is included directly into both code-bases.
 */

#include <asm/setup_data.h>

#ifndef __BOOT_COMPRESSED
#define error(v)			pr_err(v)
#define has_cpuflag(f)			boot_cpu_has(f)
#define sev_printk(fmt, ...)		printk(fmt, ##__VA_ARGS__)
#define sev_printk_rtl(fmt, ...)	printk_ratelimited(fmt, ##__VA_ARGS__)
#else
#undef WARN
#define WARN(condition, format...) (!!(condition))
#define sev_printk(fmt, ...)
#define sev_printk_rtl(fmt, ...)
#endif

/* I/O parameters for CPUID-related helpers */
struct cpuid_leaf {
	u32 fn;
	u32 subfn;
	u32 eax;
	u32 ebx;
	u32 ecx;
	u32 edx;
};

/*
 * Individual entries of the SNP CPUID table, as defined by the SNP
 * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
 */
struct snp_cpuid_fn {
	u32 eax_in;
	u32 ecx_in;
	u64 xcr0_in;
	u64 xss_in;
	u32 eax;
	u32 ebx;
	u32 ecx;
	u32 edx;
	u64 __reserved;
} __packed;

/*
 * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
 * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
 * of 64 entries per CPUID table.
 */
#define SNP_CPUID_COUNT_MAX 64

struct snp_cpuid_table {
	u32 count;
	u32 __reserved1;
	u64 __reserved2;
	struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
} __packed;

/*
 * Since feature negotiation related variables are set early in the boot
 * process they must reside in the .data section so as not to be zeroed
 * out when the .bss section is later cleared.
 *
 * GHCB protocol version negotiated with the hypervisor.
 */
static u16 ghcb_version __ro_after_init;

/* Copy of the SNP firmware's CPUID page. */
static struct snp_cpuid_table cpuid_table_copy __ro_after_init;

/*
 * These will be initialized based on CPUID table so that non-present
 * all-zero leaves (for sparse tables) can be differentiated from
 * invalid/out-of-range leaves. This is needed since all-zero leaves
 * still need to be post-processed.
 */
static u32 cpuid_std_range_max __ro_after_init;
static u32 cpuid_hyp_range_max __ro_after_init;
static u32 cpuid_ext_range_max __ro_after_init;
static bool __init sev_es_check_cpu_features(void)
{
	if (!has_cpuflag(X86_FEATURE_RDRAND)) {
		error("RDRAND instruction not supported - no trusted source of randomness available\n");
		return false;
	}

	return true;
}

static void __head __noreturn
sev_es_terminate(unsigned int set, unsigned int reason)
{
	u64 val = GHCB_MSR_TERM_REQ;

	/* Tell the hypervisor what went wrong. */
	val |= GHCB_SEV_TERM_REASON(set, reason);

	/* Request Guest Termination from Hypervisor */
	sev_es_wr_ghcb_msr(val);
	VMGEXIT();

	while (true)
		asm volatile("hlt\n" : : : "memory");
}

/*
 * The hypervisor features are available from GHCB version 2 onward.
 */
static u64 get_hv_features(void)
{
	u64 val;

	if (ghcb_version < 2)
		return 0;

	sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP)
		return 0;

	return GHCB_MSR_HV_FT_RESP_VAL(val);
}

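/*
 * Register the GHCB's guest physical address with the hypervisor via the
 * MSR protocol. The hypervisor echoes the PFN back in its response; any
 * mismatch is treated as fatal and the guest is terminated.
 */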
static void snp_register_ghcb_early(unsigned long paddr)
{
	unsigned long pfn = paddr >> PAGE_SHIFT;
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();

	/* If the response GPA is not ours then abort the guest */
	if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
	    (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
}

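/*
 * Negotiate the GHCB protocol version with the hypervisor via the SEV_INFO
 * MSR request and cache the highest version supported by both sides in
 * ghcb_version.
 */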
static bool sev_es_negotiate_protocol(void)
{
	u64 val;

	/* Do the GHCB protocol version negotiation */
	sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();

	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
		return false;

	if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
	    GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
		return false;

	ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);

	return true;
}

static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
{
	ghcb->save.sw_exit_code = 0;
	__builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}

static bool vc_decoding_needed(unsigned long exit_code)
{
	/* Exceptions don't require decoding the instruction */
	return !(exit_code >= SVM_EXIT_EXCP_BASE &&
		 exit_code <= SVM_EXIT_LAST_EXCP);
}

static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
				      struct pt_regs *regs,
				      unsigned long exit_code)
{
	enum es_result ret = ES_OK;

	memset(ctxt, 0, sizeof(*ctxt));
	ctxt->regs = regs;

	if (vc_decoding_needed(exit_code))
		ret = vc_decode_insn(ctxt);

	return ret;
}

static void vc_finish_insn(struct es_em_ctxt *ctxt)
{
	ctxt->regs->ip += ctxt->insn.length;
}

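/*
 * Per the GHCB protocol, the hypervisor reports the outcome of a VMGEXIT in
 * the lower 32 bits of sw_exit_info_1: 0 means success, 1 means exception
 * information was passed back in sw_exit_info_2 (in SVM event-injection
 * format), and anything else is treated as an error from the VMM.
 */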
static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	u32 ret;

	ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0);
	if (!ret)
		return ES_OK;

	if (ret == 1) {
		u64 info = ghcb->save.sw_exit_info_2;
		unsigned long v = info & SVM_EVTINJ_VEC_MASK;

		/* Check if exception information from hypervisor is sane. */
		if ((info & SVM_EVTINJ_VALID) &&
		    ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
		    ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
			ctxt->fi.vector = v;

			if (info & SVM_EVTINJ_VALID_ERR)
				ctxt->fi.error_code = info >> 32;

			return ES_EXCEPTION;
		}
	}

	return ES_VMM_ERROR;
}

static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
					  struct es_em_ctxt *ctxt,
					  u64 exit_code, u64 exit_info_1,
					  u64 exit_info_2)
{
	/* Fill in protocol and format specifiers */
	ghcb->protocol_version = ghcb_version;
	ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;

	ghcb_set_sw_exit_code(ghcb, exit_code);
	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	return verify_exception_info(ghcb, ctxt);
}

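/*
 * Retrieve a single CPUID register via the GHCB MSR protocol: one VMGEXIT
 * is required per register, and the requested value is returned in the
 * upper 32 bits of the GHCB MSR.
 */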
static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
{
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx));
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
		return -EIO;

	*reg = (val >> 32);

	return 0;
}

static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf)
{
	int ret;

	/*
	 * MSR protocol does not support fetching non-zero subfunctions, but is
	 * sufficient to handle current early-boot cases. Should that change,
	 * make sure to report an error rather than ignoring the index and
	 * grabbing random values. If this issue arises in the future, handling
	 * can be added here to use GHCB-page protocol for cases that occur late
	 * enough in boot that GHCB page is available.
	 */
	if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn)
		return -EINVAL;

	ret = __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx);

	return ret;
}

static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	u32 cr4 = native_read_cr4();
	int ret;

	ghcb_set_rax(ghcb, leaf->fn);
	ghcb_set_rcx(ghcb, leaf->subfn);

	if (cr4 & X86_CR4_OSXSAVE)
		/* Safe to read xcr0 */
		ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
	else
		/* xgetbv will cause #UD - use reset value for xcr0 */
		ghcb_set_xcr0(ghcb, 1);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) &&
	      ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	leaf->eax = ghcb->save.rax;
	leaf->ebx = ghcb->save.rbx;
	leaf->ecx = ghcb->save.rcx;
	leaf->edx = ghcb->save.rdx;

	return ES_OK;
}

static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf)
		    : __sev_cpuid_hv_msr(leaf);
}

/*
 * This may be called early while still running on the initial identity
 * mapping. Use RIP-relative addressing to obtain the correct address
 * while running with the initial identity mapping as well as after the
 * switch-over to kernel virtual addresses later.
 */
static const struct snp_cpuid_table *snp_cpuid_get_table(void)
{
	return &RIP_REL_REF(cpuid_table_copy);
}

/*
 * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
 * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
 * and 1 based on the corresponding features enabled by a particular
 * combination of XCR0 and XSS registers so that a guest can look up the
 * version corresponding to the features currently enabled in its XCR0/XSS
 * registers. The only values that differ between these versions/table
 * entries are the enabled XSAVE area sizes advertised via EBX.
 *
 * While hypervisors may choose to make use of this support, it is more
 * robust/secure for a guest to simply find the entry corresponding to the
 * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
 * XSAVE area size using subfunctions 2 through 64, as documented in APM
 * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
 *
 * Since base/legacy XSAVE area size is documented as 0x240, use that value
 * directly rather than relying on the base size in the CPUID table.
 *
 * Return: XSAVE area size on success, 0 otherwise.
 */
static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	u64 xfeatures_found = 0;
	u32 xsave_size = 0x240;
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

		if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
			continue;
		if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
			continue;
		if (xfeatures_found & (BIT_ULL(e->ecx_in)))
			continue;

		xfeatures_found |= (BIT_ULL(e->ecx_in));

		if (compacted)
			xsave_size += e->eax;
		else
			xsave_size = max(xsave_size, e->eax + e->ebx);
	}

	/*
	 * Either the guest set unsupported XCR0/XSS bits, or the corresponding
	 * entries in the CPUID table were not present. This is not a valid
	 * state to be in.
	 */
	if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
		return 0;

	return xsave_size;
}
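
/*
 * Illustrative example (values are the architectural CPUID 0xD numbers for
 * AVX, not taken from a specific table): with XCR0=0x7 (x87, SSE, AVX) and
 * XSS=0, only the subfunction-2 entry matches, reporting a 0x100-byte state
 * component at offset 0x240. The non-compacted format yields
 * max(0x240, 0x100 + 0x240) = 0x340, and the compacted format yields
 * 0x240 + 0x100 = 0x340 as well.
 */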

static bool __head
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

		if (e->eax_in != leaf->fn)
			continue;

		if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
			continue;

		/*
		 * For 0xD subfunctions 0 and 1, only use the entry corresponding
		 * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
		 * See the comments above snp_cpuid_calc_xsave_size() for more
		 * details.
		 */
		if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
			if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
				continue;

		leaf->eax = e->eax;
		leaf->ebx = e->ebx;
		leaf->ecx = e->ecx;
		leaf->edx = e->edx;

		return true;
	}

	return false;
}

static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	if (sev_cpuid_hv(ghcb, ctxt, leaf))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}

static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
				 struct cpuid_leaf *leaf)
{
	struct cpuid_leaf leaf_hv = *leaf;

	switch (leaf->fn) {
	case 0x1:
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* initial APIC ID */
		leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
		/* APIC enabled bit */
		leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));

		/* OSXSAVE enabled bit */
		if (native_read_cr4() & X86_CR4_OSXSAVE)
			leaf->ecx |= BIT(27);
		break;
	case 0x7:
		/* OSPKE enabled bit */
		leaf->ecx &= ~BIT(4);
		if (native_read_cr4() & X86_CR4_PKE)
			leaf->ecx |= BIT(4);
		break;
	case 0xB:
		leaf_hv.subfn = 0;
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* extended APIC ID */
		leaf->edx = leaf_hv.edx;
		break;
	case 0xD: {
		bool compacted = false;
		u64 xcr0 = 1, xss = 0;
		u32 xsave_size;

		if (leaf->subfn != 0 && leaf->subfn != 1)
			return 0;

		if (native_read_cr4() & X86_CR4_OSXSAVE)
			xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
		if (leaf->subfn == 1) {
			/* Get XSS value if XSAVES is enabled. */
			if (leaf->eax & BIT(3)) {
				unsigned long lo, hi;

				asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
						     : "c" (MSR_IA32_XSS));
				xss = (hi << 32) | lo;
			}

			/*
			 * The PPR and APM aren't clear on what size should be
			 * encoded in 0xD:0x1:EBX when compaction is not enabled
			 * by either XSAVEC (feature bit 1) or XSAVES (feature
			 * bit 3) since SNP-capable hardware has these feature
			 * bits fixed as 1. KVM sets it to 0 in this case, but
			 * to avoid this becoming an issue it's safer to simply
			 * treat this as unsupported for SNP guests.
			 */
			if (!(leaf->eax & (BIT(1) | BIT(3))))
				return -EINVAL;

			compacted = true;
		}

		xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
		if (!xsave_size)
			return -EINVAL;

		leaf->ebx = xsave_size;
		}
		break;
	case 0x8000001E:
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* extended APIC ID */
		leaf->eax = leaf_hv.eax;
		/* compute ID */
		leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));
		/* node ID */
		leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
		break;
	default:
		/* No fix-ups needed, use values as-is. */
		break;
	}

	return 0;
}

/*
 * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
 * should be treated as fatal by caller.
 */
static int __head
snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

	if (!cpuid_table->count)
		return -EOPNOTSUPP;

	if (!snp_cpuid_get_validated_func(leaf)) {
		/*
		 * Some hypervisors will avoid keeping track of CPUID entries
		 * where all values are zero, since they can be handled the
		 * same as out-of-range values (all-zero). This is useful here
		 * as well as it allows virtually all guest configurations to
		 * work using a single SNP CPUID table.
		 *
		 * To allow for this, there is a need to distinguish between
		 * out-of-range entries and in-range zero entries, since the
		 * CPUID table entries are only a template that may need to be
		 * augmented with additional values for things like
		 * CPU-specific information during post-processing. So if it's
		 * not in the table, set the values to zero. Then, if they are
		 * within a valid CPUID range, proceed with post-processing
		 * using zeros as the initial values. Otherwise, skip
		 * post-processing and just return zeros immediately.
		 */
		leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;

		/* Skip post-processing for out-of-range zero leafs. */
		if (!(leaf->fn <= RIP_REL_REF(cpuid_std_range_max) ||
		      (leaf->fn >= 0x40000000 && leaf->fn <= RIP_REL_REF(cpuid_hyp_range_max)) ||
		      (leaf->fn >= 0x80000000 && leaf->fn <= RIP_REL_REF(cpuid_ext_range_max))))
			return 0;
	}

	return snp_cpuid_postprocess(ghcb, ctxt, leaf);
}

/*
 * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
 * page yet, so it only supports the MSR based communication with the
 * hypervisor and only the CPUID exit-code.
 */
void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
	unsigned int subfn = lower_bits(regs->cx, 32);
	unsigned int fn = lower_bits(regs->ax, 32);
	u16 opcode = *(unsigned short *)regs->ip;
	struct cpuid_leaf leaf;
	int ret;

	/* Only CPUID is supported via MSR protocol */
	if (exit_code != SVM_EXIT_CPUID)
		goto fail;

	/* Is it really a CPUID insn? */
	if (opcode != 0xa20f)
		goto fail;

	leaf.fn = fn;
	leaf.subfn = subfn;

	ret = snp_cpuid(NULL, NULL, &leaf);
	if (!ret)
		goto cpuid_done;

	if (ret != -EOPNOTSUPP)
		goto fail;

	if (__sev_cpuid_hv_msr(&leaf))
		goto fail;

cpuid_done:
	regs->ax = leaf.eax;
	regs->bx = leaf.ebx;
	regs->cx = leaf.ecx;
	regs->dx = leaf.edx;

	/*
	 * This is a VC handler and the #VC is only raised when SEV-ES is
	 * active, which means SEV must be active too. Do sanity checks on the
	 * CPUID results to make sure the hypervisor does not trick the kernel
	 * into the no-sev path. This could map sensitive data unencrypted and
	 * make it accessible to the hypervisor.
	 *
	 * In particular, check for:
	 *	- Availability of CPUID leaf 0x8000001f
	 *	- SEV CPUID bit.
	 *
	 * The hypervisor might still report the wrong C-bit position, but this
	 * can't be checked here.
	 */

	if (fn == 0x80000000 && (regs->ax < 0x8000001f))
		/* SEV leaf check */
		goto fail;
	else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
		/* SEV bit */
		goto fail;

	/* Skip over the CPUID two-byte opcode */
	regs->ip += 2;

	return;

fail:
	/* Terminate the guest */
	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}

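/*
 * String I/O issued from user mode must not be allowed to touch kernel
 * address space; report a #PF back to the caller in that case so it can be
 * injected into the guest context.
 */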
static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt,
					   unsigned long address,
					   bool write)
{
	if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) {
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_USER;
		ctxt->fi.cr2 = address;
		if (write)
			ctxt->fi.error_code |= X86_PF_WRITE;

		return ES_EXCEPTION;
	}

	return ES_OK;
}

static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
					  void *src, char *buf,
					  unsigned int data_size,
					  unsigned int count,
					  bool backwards)
{
	int i, b = backwards ? -1 : 1;
	unsigned long address = (unsigned long)src;
	enum es_result ret;

	ret = vc_insn_string_check(ctxt, address, false);
	if (ret != ES_OK)
		return ret;

	for (i = 0; i < count; i++) {
		void *s = src + (i * data_size * b);
		char *d = buf + (i * data_size);

		ret = vc_read_mem(ctxt, s, d, data_size);
		if (ret != ES_OK)
			break;
	}

	return ret;
}

static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
					   void *dst, char *buf,
					   unsigned int data_size,
					   unsigned int count,
					   bool backwards)
{
	int i, s = backwards ? -1 : 1;
	unsigned long address = (unsigned long)dst;
	enum es_result ret;

	ret = vc_insn_string_check(ctxt, address, true);
	if (ret != ES_OK)
		return ret;

	for (i = 0; i < count; i++) {
		void *d = dst + (i * data_size * s);
		char *b = buf + (i * data_size);

		ret = vc_write_mem(ctxt, d, b, data_size);
		if (ret != ES_OK)
			break;
	}

	return ret;
}

#define IOIO_TYPE_STR	BIT(2)
#define IOIO_TYPE_IN	1
#define IOIO_TYPE_INS	(IOIO_TYPE_IN | IOIO_TYPE_STR)
#define IOIO_TYPE_OUT	0
#define IOIO_TYPE_OUTS	(IOIO_TYPE_OUT | IOIO_TYPE_STR)

#define IOIO_REP	BIT(3)

#define IOIO_ADDR_64	BIT(9)
#define IOIO_ADDR_32	BIT(8)
#define IOIO_ADDR_16	BIT(7)

#define IOIO_DATA_32	BIT(6)
#define IOIO_DATA_16	BIT(5)
#define IOIO_DATA_8	BIT(4)

#define IOIO_SEG_ES	(0 << 10)
#define IOIO_SEG_DS	(3 << 10)

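/*
 * Build the IOIO exit information value from the decoded instruction: the
 * type/REP bits and the address/data size flags defined above go into the
 * low word, and the port number is placed in bits 31:16.
 */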
static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
{
	struct insn *insn = &ctxt->insn;
	size_t size;
	u64 port;

	*exitinfo = 0;

	switch (insn->opcode.bytes[0]) {
	/* INS opcodes */
	case 0x6c:
	case 0x6d:
		*exitinfo |= IOIO_TYPE_INS;
		*exitinfo |= IOIO_SEG_ES;
		port = ctxt->regs->dx & 0xffff;
		break;

	/* OUTS opcodes */
	case 0x6e:
	case 0x6f:
		*exitinfo |= IOIO_TYPE_OUTS;
		*exitinfo |= IOIO_SEG_DS;
		port = ctxt->regs->dx & 0xffff;
		break;

	/* IN immediate opcodes */
	case 0xe4:
	case 0xe5:
		*exitinfo |= IOIO_TYPE_IN;
		port = (u8)insn->immediate.value & 0xffff;
		break;

	/* OUT immediate opcodes */
	case 0xe6:
	case 0xe7:
		*exitinfo |= IOIO_TYPE_OUT;
		port = (u8)insn->immediate.value & 0xffff;
		break;

	/* IN register opcodes */
	case 0xec:
	case 0xed:
		*exitinfo |= IOIO_TYPE_IN;
		port = ctxt->regs->dx & 0xffff;
		break;

	/* OUT register opcodes */
	case 0xee:
	case 0xef:
		*exitinfo |= IOIO_TYPE_OUT;
		port = ctxt->regs->dx & 0xffff;
		break;

	default:
		return ES_DECODE_FAILED;
	}

	*exitinfo |= port << 16;

	switch (insn->opcode.bytes[0]) {
	case 0x6c:
	case 0x6e:
	case 0xe4:
	case 0xe6:
	case 0xec:
	case 0xee:
		/* Single byte opcodes */
		*exitinfo |= IOIO_DATA_8;
		size = 1;
		break;
	default:
		/* Length determined by instruction parsing */
		*exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
						     : IOIO_DATA_32;
		size = (insn->opnd_bytes == 2) ? 2 : 4;
	}

	switch (insn->addr_bytes) {
	case 2:
		*exitinfo |= IOIO_ADDR_16;
		break;
	case 4:
		*exitinfo |= IOIO_ADDR_32;
		break;
	case 8:
		*exitinfo |= IOIO_ADDR_64;
		break;
	}

	if (insn_has_rep_prefix(insn))
		*exitinfo |= IOIO_REP;

	return vc_ioio_check(ctxt, (u16)port, size);
}

static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	u64 exit_info_1, exit_info_2;
	enum es_result ret;

	ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
	if (ret != ES_OK)
		return ret;

	if (exit_info_1 & IOIO_TYPE_STR) {

		/* (REP) INS/OUTS */

		bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
		unsigned int io_bytes, exit_bytes;
		unsigned int ghcb_count, op_count;
		unsigned long es_base;
		u64 sw_scratch;

		/*
		 * For the string variants with rep prefix the amount of in/out
		 * operations per #VC exception is limited so that the kernel
		 * has a chance to take interrupts and re-schedule while the
		 * instruction is emulated.
		 */
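		/* The IOIO_DATA_* field (bits 4-6) directly encodes 1, 2 or 4 bytes. */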
		io_bytes = (exit_info_1 >> 4) & 0x7;
		ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;

		op_count = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
		exit_info_2 = min(op_count, ghcb_count);
		exit_bytes = exit_info_2 * io_bytes;

		es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

		/* Read bytes of OUTS into the shared buffer */
		if (!(exit_info_1 & IOIO_TYPE_IN)) {
			ret = vc_insn_string_read(ctxt,
						  (void *)(es_base + regs->si),
						  ghcb->shared_buffer, io_bytes,
						  exit_info_2, df);
			if (ret)
				return ret;
		}

		/*
		 * Issue a VMGEXIT to the HV to consume the bytes from the
		 * shared buffer or to have it write them into the shared buffer
		 * depending on the instruction: OUTS or INS.
		 */
		sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
		ghcb_set_sw_scratch(ghcb, sw_scratch);
		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
					  exit_info_1, exit_info_2);
		if (ret != ES_OK)
			return ret;

		/* Read bytes from shared buffer into the guest's destination. */
		if (exit_info_1 & IOIO_TYPE_IN) {
			ret = vc_insn_string_write(ctxt,
						   (void *)(es_base + regs->di),
						   ghcb->shared_buffer, io_bytes,
						   exit_info_2, df);
			if (ret)
				return ret;

			if (df)
				regs->di -= exit_bytes;
			else
				regs->di += exit_bytes;
		} else {
			if (df)
				regs->si -= exit_bytes;
			else
				regs->si += exit_bytes;
		}

		if (exit_info_1 & IOIO_REP)
			regs->cx -= exit_info_2;

		ret = regs->cx ? ES_RETRY : ES_OK;

	} else {

		/* IN/OUT into/from rAX */

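		/*
		 * IOIO_DATA_8/16/32 occupy bits 4-6 of exit_info_1, so shifting
		 * the masked value right by one yields the access width in
		 * bits (8, 16 or 32).
		 */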
		int bits = (exit_info_1 & 0x70) >> 1;
		u64 rax = 0;

		if (!(exit_info_1 & IOIO_TYPE_IN))
			rax = lower_bits(regs->ax, bits);

		ghcb_set_rax(ghcb, rax);

		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
		if (ret != ES_OK)
			return ret;

		if (exit_info_1 & IOIO_TYPE_IN) {
			if (!ghcb_rax_is_valid(ghcb))
				return ES_VMM_ERROR;
			regs->ax = lower_bits(ghcb->save.rax, bits);
		}
	}

	return ret;
}

static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	struct cpuid_leaf leaf;
	int ret;

	leaf.fn = regs->ax;
	leaf.subfn = regs->cx;
	ret = snp_cpuid(ghcb, ctxt, &leaf);
	if (!ret) {
		regs->ax = leaf.eax;
		regs->bx = leaf.ebx;
		regs->cx = leaf.ecx;
		regs->dx = leaf.edx;
	}

	return ret;
}

static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	u32 cr4 = native_read_cr4();
	enum es_result ret;
	int snp_cpuid_ret;

	snp_cpuid_ret = vc_handle_cpuid_snp(ghcb, ctxt);
	if (!snp_cpuid_ret)
		return ES_OK;
	if (snp_cpuid_ret != -EOPNOTSUPP)
		return ES_VMM_ERROR;

	ghcb_set_rax(ghcb, regs->ax);
	ghcb_set_rcx(ghcb, regs->cx);

	if (cr4 & X86_CR4_OSXSAVE)
		/* Safe to read xcr0 */
		ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
	else
		/* xgetbv will cause #GP - use reset value for xcr0 */
		ghcb_set_xcr0(ghcb, 1);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) &&
	      ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	regs->ax = ghcb->save.rax;
	regs->bx = ghcb->save.rbx;
	regs->cx = ghcb->save.rcx;
	regs->dx = ghcb->save.rdx;

	return ES_OK;
}

static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt,
				      unsigned long exit_code)
{
	bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
	enum es_result ret;

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
	      (!rdtscp || ghcb_rcx_is_valid(ghcb))))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;
	ctxt->regs->dx = ghcb->save.rdx;
	if (rdtscp)
		ctxt->regs->cx = ghcb->save.rcx;

	return ES_OK;
}

struct cc_setup_data {
	struct setup_data header;
	u32 cc_blob_address;
};

/*
 * Search for a Confidential Computing blob passed in as a setup_data entry
 * via the Linux Boot Protocol.
 */
static __head
struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{
	struct cc_setup_data *sd = NULL;
	struct setup_data *hdr;

	hdr = (struct setup_data *)bp->hdr.setup_data;

	while (hdr) {
		if (hdr->type == SETUP_CC_BLOB) {
			sd = (struct cc_setup_data *)hdr;
			return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address;
		}
		hdr = (struct setup_data *)hdr->next;
	}

	return NULL;
}

/*
 * Initialize the kernel's copy of the SNP CPUID table, and set up the
 * pointer that will be used to access it.
 *
 * Maintaining a direct mapping of the SNP CPUID table used by firmware would
 * be possible as an alternative, but the approach is brittle since the
 * mapping needs to be updated in sync with all the changes to virtual memory
 * layout and related mapping facilities throughout the boot process.
 */
static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
{
	const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
	int i;

	if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys;
	if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table = snp_cpuid_get_table();
	memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table));

	/* Initialize CPUID ranges for range-checking. */
	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		if (fn->eax_in == 0x0)
			RIP_REL_REF(cpuid_std_range_max) = fn->eax;
		else if (fn->eax_in == 0x40000000)
			RIP_REL_REF(cpuid_hyp_range_max) = fn->eax;
		else if (fn->eax_in == 0x80000000)
			RIP_REL_REF(cpuid_ext_range_max) = fn->eax;
	}
}

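/*
 * Validate or invalidate the guest memory described by the PSC descriptor
 * using the PVALIDATE instruction. If a 2M validation fails with a size
 * mismatch (e.g. the range is backed by 4K RMP entries), fall back to
 * validating the range one 4K page at a time.
 */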
static void pvalidate_pages(struct snp_psc_desc *desc)
{
	struct psc_entry *e;
	unsigned long vaddr;
	unsigned int size;
	unsigned int i;
	bool validate;
	int rc;

	for (i = 0; i <= desc->hdr.end_entry; i++) {
		e = &desc->entries[i];

		vaddr = (unsigned long)pfn_to_kaddr(e->gfn);
		size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
		validate = e->operation == SNP_PAGE_STATE_PRIVATE;

		rc = pvalidate(vaddr, size, validate);
		if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
			unsigned long vaddr_end = vaddr + PMD_SIZE;

			for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) {
				rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
				if (rc)
					break;
			}
		}

		if (rc) {
			WARN(1, "Failed to validate address 0x%lx ret %d", vaddr, rc);
			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
		}
	}
}

static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
{
	int cur_entry, end_entry, ret = 0;
	struct snp_psc_desc *data;
	struct es_em_ctxt ctxt;

	vc_ghcb_invalidate(ghcb);

	/* Copy the input desc into GHCB shared buffer */
	data = (struct snp_psc_desc *)ghcb->shared_buffer;
	memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));

	/*
	 * As per the GHCB specification, the hypervisor can resume the guest
	 * before processing all the entries. Check whether all the entries
	 * are processed. If not, then keep retrying. Note, the hypervisor
	 * will update the data memory directly to indicate the status, so
	 * reference the data->hdr everywhere.
	 *
	 * The strategy here is to wait for the hypervisor to change the page
	 * state in the RMP table before the guest accesses the memory pages.
	 * If the page state change was not successful, then later memory
	 * access will result in a crash.
	 */
	cur_entry = data->hdr.cur_entry;
	end_entry = data->hdr.end_entry;

	while (data->hdr.cur_entry <= data->hdr.end_entry) {
		ghcb_set_sw_scratch(ghcb, (u64)__pa(data));

		/* This will advance the data that the shared buffer points to. */
		ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);

		/*
		 * Page State Change VMGEXIT can pass error code through
		 * exit_info_2.
		 */
		if (WARN(ret || ghcb->save.sw_exit_info_2,
			 "SNP: PSC failed ret=%d exit_info_2=%llx\n",
			 ret, ghcb->save.sw_exit_info_2)) {
			ret = 1;
			goto out;
		}

		/* Verify that reserved bit is not set */
		if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
			ret = 1;
			goto out;
		}

		/*
		 * Sanity check that entry processing is not going backwards.
		 * This will happen only if the hypervisor is tricking us.
		 */
		if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
			 "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
			 end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
			ret = 1;
			goto out;
		}
	}

out:
	return ret;
}

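/*
 * Cross-check the opcode bytes of the trapping instruction against the #VC
 * exit code supplied by the hypervisor, so that a malicious hypervisor cannot
 * steer the handler into emulating an unrelated instruction.
 */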
static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt,
					    unsigned long exit_code)
{
	unsigned int opcode = (unsigned int)ctxt->insn.opcode.value;
	u8 modrm = ctxt->insn.modrm.value;

	switch (exit_code) {

	case SVM_EXIT_IOIO:
	case SVM_EXIT_NPF:
		/* handled separately */
		return ES_OK;

	case SVM_EXIT_CPUID:
		if (opcode == 0xa20f)
			return ES_OK;
		break;

	case SVM_EXIT_INVD:
		if (opcode == 0x080f)
			return ES_OK;
		break;

	case SVM_EXIT_MONITOR:
		if (opcode == 0x010f && modrm == 0xc8)
			return ES_OK;
		break;

	case SVM_EXIT_MWAIT:
		if (opcode == 0x010f && modrm == 0xc9)
			return ES_OK;
		break;

	case SVM_EXIT_MSR:
		/* RDMSR */
		if (opcode == 0x320f ||
		/* WRMSR */
		    opcode == 0x300f)
			return ES_OK;
		break;

	case SVM_EXIT_RDPMC:
		if (opcode == 0x330f)
			return ES_OK;
		break;

	case SVM_EXIT_RDTSC:
		if (opcode == 0x310f)
			return ES_OK;
		break;

	case SVM_EXIT_RDTSCP:
		if (opcode == 0x010f && modrm == 0xf9)
			return ES_OK;
		break;

	case SVM_EXIT_READ_DR7:
		if (opcode == 0x210f &&
		    X86_MODRM_REG(ctxt->insn.modrm.value) == 7)
			return ES_OK;
		break;

	case SVM_EXIT_VMMCALL:
		if (opcode == 0x010f && modrm == 0xd9)
			return ES_OK;

		break;

	case SVM_EXIT_WRITE_DR7:
		if (opcode == 0x230f &&
		    X86_MODRM_REG(ctxt->insn.modrm.value) == 7)
			return ES_OK;
		break;

	case SVM_EXIT_WBINVD:
		if (opcode == 0x90f)
			return ES_OK;
		break;

	default:
		break;
	}

	sev_printk(KERN_ERR "Wrong/unhandled opcode bytes: 0x%x, exit_code: 0x%lx, rIP: 0x%lx\n",
		   opcode, exit_code, ctxt->regs->ip);

	return ES_UNSUPPORTED;
}