1// SPDX-License-Identifier: GPL-2.0
2/* Copyright(c) 2021 Intel Corporation. */
3#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
4
5#include <asm/sgx.h>
6
7#include "cpuid.h"
8#include "kvm_cache_regs.h"
9#include "nested.h"
10#include "sgx.h"
11#include "vmx.h"
12#include "x86.h"
13
14bool __read_mostly enable_sgx = 1;
15module_param_named(sgx, enable_sgx, bool, 0444);
16
17/* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */
18static u64 sgx_pubkey_hash[4] __ro_after_init;
19
20/*
21 * ENCLS's memory operands use a fixed segment (DS) and a fixed
22 * address size based on the mode. Related prefixes are ignored.
23 */
24static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
25 int size, int alignment, gva_t *gva)
26{
27 struct kvm_segment s;
28 bool fault;
29
30 /* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
31 *gva = offset;
32 if (!is_64_bit_mode(vcpu)) {
33 vmx_get_segment(vcpu, var: &s, seg: VCPU_SREG_DS);
34 *gva += s.base;
35 }
36
37 if (!IS_ALIGNED(*gva, alignment)) {
38 fault = true;
39 } else if (likely(is_64_bit_mode(vcpu))) {
40 *gva = vmx_get_untagged_addr(vcpu, gva: *gva, flags: 0);
41 fault = is_noncanonical_address(la: *gva, vcpu);
42 } else {
43 *gva &= 0xffffffff;
44 fault = (s.unusable) ||
45 (s.type != 2 && s.type != 3) ||
46 (*gva > s.limit) ||
47 ((s.base != 0 || s.limit != 0xffffffff) &&
48 (((u64)*gva + size - 1) > s.limit + 1));
49 }
50 if (fault)
51 kvm_inject_gp(vcpu, error_code: 0);
52 return fault ? -EINVAL : 0;
53}
54
55static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
56 unsigned int size)
57{
58 uint64_t data[2] = { addr, size };
59
60 __kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
61}
62
63static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
64 unsigned int size)
65{
66 if (__copy_from_user(to: data, from: (void __user *)hva, n: size)) {
67 sgx_handle_emulation_failure(vcpu, addr: hva, size);
68 return -EFAULT;
69 }
70
71 return 0;
72}
73
74static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
75 gpa_t *gpa)
76{
77 struct x86_exception ex;
78
79 if (write)
80 *gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, exception: &ex);
81 else
82 *gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, exception: &ex);
83
84 if (*gpa == INVALID_GPA) {
85 kvm_inject_emulated_page_fault(vcpu, fault: &ex);
86 return -EFAULT;
87 }
88
89 return 0;
90}
91
92static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
93{
94 *hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
95 if (kvm_is_error_hva(addr: *hva)) {
96 sgx_handle_emulation_failure(vcpu, addr: gpa, size: 1);
97 return -EFAULT;
98 }
99
100 *hva |= gpa & ~PAGE_MASK;
101
102 return 0;
103}
104
105static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
106{
107 struct x86_exception ex;
108
109 /*
110 * A non-EPCM #PF indicates a bad userspace HVA. This *should* check
111 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
112 * but the error code isn't (yet) plumbed through the ENCLS helpers.
113 */
114 if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
115 kvm_prepare_emulation_failure_exit(vcpu);
116 return 0;
117 }
118
119 /*
120 * If the guest thinks it's running on SGX2 hardware, inject an SGX
121 * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
122 * #PF on SGX2). The assumption is that EPCM faults are much more
123 * likely than a bad userspace address.
124 */
125 if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
126 guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
127 memset(&ex, 0, sizeof(ex));
128 ex.vector = PF_VECTOR;
129 ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
130 PFERR_SGX_MASK;
131 ex.address = gva;
132 ex.error_code_valid = true;
133 ex.nested_page_fault = false;
134 kvm_inject_emulated_page_fault(vcpu, fault: &ex);
135 } else {
136 kvm_inject_gp(vcpu, error_code: 0);
137 }
138 return 1;
139}
140
141static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
142 struct sgx_pageinfo *pageinfo,
143 unsigned long secs_hva,
144 gva_t secs_gva)
145{
146 struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
147 struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
148 u64 attributes, xfrm, size;
149 u32 miscselect;
150 u8 max_size_log2;
151 int trapnr, ret;
152
153 sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, function: 0x12, index: 0);
154 sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, function: 0x12, index: 1);
155 if (!sgx_12_0 || !sgx_12_1) {
156 kvm_prepare_emulation_failure_exit(vcpu);
157 return 0;
158 }
159
160 miscselect = contents->miscselect;
161 attributes = contents->attributes;
162 xfrm = contents->xfrm;
163 size = contents->size;
164
165 /* Enforce restriction of access to the PROVISIONKEY. */
166 if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
167 (attributes & SGX_ATTR_PROVISIONKEY)) {
168 if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
169 pr_warn_once("SGX PROVISIONKEY advertised but not allowed\n");
170 kvm_inject_gp(vcpu, error_code: 0);
171 return 1;
172 }
173
174 /*
175 * Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. Note
176 * that the allowed XFRM (XFeature Request Mask) isn't strictly bound
177 * by the supported XCR0. FP+SSE *must* be set in XFRM, even if XSAVE
178 * is unsupported, i.e. even if XCR0 itself is completely unsupported.
179 */
180 if ((u32)miscselect & ~sgx_12_0->ebx ||
181 (u32)attributes & ~sgx_12_1->eax ||
182 (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
183 (u32)xfrm & ~sgx_12_1->ecx ||
184 (u32)(xfrm >> 32) & ~sgx_12_1->edx ||
185 xfrm & ~(vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE) ||
186 (xfrm & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
187 kvm_inject_gp(vcpu, error_code: 0);
188 return 1;
189 }
190
191 /* Enforce CPUID restriction on max enclave size. */
192 max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
193 sgx_12_0->edx;
194 if (size >= BIT_ULL(max_size_log2)) {
195 kvm_inject_gp(vcpu, error_code: 0);
196 return 1;
197 }
198
199 /*
200 * sgx_virt_ecreate() returns:
201 * 1) 0: ECREATE was successful
202 * 2) -EFAULT: ECREATE was run but faulted, and trapnr was set to the
203 * exception number.
204 * 3) -EINVAL: access_ok() on @secs_hva failed. This should never
205 * happen as KVM checks host addresses at memslot creation.
206 * sgx_virt_ecreate() has already warned in this case.
207 */
208 ret = sgx_virt_ecreate(pageinfo, secs: (void __user *)secs_hva, trapnr: &trapnr);
209 if (!ret)
210 return kvm_skip_emulated_instruction(vcpu);
211 if (ret == -EFAULT)
212 return sgx_inject_fault(vcpu, gva: secs_gva, trapnr);
213
214 return ret;
215}
216
217static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
218{
219 gva_t pageinfo_gva, secs_gva;
220 gva_t metadata_gva, contents_gva;
221 gpa_t metadata_gpa, contents_gpa, secs_gpa;
222 unsigned long metadata_hva, contents_hva, secs_hva;
223 struct sgx_pageinfo pageinfo;
224 struct sgx_secs *contents;
225 struct x86_exception ex;
226 int r;
227
228 if (sgx_get_encls_gva(vcpu, offset: kvm_rbx_read(vcpu), size: 32, alignment: 32, gva: &pageinfo_gva) ||
229 sgx_get_encls_gva(vcpu, offset: kvm_rcx_read(vcpu), size: 4096, alignment: 4096, gva: &secs_gva))
230 return 1;
231
232 /*
233 * Copy the PAGEINFO to local memory, its pointers need to be
234 * translated, i.e. we need to do a deep copy/translate.
235 */
236 r = kvm_read_guest_virt(vcpu, addr: pageinfo_gva, val: &pageinfo,
237 bytes: sizeof(pageinfo), exception: &ex);
238 if (r == X86EMUL_PROPAGATE_FAULT) {
239 kvm_inject_emulated_page_fault(vcpu, fault: &ex);
240 return 1;
241 } else if (r != X86EMUL_CONTINUE) {
242 sgx_handle_emulation_failure(vcpu, addr: pageinfo_gva,
243 size: sizeof(pageinfo));
244 return 0;
245 }
246
247 if (sgx_get_encls_gva(vcpu, offset: pageinfo.metadata, size: 64, alignment: 64, gva: &metadata_gva) ||
248 sgx_get_encls_gva(vcpu, offset: pageinfo.contents, size: 4096, alignment: 4096,
249 gva: &contents_gva))
250 return 1;
251
252 /*
253 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
254 * Resume the guest on failure to inject a #PF.
255 */
256 if (sgx_gva_to_gpa(vcpu, gva: metadata_gva, write: false, gpa: &metadata_gpa) ||
257 sgx_gva_to_gpa(vcpu, gva: contents_gva, write: false, gpa: &contents_gpa) ||
258 sgx_gva_to_gpa(vcpu, gva: secs_gva, write: true, gpa: &secs_gpa))
259 return 1;
260
261 /*
262 * ...and then to HVA. The order of accesses isn't architectural, i.e.
263 * KVM doesn't have to fully process one address at a time. Exit to
264 * userspace if a GPA is invalid.
265 */
266 if (sgx_gpa_to_hva(vcpu, gpa: metadata_gpa, hva: &metadata_hva) ||
267 sgx_gpa_to_hva(vcpu, gpa: contents_gpa, hva: &contents_hva) ||
268 sgx_gpa_to_hva(vcpu, gpa: secs_gpa, hva: &secs_hva))
269 return 0;
270
271 /*
272 * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
273 * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
274 * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
275 * enforce restriction of access to the PROVISIONKEY.
276 */
277 contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT);
278 if (!contents)
279 return -ENOMEM;
280
281 /* Exit to userspace if copying from a host userspace address fails. */
282 if (sgx_read_hva(vcpu, hva: contents_hva, data: (void *)contents, PAGE_SIZE)) {
283 free_page((unsigned long)contents);
284 return 0;
285 }
286
287 pageinfo.metadata = metadata_hva;
288 pageinfo.contents = (u64)contents;
289
290 r = __handle_encls_ecreate(vcpu, pageinfo: &pageinfo, secs_hva, secs_gva);
291
292 free_page((unsigned long)contents);
293
294 return r;
295}
296
297static int handle_encls_einit(struct kvm_vcpu *vcpu)
298{
299 unsigned long sig_hva, secs_hva, token_hva, rflags;
300 struct vcpu_vmx *vmx = to_vmx(vcpu);
301 gva_t sig_gva, secs_gva, token_gva;
302 gpa_t sig_gpa, secs_gpa, token_gpa;
303 int ret, trapnr;
304
305 if (sgx_get_encls_gva(vcpu, offset: kvm_rbx_read(vcpu), size: 1808, alignment: 4096, gva: &sig_gva) ||
306 sgx_get_encls_gva(vcpu, offset: kvm_rcx_read(vcpu), size: 4096, alignment: 4096, gva: &secs_gva) ||
307 sgx_get_encls_gva(vcpu, offset: kvm_rdx_read(vcpu), size: 304, alignment: 512, gva: &token_gva))
308 return 1;
309
310 /*
311 * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
312 * Resume the guest on failure to inject a #PF.
313 */
314 if (sgx_gva_to_gpa(vcpu, gva: sig_gva, write: false, gpa: &sig_gpa) ||
315 sgx_gva_to_gpa(vcpu, gva: secs_gva, write: true, gpa: &secs_gpa) ||
316 sgx_gva_to_gpa(vcpu, gva: token_gva, write: false, gpa: &token_gpa))
317 return 1;
318
319 /*
320 * ...and then to HVA. The order of accesses isn't architectural, i.e.
321 * KVM doesn't have to fully process one address at a time. Exit to
322 * userspace if a GPA is invalid. Note, all structures are aligned and
323 * cannot split pages.
324 */
325 if (sgx_gpa_to_hva(vcpu, gpa: sig_gpa, hva: &sig_hva) ||
326 sgx_gpa_to_hva(vcpu, gpa: secs_gpa, hva: &secs_hva) ||
327 sgx_gpa_to_hva(vcpu, gpa: token_gpa, hva: &token_hva))
328 return 0;
329
330 ret = sgx_virt_einit(sigstruct: (void __user *)sig_hva, token: (void __user *)token_hva,
331 secs: (void __user *)secs_hva,
332 lepubkeyhash: vmx->msr_ia32_sgxlepubkeyhash, trapnr: &trapnr);
333
334 if (ret == -EFAULT)
335 return sgx_inject_fault(vcpu, gva: secs_gva, trapnr);
336
337 /*
338 * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
339 * @token_hva or @secs_hva. This should never happen as KVM checks host
340 * addresses at memslot creation. sgx_virt_einit() has already warned
341 * in this case, so just return.
342 */
343 if (ret < 0)
344 return ret;
345
346 rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
347 X86_EFLAGS_AF | X86_EFLAGS_SF |
348 X86_EFLAGS_OF);
349 if (ret)
350 rflags |= X86_EFLAGS_ZF;
351 else
352 rflags &= ~X86_EFLAGS_ZF;
353 vmx_set_rflags(vcpu, rflags);
354
355 kvm_rax_write(vcpu, val: ret);
356 return kvm_skip_emulated_instruction(vcpu);
357}
358
359static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
360{
361 /*
362 * ENCLS generates a #UD if SGX1 isn't supported, i.e. this point will
363 * be reached if and only if the SGX1 leafs are enabled.
364 */
365 if (leaf >= ECREATE && leaf <= ETRACK)
366 return true;
367
368 if (leaf >= EAUG && leaf <= EMODT)
369 return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);
370
371 return false;
372}
373
374static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
375{
376 const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;
377
378 return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
379}
380
381int handle_encls(struct kvm_vcpu *vcpu)
382{
383 u32 leaf = (u32)kvm_rax_read(vcpu);
384
385 if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
386 !guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
387 kvm_queue_exception(vcpu, UD_VECTOR);
388 } else if (!encls_leaf_enabled_in_guest(vcpu, leaf) ||
389 !sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) {
390 kvm_inject_gp(vcpu, error_code: 0);
391 } else {
392 if (leaf == ECREATE)
393 return handle_encls_ecreate(vcpu);
394 if (leaf == EINIT)
395 return handle_encls_einit(vcpu);
396 WARN_ONCE(1, "unexpected exit on ENCLS[%u]", leaf);
397 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
398 vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
399 return 0;
400 }
401 return 1;
402}
403
404void setup_default_sgx_lepubkeyhash(void)
405{
406 /*
407 * Use Intel's default value for Skylake hardware if Launch Control is
408 * not supported, i.e. Intel's hash is hardcoded into silicon, or if
409 * Launch Control is supported and enabled, i.e. mimic the reset value
410 * and let the guest write the MSRs at will. If Launch Control is
411 * supported but disabled, then use the current MSR values as the hash
412 * MSRs exist but are read-only (locked and not writable).
413 */
414 if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
415 rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, p: &sgx_pubkey_hash[0])) {
416 sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
417 sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
418 sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
419 sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
420 } else {
421 /* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
422 rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
423 rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
424 rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
425 }
426}
427
428void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
429{
430 struct vcpu_vmx *vmx = to_vmx(vcpu);
431
432 memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
433 sizeof(sgx_pubkey_hash));
434}
435
436/*
437 * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
438 * restrictions if the guest's allowed-1 settings diverge from hardware.
439 */
440static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
441{
442 struct kvm_cpuid_entry2 *guest_cpuid;
443 u32 eax, ebx, ecx, edx;
444
445 if (!vcpu->kvm->arch.sgx_provisioning_allowed)
446 return true;
447
448 guest_cpuid = kvm_find_cpuid_entry_index(vcpu, function: 0x12, index: 0);
449 if (!guest_cpuid)
450 return true;
451
452 cpuid_count(op: 0x12, count: 0, eax: &eax, ebx: &ebx, ecx: &ecx, edx: &edx);
453 if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
454 return true;
455
456 guest_cpuid = kvm_find_cpuid_entry_index(vcpu, function: 0x12, index: 1);
457 if (!guest_cpuid)
458 return true;
459
460 cpuid_count(op: 0x12, count: 1, eax: &eax, ebx: &ebx, ecx: &ecx, edx: &edx);
461 if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
462 guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
463 return true;
464
465 return false;
466}
467
468void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
469{
470 /*
471 * There is no software enable bit for SGX that is virtualized by
472 * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
473 * guest (either by the host or by the guest's BIOS) but enabled in the
474 * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
475 * the expected system behavior for ENCLS.
476 */
477 u64 bitmap = -1ull;
478
479 /* Nothing to do if hardware doesn't support SGX */
480 if (!cpu_has_vmx_encls_vmexit())
481 return;
482
483 if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
484 sgx_enabled_in_guest_bios(vcpu)) {
485 if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
486 bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
487 if (sgx_intercept_encls_ecreate(vcpu))
488 bitmap |= (1 << ECREATE);
489 }
490
491 if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
492 bitmap &= ~GENMASK_ULL(EMODT, EAUG);
493
494 /*
495 * Trap and execute EINIT if launch control is enabled in the
496 * host using the guest's values for launch control MSRs, even
497 * if the guest's values are fixed to hardware default values.
498 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
499 * the MSRs is extraordinarily expensive.
500 */
501 if (boot_cpu_has(X86_FEATURE_SGX_LC))
502 bitmap |= (1 << EINIT);
503
504 if (!vmcs12 && is_guest_mode(vcpu))
505 vmcs12 = get_vmcs12(vcpu);
506 if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
507 bitmap |= vmcs12->encls_exiting_bitmap;
508 }
509 vmcs_write64(field: ENCLS_EXITING_BITMAP, value: bitmap);
510}
511

source code of linux/arch/x86/kvm/vmx/sgx.c