// SPDX-License-Identifier: GPL-2.0
/*
 * Hyper-V Isolation VM interface with paravisor and hypervisor
 *
 * Author:
 *  Tianyu Lan <Tianyu.Lan@microsoft.com>
 */

#include <linux/bitfield.h>
#include <linux/hyperv.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <asm/svm.h>
#include <asm/sev.h>
#include <asm/io.h>
#include <asm/coco.h>
#include <asm/mem_encrypt.h>
#include <asm/set_memory.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/mtrr.h>
#include <asm/io_apic.h>
#include <asm/realmode.h>
#include <asm/e820/api.h>
#include <asm/desc.h>
#include <uapi/asm/vmx.h>

#ifdef CONFIG_AMD_MEM_ENCRYPT

#define GHCB_USAGE_HYPERV_CALL	1

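/*
 * Layout of the GHCB page when used for a Hyper-V hypercall
 * (GHCB_USAGE_HYPERV_CALL): the first 509 quadwords carry the hypercall
 * input data, followed by the output GPA and the standard Hyper-V
 * hypercall input/output control words. The union overlays this format
 * on the regular GHCB so the same per-CPU page serves both uses.
 */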
union hv_ghcb {
	struct ghcb ghcb;
	struct {
		u64 hypercalldata[509];
		u64 outputgpa;
		union {
			union {
				struct {
					u32 callcode : 16;
					u32 isfast : 1;
					u32 reserved1 : 14;
					u32 isnested : 1;
					u32 countofelements : 12;
					u32 reserved2 : 4;
					u32 repstartindex : 12;
					u32 reserved3 : 4;
				};
				u64 asuint64;
			} hypercallinput;
			union {
				struct {
					u16 callstatus;
					u16 reserved1;
					u32 elementsprocessed : 12;
					u32 reserved2 : 20;
				};
				u64 asuint64;
			} hypercalloutput;
		};
		u64 reserved2;
	} hypercall;
} __packed __aligned(HV_HYP_PAGE_SIZE);

/* Only used in an SNP VM with the paravisor */
static u16 hv_ghcb_version __ro_after_init;

/* Functions only used in an SNP VM with the paravisor go here. */
u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
{
	union hv_ghcb *hv_ghcb;
	void **ghcb_base;
	unsigned long flags;
	u64 status;

	if (!hv_ghcb_pg)
		return -EFAULT;

	WARN_ON(in_nmi());

	local_irq_save(flags);
	ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg);
	hv_ghcb = (union hv_ghcb *)*ghcb_base;
	if (!hv_ghcb) {
		local_irq_restore(flags);
		return -EFAULT;
	}

	hv_ghcb->ghcb.protocol_version = GHCB_PROTOCOL_MAX;
	hv_ghcb->ghcb.ghcb_usage = GHCB_USAGE_HYPERV_CALL;

	hv_ghcb->hypercall.outputgpa = (u64)output;
	hv_ghcb->hypercall.hypercallinput.asuint64 = 0;
	hv_ghcb->hypercall.hypercallinput.callcode = control;

	if (input_size)
		memcpy(hv_ghcb->hypercall.hypercalldata, input, input_size);

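	/* Exit to the hypervisor, which consumes the hypercall from the GHCB page. */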
	VMGEXIT();

	hv_ghcb->ghcb.ghcb_usage = 0xffffffff;
	memset(hv_ghcb->ghcb.save.valid_bitmap, 0,
	       sizeof(hv_ghcb->ghcb.save.valid_bitmap));

	status = hv_ghcb->hypercall.hypercalloutput.callstatus;

	local_irq_restore(flags);

	return status;
}
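
/*
 * Illustrative caller (a sketch based on the VMBus message path, not a
 * definitive list of users): an SNP VM with the paravisor posts a message
 * through the GHCB page roughly as
 *
 *	status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE, msg, NULL,
 *				   sizeof(*msg));
 *
 * where msg points at a struct hv_input_post_message.
 */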

static inline u64 rd_ghcb_msr(void)
{
	return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
}

static inline void wr_ghcb_msr(u64 val)
{
	native_wrmsrl(MSR_AMD64_SEV_ES_GHCB, val);
}

static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code,
				      u64 exit_info_1, u64 exit_info_2)
{
	/* Fill in protocol and format specifiers */
	ghcb->protocol_version = hv_ghcb_version;
	ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;

	ghcb_set_sw_exit_code(ghcb, exit_code);
	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

	VMGEXIT();

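	/* The VMM reports failure in the low 32 bits of sw_exit_info_1. */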
	if (ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0))
		return ES_VMM_ERROR;
	else
		return ES_OK;
}

void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason)
{
	u64 val = GHCB_MSR_TERM_REQ;

	/* Tell the hypervisor what went wrong. */
	val |= GHCB_SEV_TERM_REASON(set, reason);

	/* Request Guest Termination from Hypervisor */
	wr_ghcb_msr(val);
	VMGEXIT();

	while (true)
		asm volatile("hlt\n" : : : "memory");
}

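/*
 * GHCB MSR protocol: the low 12 bits of MSR_AMD64_SEV_ES_GHCB encode the
 * request/response type and the upper bits carry the payload. For
 * GHCB_MSR_SEV_INFO_RESP the payload is the hypervisor's supported
 * protocol range, extracted by GHCB_MSR_PROTO_MIN()/GHCB_MSR_PROTO_MAX().
 */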
bool hv_ghcb_negotiate_protocol(void)
{
	u64 ghcb_gpa;
	u64 val;

	/* Save ghcb page gpa. */
	ghcb_gpa = rd_ghcb_msr();

	/* Do the GHCB protocol version negotiation */
	wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
	VMGEXIT();
	val = rd_ghcb_msr();

	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
		return false;

	if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
	    GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
		return false;

	hv_ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val),
				GHCB_PROTOCOL_MAX);

	/* Write ghcb page back after negotiating protocol. */
	wr_ghcb_msr(ghcb_gpa);
	VMGEXIT();

	return true;
}

static void hv_ghcb_msr_write(u64 msr, u64 value)
{
	union hv_ghcb *hv_ghcb;
	void **ghcb_base;
	unsigned long flags;

	if (!hv_ghcb_pg)
		return;

	WARN_ON(in_nmi());

	local_irq_save(flags);
	ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg);
	hv_ghcb = (union hv_ghcb *)*ghcb_base;
	if (!hv_ghcb) {
		local_irq_restore(flags);
		return;
	}

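	/* WRMSR emulation: MSR index in RCX, value split across EDX:EAX. */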
	ghcb_set_rcx(&hv_ghcb->ghcb, msr);
	ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value));
	ghcb_set_rdx(&hv_ghcb->ghcb, upper_32_bits(value));

	if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 1, 0))
		pr_warn("Fail to write msr via ghcb %llx.\n", msr);

	local_irq_restore(flags);
}

static void hv_ghcb_msr_read(u64 msr, u64 *value)
{
	union hv_ghcb *hv_ghcb;
	void **ghcb_base;
	unsigned long flags;

	/* Check size of union hv_ghcb here. */
	BUILD_BUG_ON(sizeof(union hv_ghcb) != HV_HYP_PAGE_SIZE);

	if (!hv_ghcb_pg)
		return;

	WARN_ON(in_nmi());

	local_irq_save(flags);
	ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg);
	hv_ghcb = (union hv_ghcb *)*ghcb_base;
	if (!hv_ghcb) {
		local_irq_restore(flags);
		return;
	}

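	/* RDMSR emulation: exit_info_1 == 0 selects a read; result in EDX:EAX. */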
	ghcb_set_rcx(&hv_ghcb->ghcb, msr);
	if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 0, 0))
		pr_warn("Fail to read msr via ghcb %llx.\n", msr);
	else
		*value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax)
			| ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32);
	local_irq_restore(flags);
}

/* Only used in a fully enlightened SNP VM, i.e. without the paravisor */
static u8 ap_start_input_arg[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
static u8 ap_start_stack[PAGE_SIZE] __aligned(PAGE_SIZE);
static DEFINE_PER_CPU(struct sev_es_save_area *, hv_sev_vmsa);

/* Functions only used in an SNP VM without the paravisor go here. */

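/*
 * Build a VMSA segment register from the live GDT: bytes 5-6 of the GDT
 * entry hold the access rights, which the VMSA expects repacked as
 * attrib[7:0] = access byte and attrib[11:8] = flags nibble.
 */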
#define hv_populate_vmcb_seg(seg, gdtr_base)				\
do {									\
	if (seg.selector) {						\
		seg.base = 0;						\
		seg.limit = HV_AP_SEGMENT_LIMIT;			\
		seg.attrib = *(u16 *)(gdtr_base + seg.selector + 5);	\
		seg.attrib = (seg.attrib & 0xFF) | ((seg.attrib >> 4) & 0xF00); \
	}								\
} while (0)

static int snp_set_vmsa(void *va, bool vmsa)
{
	u64 attrs;

	/*
	 * Running at VMPL0 allows the kernel to change the VMSA bit for a page
	 * using the RMPADJUST instruction. However, for the instruction to
	 * succeed it must target the permissions of a lesser privileged
	 * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST
	 * instruction in the AMD64 APM Volume 3).
	 */
	attrs = 1;
	if (vmsa)
		attrs |= RMPADJUST_VMSA_PAGE_BIT;

	return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
}

static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa)
{
	int err;

	err = snp_set_vmsa(vmsa, false);
	if (err)
		pr_err("clear VMSA page failed (%u), leaking page\n", err);
	else
		free_page((unsigned long)vmsa);
}

int hv_snp_boot_ap(u32 cpu, unsigned long start_ip)
{
	struct sev_es_save_area *vmsa = (struct sev_es_save_area *)
		__get_free_page(GFP_KERNEL | __GFP_ZERO);
	struct sev_es_save_area *cur_vmsa;
	struct desc_ptr gdtr;
	u64 ret, retry = 5;
	struct hv_enable_vp_vtl *start_vp_input;
	unsigned long flags;

	if (!vmsa)
		return -ENOMEM;

	native_store_gdt(&gdtr);

	vmsa->gdtr.base = gdtr.address;
	vmsa->gdtr.limit = gdtr.size;

	asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector));
	hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base);

	asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector));
	hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base);

	asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector));
	hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base);

	asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector));
	hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base);

	vmsa->efer = native_read_msr(MSR_EFER);

	asm volatile("movq %%cr4, %%rax;" : "=a" (vmsa->cr4));
	asm volatile("movq %%cr3, %%rax;" : "=a" (vmsa->cr3));
	asm volatile("movq %%cr0, %%rax;" : "=a" (vmsa->cr0));

	vmsa->xcr0 = 1;
	vmsa->g_pat = HV_AP_INIT_GPAT_DEFAULT;
	vmsa->rip = (u64)secondary_startup_64_no_verify;
	vmsa->rsp = (u64)&ap_start_stack[PAGE_SIZE];

	/*
	 * Set the SNP-specific fields for this VMSA:
	 *   VMPL level
	 *   SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
	 */
	vmsa->vmpl = 0;
	vmsa->sev_features = sev_status >> 2;

	ret = snp_set_vmsa(vmsa, true);
	if (ret) {
		pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret);
		free_page((u64)vmsa);
		return ret;
	}

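	/*
	 * Ask Hyper-V to start the AP with the VMSA as its initial context.
	 * The hypercall input page lives in __bss_decrypted memory so the
	 * hypervisor can read it.
	 */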
	local_irq_save(flags);
	start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg;
	memset(start_vp_input, 0, sizeof(*start_vp_input));
	start_vp_input->partition_id = -1;
	start_vp_input->vp_index = cpu;
	start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl;
	*(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1;

	do {
		ret = hv_do_hypercall(HVCALL_START_VP,
				      start_vp_input, NULL);
	} while (hv_result(ret) == HV_STATUS_TIME_OUT && retry--);

	local_irq_restore(flags);

	if (!hv_result_success(ret)) {
		pr_err("HvCallStartVirtualProcessor failed: %llx\n", ret);
		snp_cleanup_vmsa(vmsa);
		vmsa = NULL;
	}

	cur_vmsa = per_cpu(hv_sev_vmsa, cpu);
	/* Free up any previous VMSA page */
	if (cur_vmsa)
		snp_cleanup_vmsa(cur_vmsa);

	/* Record the current VMSA page */
	per_cpu(hv_sev_vmsa, cpu) = vmsa;

	return ret;
}
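
/*
 * Note: in a fully enlightened SNP VM, hv_snp_boot_ap() is wired up as the
 * 64-bit AP wakeup callback (wakeup_secondary_cpu_64), replacing the
 * INIT/SIPI sequence used to start APs in a non-isolated VM.
 */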

#else
static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
#endif /* CONFIG_AMD_MEM_ENCRYPT */

#ifdef CONFIG_INTEL_TDX_GUEST
static void hv_tdx_msr_write(u64 msr, u64 val)
{
	struct tdx_module_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = EXIT_REASON_MSR_WRITE,
		.r12 = msr,
		.r13 = val,
	};

	u64 ret = __tdx_hypercall(&args);

	WARN_ONCE(ret, "Failed to emulate MSR write: %lld\n", ret);
}

static void hv_tdx_msr_read(u64 msr, u64 *val)
{
	struct tdx_module_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = EXIT_REASON_MSR_READ,
		.r12 = msr,
	};

	u64 ret = __tdx_hypercall(&args);

	if (WARN_ONCE(ret, "Failed to emulate MSR read: %lld\n", ret))
		*val = 0;
	else
		*val = args.r11;
}

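/*
 * Map the Hyper-V hypercall ABI onto TDVMCALL registers: the control word
 * goes in R10, the two parameters in RDX and R8, and the Hyper-V status
 * comes back in R11.
 */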
u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
{
	struct tdx_module_args args = { };

	args.r10 = control;
	args.rdx = param1;
	args.r8 = param2;

	(void)__tdx_hypercall(&args);

	return args.r11;
}

#else
static inline void hv_tdx_msr_write(u64 msr, u64 value) {}
static inline void hv_tdx_msr_read(u64 msr, u64 *value) {}
#endif /* CONFIG_INTEL_TDX_GUEST */

#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
void hv_ivm_msr_write(u64 msr, u64 value)
{
	if (!ms_hyperv.paravisor_present)
		return;

	if (hv_isolation_type_tdx())
		hv_tdx_msr_write(msr, value);
	else if (hv_isolation_type_snp())
		hv_ghcb_msr_write(msr, value);
}

void hv_ivm_msr_read(u64 msr, u64 *value)
{
	if (!ms_hyperv.paravisor_present)
		return;

	if (hv_isolation_type_tdx())
		hv_tdx_msr_read(msr, value);
	else if (hv_isolation_type_snp())
		hv_ghcb_msr_read(msr, value);
}
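
/*
 * A sketch of the intended use (assuming the caller, as in the Hyper-V
 * MSR accessors, has already determined the register is a synthetic MSR
 * that must be proxied):
 *
 *	u64 val;
 *
 *	hv_ivm_msr_read(HV_X64_MSR_SINT0, &val);
 *	hv_ivm_msr_write(HV_X64_MSR_SINT0, val | HV_SYNIC_SINT_MASKED);
 */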

/*
 * hv_mark_gpa_visibility - Set pages visible to the host via hvcall.
 *
 * In an Isolation VM, all guest memory is encrypted against the host, so
 * the guest must make memory visible to the host via hvcall before
 * sharing it with the host.
 */
static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
				  enum hv_mem_host_visibility visibility)
{
	struct hv_gpa_range_for_visibility *input;
	u16 pages_processed;
	u64 hv_status;
	unsigned long flags;

	/* no-op if partition isolation is not enabled */
	if (!hv_is_isolation_supported())
		return 0;

	if (count > HV_MAX_MODIFY_GPA_REP_COUNT) {
		pr_err("Hyper-V: GPA count:%d exceeds supported:%lu\n", count,
		       HV_MAX_MODIFY_GPA_REP_COUNT);
		return -EINVAL;
	}

	local_irq_save(flags);
	input = *this_cpu_ptr(hyperv_pcpu_input_arg);

	if (unlikely(!input)) {
		local_irq_restore(flags);
		return -EINVAL;
	}

	input->partition_id = HV_PARTITION_ID_SELF;
	input->host_visibility = visibility;
	input->reserved0 = 0;
	input->reserved1 = 0;
	memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn));
	hv_status = hv_do_rep_hypercall(
			HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count,
			0, input, &pages_processed);
	local_irq_restore(flags);

	if (hv_result_success(hv_status))
		return 0;
	else
		return -EFAULT;
}

/*
 * When transitioning memory between encrypted and decrypted, the caller
 * of set_memory_encrypted() or set_memory_decrypted() is responsible for
 * ensuring that the memory isn't in use and isn't referenced while the
 * transition is in progress. The transition has multiple steps, and the
 * memory is in an inconsistent state until all steps are complete. A
 * reference while the state is inconsistent could result in an exception
 * that can't be cleanly fixed up.
 *
 * But the Linux kernel load_unaligned_zeropad() mechanism could cause a
 * stray reference that can't be prevented by the caller, so Linux has
 * specific code to handle this case. But when the #VC and #VE exceptions
 * are routed to a paravisor, the specific code doesn't work. To avoid this
 * problem, mark the pages as "not present" while the transition is in
 * progress. If load_unaligned_zeropad() causes a stray reference, a normal
 * page fault is generated instead of #VC or #VE, and the page-fault-based
 * handlers for load_unaligned_zeropad() resolve the reference. When the
 * transition is complete, hv_vtom_set_host_visibility() marks the pages
 * as "present" again.
 */
static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
{
	return !set_memory_np(kbuffer, pagecount);
}

/*
 * hv_vtom_set_host_visibility - Set the specified memory visible to the host.
 *
 * In an Isolation VM, all guest memory is encrypted against the host, so
 * the guest must make memory visible to the host via hvcall before
 * sharing it with the host. This function is a wrapper around
 * hv_mark_gpa_visibility() that takes a memory base and size.
 */
static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc)
{
	enum hv_mem_host_visibility visibility = enc ?
			VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE;
	u64 *pfn_array;
	phys_addr_t paddr;
	void *vaddr;
	int ret = 0;
	bool result = true;
	int i, pfn;

	pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
	if (!pfn_array) {
		result = false;
		goto err_set_memory_p;
	}

	for (i = 0, pfn = 0; i < pagecount; i++) {
		/*
		 * Use slow_virt_to_phys() because the PRESENT bit has been
		 * temporarily cleared in the PTEs. slow_virt_to_phys() works
		 * without the PRESENT bit while virt_to_hvpfn() or similar
		 * does not.
		 */
		vaddr = (void *)kbuffer + (i * HV_HYP_PAGE_SIZE);
		paddr = slow_virt_to_phys(vaddr);
		pfn_array[pfn] = paddr >> HV_HYP_PAGE_SHIFT;
		pfn++;

		if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
			ret = hv_mark_gpa_visibility(pfn, pfn_array,
						     visibility);
			if (ret) {
				result = false;
				goto err_free_pfn_array;
			}
			pfn = 0;
		}
	}

err_free_pfn_array:
	kfree(pfn_array);

err_set_memory_p:
	/*
	 * Set the PTE PRESENT bits again to revert what hv_vtom_clear_present()
	 * did. Do this even if there is an error earlier in this function in
	 * order to avoid leaving the memory range in a "broken" state. Setting
	 * the PRESENT bits shouldn't fail, but return an error if it does.
	 */
	if (set_memory_p(kbuffer, pagecount))
		result = false;

	return result;
}

static bool hv_vtom_tlb_flush_required(bool private)
{
	/*
	 * Since hv_vtom_clear_present() marks the PTEs as "not present"
	 * and flushes the TLB, they can't be in the TLB. That makes the
	 * flush controlled by this function redundant, so return "false".
	 */
	return false;
}

static bool hv_vtom_cache_flush_required(void)
{
	return false;
}

static bool hv_is_private_mmio(u64 addr)
{
	/*
	 * Hyper-V always provides a single IO-APIC in a guest VM.
	 * When a paravisor is used, it is emulated by the paravisor
	 * in the guest context and must be mapped private.
	 */
	if (addr >= HV_IOAPIC_BASE_ADDRESS &&
	    addr < (HV_IOAPIC_BASE_ADDRESS + PAGE_SIZE))
		return true;

	/* Same with a vTPM */
	if (addr >= VTPM_BASE_ADDRESS &&
	    addr < (VTPM_BASE_ADDRESS + PAGE_SIZE))
		return true;

	return false;
}

void __init hv_vtom_init(void)
{
	enum hv_isolation_type type = hv_get_isolation_type();

	switch (type) {
	case HV_ISOLATION_TYPE_VBS:
		fallthrough;
	/*
	 * By design, a VM using vTOM doesn't see the SEV setting,
	 * so SEV initialization is bypassed and sev_status isn't set.
	 * Set it here to indicate a vTOM VM.
	 *
	 * Note: if CONFIG_AMD_MEM_ENCRYPT is not set, sev_status is
	 * defined as 0ULL, to which we can't assign a value.
	 */
#ifdef CONFIG_AMD_MEM_ENCRYPT
	case HV_ISOLATION_TYPE_SNP:
		sev_status = MSR_AMD64_SNP_VTOM;
		cc_vendor = CC_VENDOR_AMD;
		break;
#endif

	case HV_ISOLATION_TYPE_TDX:
		cc_vendor = CC_VENDOR_INTEL;
		break;

	default:
		panic("hv_vtom_init: unsupported isolation type %d\n", type);
	}

	cc_set_mask(ms_hyperv.shared_gpa_boundary);
	physical_mask &= ms_hyperv.shared_gpa_boundary - 1;

	x86_platform.hyper.is_private_mmio = hv_is_private_mmio;
	x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required;
	x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
	x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present;
	x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;

	/* Set WB as the default cache mode. */
	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
}

#endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */

enum hv_isolation_type hv_get_isolation_type(void)
{
	if (!(ms_hyperv.priv_high & HV_ISOLATION))
		return HV_ISOLATION_TYPE_NONE;
	return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
}
EXPORT_SYMBOL_GPL(hv_get_isolation_type);

/*
 * hv_is_isolation_supported - Check if the system runs in a Hyper-V
 * isolation VM.
 */
bool hv_is_isolation_supported(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
		return false;

	if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
		return false;

	return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
}

DEFINE_STATIC_KEY_FALSE(isolation_type_snp);

/*
 * hv_isolation_type_snp - Check if the system runs in an AMD SEV-SNP based
 * isolation VM.
 */
bool hv_isolation_type_snp(void)
{
	return static_branch_unlikely(&isolation_type_snp);
}

DEFINE_STATIC_KEY_FALSE(isolation_type_tdx);
/*
 * hv_isolation_type_tdx - Check if the system runs in an Intel TDX based
 * isolated VM.
 */
bool hv_isolation_type_tdx(void)
{
	return static_branch_unlikely(&isolation_type_tdx);
}