/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __KVM_X86_VMX_H
#define __KVM_X86_VMX_H

#include <linux/kvm_host.h>

#include <asm/kvm.h>
#include <asm/intel_pt.h>
#include <asm/perf_event.h>

#include "capabilities.h"
#include "../kvm_cache_regs.h"
#include "posted_intr.h"
#include "vmcs.h"
#include "vmx_ops.h"
#include "../cpuid.h"
#include "run_flags.h"
#include "../mmu.h"

#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
#define MSR_TYPE_RW 3

#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
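/*
 * Illustrative note: APIC_BASE_MSR is 0x800 and xAPIC MMIO registers are 16
 * bytes apart, hence the ">> 4", e.g. X2APIC_MSR(APIC_TASKPRI) is
 * 0x800 + (0x80 >> 4) == 0x808, the x2APIC TPR MSR.
 */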

#ifdef CONFIG_X86_64
#define MAX_NR_USER_RETURN_MSRS 7
#else
#define MAX_NR_USER_RETURN_MSRS 4
#endif

#define MAX_NR_LOADSTORE_MSRS 8

struct vmx_msrs {
        unsigned int nr;
        struct vmx_msr_entry val[MAX_NR_LOADSTORE_MSRS];
};
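
/*
 * For context: struct vmx_msr_entry (from asm/vmx.h) is the architectural
 * format of an entry in the VMCS MSR load/store lists, i.e.
 * { u32 index; u32 reserved; u64 value; }.
 */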

struct vmx_uret_msr {
        bool load_into_hardware;
        u64 data;
        u64 mask;
};

enum segment_cache_field {
        SEG_FIELD_SEL = 0,
        SEG_FIELD_BASE = 1,
        SEG_FIELD_LIMIT = 2,
        SEG_FIELD_AR = 3,

        SEG_FIELD_NR = 4
};

#define RTIT_ADDR_RANGE 4

struct pt_ctx {
        u64 ctl;
        u64 status;
        u64 output_base;
        u64 output_mask;
        u64 cr3_match;
        u64 addr_a[RTIT_ADDR_RANGE];
        u64 addr_b[RTIT_ADDR_RANGE];
};

struct pt_desc {
        u64 ctl_bitmask;
        u32 num_address_ranges;
        u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
        struct pt_ctx host;
        struct pt_ctx guest;
};

union vmx_exit_reason {
        struct {
                u32 basic : 16;
                u32 reserved16 : 1;
                u32 reserved17 : 1;
                u32 reserved18 : 1;
                u32 reserved19 : 1;
                u32 reserved20 : 1;
                u32 reserved21 : 1;
                u32 reserved22 : 1;
                u32 reserved23 : 1;
                u32 reserved24 : 1;
                u32 reserved25 : 1;
                u32 bus_lock_detected : 1;
                u32 enclave_mode : 1;
                u32 smi_pending_mtf : 1;
                u32 smi_from_vmx_root : 1;
                u32 reserved30 : 1;
                u32 failed_vmentry : 1;
        };
        u32 full;
};
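
/*
 * Illustrative sketch (not code from this header): the exit reason is read
 * as a raw 32-bit value and decoded via the bitfields; the handler below is
 * a hypothetical placeholder.
 *
 *        union vmx_exit_reason exit_reason;
 *
 *        exit_reason.full = vmcs_read32(VM_EXIT_REASON);
 *        if (exit_reason.failed_vmentry)
 *                handle_failed_vmentry(exit_reason.basic);
 */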

struct lbr_desc {
        /* Basic info about guest LBR records. */
        struct x86_pmu_lbr records;

        /*
         * Emulate LBR feature via passthrough LBR registers when the
         * per-vcpu guest LBR event is scheduled on the current pcpu.
         *
         * The records may be inaccurate if the host reclaims the LBR.
         */
        struct perf_event *event;

        /* True if LBRs are marked as not intercepted in the MSR bitmap */
        bool msr_passthrough;
};

extern struct x86_pmu_lbr vmx_lbr_caps;

/*
 * The nested_vmx structure is part of vcpu_vmx, and holds information we need
 * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
 */
struct nested_vmx {
        /* Has the level 1 guest done vmxon? */
        bool vmxon;
        gpa_t vmxon_ptr;
        bool pml_full;

        /* The guest-physical address of the current VMCS L1 keeps for L2 */
        gpa_t current_vmptr;
        /*
         * Cache of the guest's VMCS, existing outside of guest memory.
         * Loaded from guest memory during VMPTRLD. Flushed to guest
         * memory during VMCLEAR and VMPTRLD.
         */
        struct vmcs12 *cached_vmcs12;
        /*
         * Cache of the guest's shadow VMCS, existing outside of guest
         * memory. Loaded from guest memory during VM entry. Flushed
         * to guest memory during VM exit.
         */
        struct vmcs12 *cached_shadow_vmcs12;

        /*
         * GPA to HVA cache for accessing vmcs12->vmcs_link_pointer
         */
        struct gfn_to_hva_cache shadow_vmcs12_cache;

        /*
         * GPA to HVA cache for VMCS12
         */
        struct gfn_to_hva_cache vmcs12_cache;

        /*
         * Indicates if the shadow vmcs or enlightened vmcs must be updated
         * with the data held by struct vmcs12.
         */
        bool need_vmcs12_to_shadow_sync;
        bool dirty_vmcs12;

        /*
         * Indicates whether MSR bitmap for L2 needs to be rebuilt due to
         * changes in MSR bitmap for L1 or switching to a different L2. Note,
         * this flag can only be used reliably in conjunction with a paravirt L1
         * which informs L0 whether any changes to MSR bitmap for L2 were done
         * on its side.
         */
        bool force_msr_bitmap_recalc;

        /*
         * Indicates lazily loaded guest state has not yet been decached from
         * vmcs02.
         */
        bool need_sync_vmcs02_to_vmcs12_rare;

        /*
         * vmcs02 has been initialized, i.e. state that is constant for
         * vmcs02 has been written to the backing VMCS. Initialization
         * is delayed until L1 actually attempts to run a nested VM.
         */
        bool vmcs02_initialized;

        bool change_vmcs01_virtual_apic_mode;
        bool reload_vmcs01_apic_access_page;
        bool update_vmcs01_cpu_dirty_logging;
        bool update_vmcs01_apicv_status;

        /*
         * Enlightened VMCS has been enabled. It does not mean that L1 has to
         * use it. However, VMX features available to L1 will be limited based
         * on what the enlightened VMCS supports.
         */
        bool enlightened_vmcs_enabled;

        /* L2 must run next, and mustn't decide to exit to L1. */
        bool nested_run_pending;

        /* Pending MTF VM-exit into L1. */
        bool mtf_pending;

        struct loaded_vmcs vmcs02;

        /*
         * Guest pages referred to in the vmcs02 with host-physical
         * pointers, so we must keep them pinned while L2 runs.
         */
        struct kvm_host_map apic_access_page_map;
        struct kvm_host_map virtual_apic_map;
        struct kvm_host_map pi_desc_map;

        struct kvm_host_map msr_bitmap_map;

        struct pi_desc *pi_desc;
        bool pi_pending;
        u16 posted_intr_nv;

        struct hrtimer preemption_timer;
        u64 preemption_timer_deadline;
        bool has_preemption_timer_deadline;
        bool preemption_timer_expired;

        /*
         * Used to snapshot MSRs that are conditionally loaded on VM-Enter in
         * order to propagate the guest's pre-VM-Enter value into vmcs02. For
         * emulation of VMLAUNCH/VMRESUME, the snapshot will be of L1's value.
         * For KVM_SET_NESTED_STATE, the snapshot is of L2's value, _if_
         * userspace restores MSRs before nested state. If userspace restores
         * MSRs after nested state, the snapshot holds garbage, but KVM can't
         * detect that, and the garbage value in vmcs02 will be overwritten by
         * MSR restoration in any case.
         */
        u64 pre_vmenter_debugctl;
        u64 pre_vmenter_bndcfgs;

        /* to migrate it to L1 if L2 writes to L1's CR8 directly */
        int l1_tpr_threshold;

        u16 vpid02;
        u16 last_vpid;

        struct nested_vmx_msrs msrs;

        /* SMM related state */
        struct {
                /* in VMX operation on SMM entry? */
                bool vmxon;
                /* in guest mode on SMM entry? */
                bool guest_mode;
        } smm;

#ifdef CONFIG_KVM_HYPERV
        gpa_t hv_evmcs_vmptr;
        struct kvm_host_map hv_evmcs_map;
        struct hv_enlightened_vmcs *hv_evmcs;
#endif
};

struct vcpu_vmx {
        struct kvm_vcpu vcpu;
        u8 fail;
        u8 x2apic_msr_bitmap_mode;

        /*
         * If true, host state has been stored in vmx->loaded_vmcs for
         * the CPU registers that only need to be switched when transitioning
         * to/from the kernel, and the registers have been loaded with guest
         * values. If false, host state is loaded in the CPU registers
         * and vmx->loaded_vmcs->host_state is invalid.
         */
        bool guest_state_loaded;

        unsigned long exit_qualification;
        u32 exit_intr_info;
        u32 idt_vectoring_info;
        ulong rflags;

        /*
         * User return MSRs are always emulated when enabled in the guest, but
         * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
         * of 64-bit mode or if EFER.SCE=0, thus the SYSCALL MSRs don't need to
         * be loaded into hardware if those conditions aren't met.
         */
        struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
        bool guest_uret_msrs_loaded;
#ifdef CONFIG_X86_64
        u64 msr_host_kernel_gs_base;
        u64 msr_guest_kernel_gs_base;
#endif

        u64 spec_ctrl;
        u32 msr_ia32_umwait_control;

        /*
         * loaded_vmcs points to the VMCS currently used in this vcpu. For a
         * non-nested (L1) guest, it always points to vmcs01. For a nested
         * guest (L2), it points to a different VMCS.
         */
        struct loaded_vmcs vmcs01;
        struct loaded_vmcs *loaded_vmcs;

        struct msr_autoload {
                struct vmx_msrs guest;
                struct vmx_msrs host;
        } msr_autoload;

        struct msr_autostore {
                struct vmx_msrs guest;
        } msr_autostore;

        struct {
                int vm86_active;
                ulong save_rflags;
                struct kvm_segment segs[8];
        } rmode;
        struct {
                u32 bitmask; /* 4 bits per segment (1 bit per field) */
                struct kvm_save_segment {
                        u16 selector;
                        unsigned long base;
                        u32 limit;
                        u32 ar;
                } seg[8];
        } segment_cache;
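        /*
         * Illustrative sketch (an assumption about the cache's users, not
         * code from this header): each segment owns SEG_FIELD_NR consecutive
         * bits of @bitmask, one per enum segment_cache_field:
         *
         *        u32 mask = 1u << (seg * SEG_FIELD_NR + field);
         */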
        int vpid;
        bool emulation_required;

        union vmx_exit_reason exit_reason;

        /* Posted interrupt descriptor */
        struct pi_desc pi_desc;

        /* Used if this vCPU is waiting for PI notification wakeup. */
        struct list_head pi_wakeup_list;

        /* Support for a guest hypervisor (nested VMX) */
        struct nested_vmx nested;

        /* Dynamic PLE window. */
        unsigned int ple_window;
        bool ple_window_dirty;

        /* Support for PML */
#define PML_ENTITY_NUM 512
        struct page *pml_pg;

        /* apic deadline value in host tsc */
        u64 hv_deadline_tsc;

        unsigned long host_debugctlmsr;

        /*
         * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
         * msr_ia32_feature_control. FEAT_CTL_LOCKED is always included
         * in msr_ia32_feature_control_valid_bits.
         */
        u64 msr_ia32_feature_control;
        u64 msr_ia32_feature_control_valid_bits;
        /* SGX Launch Control public key hash */
        u64 msr_ia32_sgxlepubkeyhash[4];
        u64 msr_ia32_mcu_opt_ctrl;
        bool disable_fb_clear;

        struct pt_desc pt_desc;
        struct lbr_desc lbr_desc;

        /* Save desired MSR intercept (read: pass-through) state */
#define MAX_POSSIBLE_PASSTHROUGH_MSRS 16
        struct {
                DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
                DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
        } shadow_msr_intercept;
};

struct kvm_vmx {
        struct kvm kvm;

        unsigned int tss_addr;
        bool ept_identity_pagetable_done;
        gpa_t ept_identity_map_addr;
        /* Posted Interrupt Descriptor (PID) table for IPI virtualization */
        u64 *pid_table;
};

void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
                        struct loaded_vmcs *buddy);
int allocate_vpid(void);
void free_vpid(int vpid);
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
                        unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
bool vmx_emulation_required(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask);
int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
void ept_save_pdptrs(struct kvm_vcpu *vcpu);
void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);

bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu);
void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
bool vmx_nmi_blocked(struct kvm_vcpu *vcpu);
bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu);
bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
                    unsigned int flags);
int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);

void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);

u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);

gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);

static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
                                             int type, bool value)
{
        if (value)
                vmx_enable_intercept_for_msr(vcpu, msr, type);
        else
                vmx_disable_intercept_for_msr(vcpu, msr, type);
}
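
/*
 * Usage sketch (illustrative): enable interception of both reads and writes
 * of an MSR, i.e. stop passing it through to the guest:
 *
 *        vmx_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW, true);
 */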

void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);

/*
 * Note, early Intel manuals have the write-low and read-high bitmap offsets
 * the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and
 * 0xc0000000-0xc0001fff. The former (low) uses bytes 0-0x3ff for reads and
 * 0x800-0xbff for writes. The latter (high) uses 0x400-0x7ff for reads and
 * 0xc00-0xfff for writes. MSRs not covered by either of the ranges always
 * VM-Exit.
 */
#define __BUILD_VMX_MSR_BITMAP_HELPER(rtype, action, bitop, access, base) \
static inline rtype vmx_##action##_msr_bitmap_##access(unsigned long *bitmap, \
                                                       u32 msr) \
{ \
        int f = sizeof(unsigned long); \
 \
        if (msr <= 0x1fff) \
                return bitop##_bit(msr, bitmap + base / f); \
        else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) \
                return bitop##_bit(msr & 0x1fff, bitmap + (base + 0x400) / f); \
        return (rtype)true; \
}
#define BUILD_VMX_MSR_BITMAP_HELPERS(ret_type, action, bitop) \
        __BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, read, 0x0) \
        __BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 0x800)

BUILD_VMX_MSR_BITMAP_HELPERS(bool, test, test)
BUILD_VMX_MSR_BITMAP_HELPERS(void, clear, __clear)
BUILD_VMX_MSR_BITMAP_HELPERS(void, set, __set)
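
/*
 * The three invocations above generate six helpers, one per action for each
 * of the read (base 0x0) and write (base 0x800) halves of the bitmap:
 *
 *        bool vmx_test_msr_bitmap_read(unsigned long *bitmap, u32 msr);
 *        bool vmx_test_msr_bitmap_write(unsigned long *bitmap, u32 msr);
 *        void vmx_clear_msr_bitmap_read(unsigned long *bitmap, u32 msr);
 *        void vmx_clear_msr_bitmap_write(unsigned long *bitmap, u32 msr);
 *        void vmx_set_msr_bitmap_read(unsigned long *bitmap, u32 msr);
 *        void vmx_set_msr_bitmap_write(unsigned long *bitmap, u32 msr);
 */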

static inline u8 vmx_get_rvi(void)
{
        return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
}

#define __KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \
        (VM_ENTRY_LOAD_DEBUG_CONTROLS)
#ifdef CONFIG_X86_64
#define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \
        (__KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS | \
         VM_ENTRY_IA32E_MODE)
#else
#define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \
        __KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS
#endif
#define KVM_OPTIONAL_VMX_VM_ENTRY_CONTROLS \
        (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | \
         VM_ENTRY_LOAD_IA32_PAT | \
         VM_ENTRY_LOAD_IA32_EFER | \
         VM_ENTRY_LOAD_BNDCFGS | \
         VM_ENTRY_PT_CONCEAL_PIP | \
         VM_ENTRY_LOAD_IA32_RTIT_CTL)

#define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
        (VM_EXIT_SAVE_DEBUG_CONTROLS | \
         VM_EXIT_ACK_INTR_ON_EXIT)
#ifdef CONFIG_X86_64
#define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
        (__KVM_REQUIRED_VMX_VM_EXIT_CONTROLS | \
         VM_EXIT_HOST_ADDR_SPACE_SIZE)
#else
#define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
        __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS
#endif
#define KVM_OPTIONAL_VMX_VM_EXIT_CONTROLS \
        (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
         VM_EXIT_SAVE_IA32_PAT | \
         VM_EXIT_LOAD_IA32_PAT | \
         VM_EXIT_SAVE_IA32_EFER | \
         VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | \
         VM_EXIT_LOAD_IA32_EFER | \
         VM_EXIT_CLEAR_BNDCFGS | \
         VM_EXIT_PT_CONCEAL_PIP | \
         VM_EXIT_CLEAR_IA32_RTIT_CTL)

#define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
        (PIN_BASED_EXT_INTR_MASK | \
         PIN_BASED_NMI_EXITING)
#define KVM_OPTIONAL_VMX_PIN_BASED_VM_EXEC_CONTROL \
        (PIN_BASED_VIRTUAL_NMIS | \
         PIN_BASED_POSTED_INTR | \
         PIN_BASED_VMX_PREEMPTION_TIMER)

#define __KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \
        (CPU_BASED_HLT_EXITING | \
         CPU_BASED_CR3_LOAD_EXITING | \
         CPU_BASED_CR3_STORE_EXITING | \
         CPU_BASED_UNCOND_IO_EXITING | \
         CPU_BASED_MOV_DR_EXITING | \
         CPU_BASED_USE_TSC_OFFSETTING | \
         CPU_BASED_MWAIT_EXITING | \
         CPU_BASED_MONITOR_EXITING | \
         CPU_BASED_INVLPG_EXITING | \
         CPU_BASED_RDPMC_EXITING | \
         CPU_BASED_INTR_WINDOW_EXITING)

#ifdef CONFIG_X86_64
#define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \
        (__KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL | \
         CPU_BASED_CR8_LOAD_EXITING | \
         CPU_BASED_CR8_STORE_EXITING)
#else
#define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \
        __KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL
#endif

#define KVM_OPTIONAL_VMX_CPU_BASED_VM_EXEC_CONTROL \
        (CPU_BASED_RDTSC_EXITING | \
         CPU_BASED_TPR_SHADOW | \
         CPU_BASED_USE_IO_BITMAPS | \
         CPU_BASED_MONITOR_TRAP_FLAG | \
         CPU_BASED_USE_MSR_BITMAPS | \
         CPU_BASED_NMI_WINDOW_EXITING | \
         CPU_BASED_PAUSE_EXITING | \
         CPU_BASED_ACTIVATE_SECONDARY_CONTROLS | \
         CPU_BASED_ACTIVATE_TERTIARY_CONTROLS)

#define KVM_REQUIRED_VMX_SECONDARY_VM_EXEC_CONTROL 0
#define KVM_OPTIONAL_VMX_SECONDARY_VM_EXEC_CONTROL \
        (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \
         SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | \
         SECONDARY_EXEC_WBINVD_EXITING | \
         SECONDARY_EXEC_ENABLE_VPID | \
         SECONDARY_EXEC_ENABLE_EPT | \
         SECONDARY_EXEC_UNRESTRICTED_GUEST | \
         SECONDARY_EXEC_PAUSE_LOOP_EXITING | \
         SECONDARY_EXEC_DESC | \
         SECONDARY_EXEC_ENABLE_RDTSCP | \
         SECONDARY_EXEC_ENABLE_INVPCID | \
         SECONDARY_EXEC_APIC_REGISTER_VIRT | \
         SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
         SECONDARY_EXEC_SHADOW_VMCS | \
         SECONDARY_EXEC_ENABLE_XSAVES | \
         SECONDARY_EXEC_RDSEED_EXITING | \
         SECONDARY_EXEC_RDRAND_EXITING | \
         SECONDARY_EXEC_ENABLE_PML | \
         SECONDARY_EXEC_TSC_SCALING | \
         SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | \
         SECONDARY_EXEC_PT_USE_GPA | \
         SECONDARY_EXEC_PT_CONCEAL_VMX | \
         SECONDARY_EXEC_ENABLE_VMFUNC | \
         SECONDARY_EXEC_BUS_LOCK_DETECTION | \
         SECONDARY_EXEC_NOTIFY_VM_EXITING | \
         SECONDARY_EXEC_ENCLS_EXITING)

#define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL 0
#define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL \
        (TERTIARY_EXEC_IPI_VIRT)

#define BUILD_CONTROLS_SHADOW(lname, uname, bits) \
static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \
{ \
        if (vmx->loaded_vmcs->controls_shadow.lname != val) { \
                vmcs_write##bits(uname, val); \
                vmx->loaded_vmcs->controls_shadow.lname = val; \
        } \
} \
static inline u##bits __##lname##_controls_get(struct loaded_vmcs *vmcs) \
{ \
        return vmcs->controls_shadow.lname; \
} \
static inline u##bits lname##_controls_get(struct vcpu_vmx *vmx) \
{ \
        return __##lname##_controls_get(vmx->loaded_vmcs); \
} \
static __always_inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u##bits val) \
{ \
        BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \
        lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \
} \
static __always_inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##bits val) \
{ \
        BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \
        lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \
}
BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32)
BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32)
BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL, 32)
BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL, 32)
BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL, 32)
BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
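
/*
 * Each invocation above generates lname##_controls_{set,get,setbit,clearbit}()
 * accessors for the named VMCS controls field, e.g. (illustrative):
 *
 *        exec_controls_setbit(vmx, CPU_BASED_MONITOR_TRAP_FLAG);
 *        exec_controls_clearbit(vmx, CPU_BASED_MONITOR_TRAP_FLAG);
 */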

/*
 * VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the
 * cache on demand. Other registers not listed here are synced to
 * the cache immediately after VM-Exit.
 */
#define VMX_REGS_LAZY_LOAD_SET ((1 << VCPU_REGS_RIP) | \
                                (1 << VCPU_REGS_RSP) | \
                                (1 << VCPU_EXREG_RFLAGS) | \
                                (1 << VCPU_EXREG_PDPTR) | \
                                (1 << VCPU_EXREG_SEGMENTS) | \
                                (1 << VCPU_EXREG_CR0) | \
                                (1 << VCPU_EXREG_CR3) | \
                                (1 << VCPU_EXREG_CR4) | \
                                (1 << VCPU_EXREG_EXIT_INFO_1) | \
                                (1 << VCPU_EXREG_EXIT_INFO_2))
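
/*
 * Illustrative note: a register in this set follows the lazy-read pattern
 * used by vmx_get_exit_qual() and vmx_get_intr_info() below, i.e. the VMCS
 * field is read only on first access after a VM-Exit, then marked available
 * in the register cache.
 */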

static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
{
        unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS;

        /*
         * CR0.WP needs to be intercepted when KVM is shadowing legacy paging
         * in order to construct shadow PTEs with the correct protections.
         * Note! CR0.WP technically can be passed through to the guest if
         * paging is disabled, but checking CR0.PG would generate a cyclical
         * dependency of sorts due to forcing the caller to ensure CR0 holds
         * the correct value prior to determining which CR0 bits can be owned
         * by L1. Keep it simple and limit the optimization to EPT.
         */
        if (!enable_ept)
                bits &= ~X86_CR0_WP;
        return bits;
}

static __always_inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
        return container_of(kvm, struct kvm_vmx, kvm);
}

static __always_inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
        return container_of(vcpu, struct vcpu_vmx, vcpu);
}

static inline struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu)
{
        return &to_vmx(vcpu)->lbr_desc;
}

static inline struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu)
{
        return &vcpu_to_lbr_desc(vcpu)->records;
}

static inline bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
{
        return !!vcpu_to_lbr_records(vcpu)->nr;
}

void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);

static __always_inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
{
        struct vcpu_vmx *vmx = to_vmx(vcpu);

        if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1))
                vmx->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);

        return vmx->exit_qualification;
}

static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
{
        struct vcpu_vmx *vmx = to_vmx(vcpu);

        if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2))
                vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);

        return vmx->exit_intr_info;
}

struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags);
void free_vmcs(struct vmcs *vmcs);
int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs);

static inline struct vmcs *alloc_vmcs(bool shadow)
{
        return alloc_vmcs_cpu(shadow, raw_smp_processor_id(),
                              GFP_KERNEL_ACCOUNT);
}

static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
{
        return secondary_exec_controls_get(vmx) &
                SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
}

static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
{
        if (!enable_ept)
                return true;

        return allow_smaller_maxphyaddr &&
               cpuid_maxphyaddr(vcpu) < kvm_get_shadow_phys_bits();
}

static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)
{
        return enable_unrestricted_guest && (!is_guest_mode(vcpu) ||
            (secondary_exec_controls_get(to_vmx(vcpu)) &
             SECONDARY_EXEC_UNRESTRICTED_GUEST));
}

bool __vmx_guest_state_valid(struct kvm_vcpu *vcpu);
static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
{
        return is_unrestricted_guest(vcpu) || __vmx_guest_state_valid(vcpu);
}

void dump_vmcs(struct kvm_vcpu *vcpu);

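/*
 * Per the SDM, bits 31:28 of the VMX instruction information field encode
 * the instruction's second register operand, e.g. the register holding the
 * VMCS field encoding for VMREAD/VMWRITE.
 */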
static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
{
        return (vmx_instr_info >> 28) & 0xf;
}

static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu)
{
        return lapic_in_kernel(vcpu) && enable_ipiv;
}

#endif /* __KVM_X86_VMX_H */