@@ -373,6 +373,7 @@ struct nested_vmx {
	 * we must keep them pinned while L2 runs.
	 */
	struct page *apic_access_page;
+	u64 msr_ia32_feature_control;
 };
 
 #define POSTED_INTR_ON 0
@@ -711,10 +712,10 @@ static void nested_release_page_clean(struct page *page)
	kvm_release_page_clean(page);
 }
 
+static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
-static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg);
@@ -1039,12 +1040,16 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
		(vmcs12->secondary_vm_exec_control & bit);
 }
 
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12,
-	struct kvm_vcpu *vcpu)
+static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
 {
	return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
 }
 
+static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
+}
+
 static inline bool is_exception(u32 intr_info)
 {
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2155,6 +2160,7 @@ static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
 static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
 static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
 static u32 nested_vmx_misc_low, nested_vmx_misc_high;
+static u32 nested_vmx_ept_caps;
 static __init void nested_vmx_setup_ctls_msrs(void)
 {
	/*
@@ -2190,14 +2196,17 @@ static __init void nested_vmx_setup_ctls_msrs(void)
	 * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and
	 * 17 must be 1.
	 */
+	rdmsr(MSR_IA32_VMX_EXIT_CTLS,
+		nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high);
	nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
	/* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */
+	nested_vmx_exit_ctls_high &=
 #ifdef CONFIG_X86_64
-	nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE;
-#else
-	nested_vmx_exit_ctls_high = 0;
+		VM_EXIT_HOST_ADDR_SPACE_SIZE |
 #endif
-	nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
+		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
+				      VM_EXIT_LOAD_IA32_EFER);
 
	/* entry controls */
	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
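A note on the rdmsr() calls in nested_vmx_setup_ctls_msrs(): each VMX capability MSR packs the allowed 0-settings of a control field in its low 32 bits (bits that must be 1) and the allowed 1-settings in its high 32 bits (bits that may be 1), which is why the patch masks the controls it wants to expose against the high half and then ORs in the always-on bits. Below is a minimal standalone sketch of that adjustment, with invented capability and control values; it is illustrative only and not part of the patch.

#include <stdint.h>
#include <stdio.h>

/*
 * Standalone illustration: the low half of a VMX capability MSR holds the
 * bits that must be 1, the high half the bits that may be 1.  The capability
 * value and requested controls below are made up for the example.
 */
static uint32_t adjust_controls(uint64_t cap_msr, uint32_t wanted)
{
	uint32_t must_be_one = (uint32_t)cap_msr;		/* allowed 0-settings */
	uint32_t may_be_one  = (uint32_t)(cap_msr >> 32);	/* allowed 1-settings */

	return (wanted & may_be_one) | must_be_one;
}

int main(void)
{
	uint64_t cap = ((uint64_t)0x00018200 << 32) | 0x00000200;

	printf("0x%08x\n", adjust_controls(cap, 0x00010004));
	return 0;
}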
@@ -2205,8 +2214,12 @@ static __init void nested_vmx_setup_ctls_msrs(void)
	/* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */
	nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
	nested_vmx_entry_ctls_high &=
-		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
-	nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
+#ifdef CONFIG_X86_64
+		VM_ENTRY_IA32E_MODE |
+#endif
+		VM_ENTRY_LOAD_IA32_PAT;
+	nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
+				       VM_ENTRY_LOAD_IA32_EFER);
 
	/* cpu-based controls */
	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2241,6 +2254,22 @@ static __init void nested_vmx_setup_ctls_msrs(void)
		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
		SECONDARY_EXEC_WBINVD_EXITING;
 
+	if (enable_ept) {
+		/* nested EPT: emulate EPT also to L1 */
+		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
+		nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
+			VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
+		nested_vmx_ept_caps &= vmx_capability.ept;
+		/*
+		 * Since invept is completely emulated we support both global
+		 * and context invalidation independent of what host cpu
+		 * supports
+		 */
+		nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
+			VMX_EPT_EXTENT_CONTEXT_BIT;
+	} else
+		nested_vmx_ept_caps = 0;
+
	/* miscellaneous data */
	rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
	nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
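The capability word assembled above starts from the EPT features the nested code actually emulates (4-level page walks, write-back EPTP memory type, INVEPT), keeps only those the host CPU reports, and then advertises both INVEPT extents unconditionally because INVEPT is fully emulated. A standalone sketch of that composition; the bit positions are restated here from the kernel's asm/vmx.h and should be treated as an assumption.

#include <stdint.h>
#include <stdio.h>

/* Assumed bit layout of IA32_VMX_EPT_VPID_CAP, mirroring asm/vmx.h. */
#define EPT_PAGE_WALK_4		(1ull << 6)
#define EPTP_WB			(1ull << 14)
#define EPT_INVEPT		(1ull << 20)
#define EPT_EXTENT_CONTEXT	(1ull << 25)
#define EPT_EXTENT_GLOBAL	(1ull << 26)

static uint64_t nested_ept_caps_from(uint64_t host_caps)
{
	uint64_t caps = EPT_PAGE_WALK_4 | EPTP_WB | EPT_INVEPT;

	caps &= host_caps;				/* only what hardware has   */
	caps |= EPT_EXTENT_GLOBAL | EPT_EXTENT_CONTEXT;	/* INVEPT is fully emulated */
	return caps;
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)
	       nested_ept_caps_from(EPT_PAGE_WALK_4 | EPTP_WB | EPT_INVEPT));
	return 0;
}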
@@ -2282,8 +2311,11 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 
	switch (msr_index) {
	case MSR_IA32_FEATURE_CONTROL:
-		*pdata = 0;
-		break;
+		if (nested_vmx_allowed(vcpu)) {
+			*pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control;
+			break;
+		}
+		return 0;
	case MSR_IA32_VMX_BASIC:
		/*
		 * This MSR reports some information about VMX support. We
@@ -2346,8 +2378,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
					nested_vmx_secondary_ctls_high);
		break;
	case MSR_IA32_VMX_EPT_VPID_CAP:
-		/* Currently, no nested ept or nested vpid */
-		*pdata = 0;
+		/* Currently, no nested vpid support */
+		*pdata = nested_vmx_ept_caps;
		break;
	default:
		return 0;
@@ -2356,14 +2388,24 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
	return 1;
 }
 
-static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
+	u32 msr_index = msr_info->index;
+	u64 data = msr_info->data;
+	bool host_initialized = msr_info->host_initiated;
+
	if (!nested_vmx_allowed(vcpu))
		return 0;
 
-	if (msr_index == MSR_IA32_FEATURE_CONTROL)
-		/* TODO: the right thing. */
+	if (msr_index == MSR_IA32_FEATURE_CONTROL) {
+		if (!host_initialized &&
+				to_vmx(vcpu)->nested.msr_ia32_feature_control
+				& FEATURE_CONTROL_LOCKED)
+			return 0;
+		to_vmx(vcpu)->nested.msr_ia32_feature_control = data;
		return 1;
+	}
+
	/*
	 * No need to treat VMX capability MSRs specially: If we don't handle
	 * them, handle_wrmsr will #GP(0), which is correct (they are readonly)
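The MSR_IA32_FEATURE_CONTROL handling added above follows the architectural behaviour of that MSR: once the lock bit (bit 0) is set, further guest writes are refused (vmx_set_vmx_msr() returns 0, so the caller raises #GP), while host-initiated writes from userspace may still seed the value; a later hunk makes handle_vmon() additionally require that both the lock bit and the enable-VMX-outside-SMX bit (bit 2) are set. A small standalone sketch of both checks; the helper names are invented for the example.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FC_LOCKED		(1ull << 0)	/* IA32_FEATURE_CONTROL lock bit     */
#define FC_VMXON_OUTSIDE_SMX	(1ull << 2)	/* enable VMXON outside SMX          */

/* Mirrors the write gate: guest writes are refused once the MSR is locked. */
static bool feature_control_write_allowed(uint64_t current, bool host_initiated)
{
	return host_initiated || !(current & FC_LOCKED);
}

/* Mirrors the VMXON gate: both bits must be set or VMXON takes #GP. */
static bool vmxon_allowed(uint64_t feature_control)
{
	const uint64_t needed = FC_LOCKED | FC_VMXON_OUTSIDE_SMX;

	return (feature_control & needed) == needed;
}

int main(void)
{
	printf("%d %d\n", feature_control_write_allowed(FC_LOCKED, false),
	       vmxon_allowed(FC_LOCKED | FC_VMXON_OUTSIDE_SMX));
	return 0;
}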
@@ -2494,7 +2536,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
			return 1;
		/* Otherwise falls through */
	default:
-		if (vmx_set_vmx_msr(vcpu, msr_index, data))
+		if (vmx_set_vmx_msr(vcpu, msr_info))
			break;
		msr = find_msr_entry(vmx, msr_index);
		if (msr) {
@@ -5302,9 +5344,13 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 
	/* It is a write fault? */
	error_code = exit_qualification & (1U << 1);
+	/* It is a fetch fault? */
+	error_code |= (exit_qualification & (1U << 2)) << 2;
	/* ept page table is present? */
	error_code |= (exit_qualification >> 3) & 0x1;
 
+	vcpu->arch.exit_qualification = exit_qualification;
+
	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
 }
 
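The bit manipulation above translates the EPT-violation exit qualification into the page-fault error-code layout the common MMU code expects: qualification bit 1 (write access) already matches the error code's write bit, bit 2 (instruction fetch) is shifted up to the fetch bit (bit 4), and bit 3 (the translation was readable, i.e. present in the EPT tables) lands in the present bit (bit 0). A standalone restatement of that mapping; the PFERR_* values below are assumed to mirror KVM's definitions.

#include <stdint.h>
#include <stdio.h>

#define PFERR_PRESENT	(1u << 0)	/* assumed to match KVM's PFERR_PRESENT_MASK */
#define PFERR_WRITE	(1u << 1)	/* assumed to match KVM's PFERR_WRITE_MASK   */
#define PFERR_FETCH	(1u << 4)	/* assumed to match KVM's PFERR_FETCH_MASK   */

static uint32_t ept_qual_to_error_code(uint64_t qual)
{
	uint32_t error_code = 0;

	error_code |= qual & (1u << 1);		/* write access      -> bit 1 */
	error_code |= (qual & (1u << 2)) << 2;	/* instruction fetch -> bit 4 */
	error_code |= (qual >> 3) & 0x1;	/* EPT readable      -> bit 0 */
	return error_code;
}

int main(void)
{
	/* A fetch from a present translation should yield PFERR_FETCH | PFERR_PRESENT. */
	uint32_t ec = ept_qual_to_error_code((1u << 2) | (1u << 3));

	printf("%#x (expected %#x)\n", ec, PFERR_FETCH | PFERR_PRESENT);
	return 0;
}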
@@ -5438,7 +5484,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 
		err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
 
-		if (err == EMULATE_DO_MMIO) {
+		if (err == EMULATE_USER_EXIT) {
+			++vcpu->stat.mmio_exits;
			ret = 0;
			goto out;
		}
@@ -5567,8 +5614,47 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
	free_loaded_vmcs(&vmx->vmcs01);
 }
 
+/*
+ * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
+ * set the success or error code of an emulated VMX instruction, as specified
+ * by Vol 2B, VMX Instruction Reference, "Conventions".
+ */
+static void nested_vmx_succeed(struct kvm_vcpu *vcpu)
+{
+	vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
+			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+			    X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
+}
+
+static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
+{
+	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
+			& ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
+			    X86_EFLAGS_SF | X86_EFLAGS_OF))
+			| X86_EFLAGS_CF);
+}
+
 static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
-					u32 vm_instruction_error);
+					u32 vm_instruction_error)
+{
+	if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
+		/*
+		 * failValid writes the error number to the current VMCS, which
+		 * can't be done if there isn't a current VMCS.
+		 */
+		nested_vmx_failInvalid(vcpu);
+		return;
+	}
+	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
+			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+			    X86_EFLAGS_SF | X86_EFLAGS_OF))
+			| X86_EFLAGS_ZF);
+	get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
+	/*
+	 * We don't need to force a shadow sync because
+	 * VM_INSTRUCTION_ERROR is not shadowed
+	 */
+}
 
 /*
  * Emulate the VMXON instruction.
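The three helpers defined above implement the SDM's conventions for reporting VMX instruction outcomes through RFLAGS: VMsucceed clears CF, PF, AF, ZF, SF and OF; VMfailInvalid clears the same set but raises CF; VMfailValid clears the set, raises ZF and additionally stores the error number in the current VMCS's VM-instruction error field. A compact standalone restatement of the flag arithmetic only, with the flag bit positions written out to match the x86 RFLAGS layout.

#include <stdint.h>
#include <stdio.h>

#define FL_CF	(1u << 0)
#define FL_PF	(1u << 2)
#define FL_AF	(1u << 4)
#define FL_ZF	(1u << 6)
#define FL_SF	(1u << 7)
#define FL_OF	(1u << 11)
#define FL_ALL	(FL_CF | FL_PF | FL_AF | FL_ZF | FL_SF | FL_OF)

enum vmx_outcome { VMSUCCEED, VMFAIL_INVALID, VMFAIL_VALID };

static uint64_t vmx_result_rflags(uint64_t rflags, enum vmx_outcome outcome)
{
	rflags &= ~(uint64_t)FL_ALL;	/* all six status flags cleared...        */
	if (outcome == VMFAIL_INVALID)
		rflags |= FL_CF;	/* ...except CF raised for VMfailInvalid  */
	else if (outcome == VMFAIL_VALID)
		rflags |= FL_ZF;	/* ...except ZF raised for VMfailValid    */
	return rflags;			/* (VMfailValid also writes the VMCS's    */
					/*  VM-instruction error field)           */
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)vmx_result_rflags(0x247, VMFAIL_INVALID));
	return 0;
}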
@@ -5583,6 +5669,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
	struct kvm_segment cs;
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmcs *shadow_vmcs;
+	const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
+		| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
 
	/* The Intel VMX Instruction Reference lists a bunch of bits that
	 * are prerequisite to running VMXON, most notably cr4.VMXE must be
@@ -5611,6 +5699,13 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
		skip_emulated_instruction(vcpu);
		return 1;
	}
+
+	if ((vmx->nested.msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
+			!= VMXON_NEEDED_FEATURES) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
	if (enable_shadow_vmcs) {
		shadow_vmcs = alloc_vmcs();
		if (!shadow_vmcs)
@@ -5628,6 +5723,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
	vmx->nested.vmxon = true;
 
	skip_emulated_instruction(vcpu);
+	nested_vmx_succeed(vcpu);
	return 1;
 }
 
@@ -5712,6 +5808,7 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
		return 1;
	free_nested(to_vmx(vcpu));
	skip_emulated_instruction(vcpu);
+	nested_vmx_succeed(vcpu);
	return 1;
 }
 
@@ -5768,48 +5865,6 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
	return 0;
 }
 
-/*
- * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
- * set the success or error code of an emulated VMX instruction, as specified
- * by Vol 2B, VMX Instruction Reference, "Conventions".
- */
-static void nested_vmx_succeed(struct kvm_vcpu *vcpu)
-{
-	vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
-			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
-			    X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
-}
-
-static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
-{
-	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
-			& ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
-			    X86_EFLAGS_SF | X86_EFLAGS_OF))
-			| X86_EFLAGS_CF);
-}
-
-static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
-					u32 vm_instruction_error)
-{
-	if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
-		/*
-		 * failValid writes the error number to the current VMCS, which
-		 * can't be done there isn't a current VMCS.
-		 */
-		nested_vmx_failInvalid(vcpu);
-		return;
-	}
-	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
-			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
-			    X86_EFLAGS_SF | X86_EFLAGS_OF))
-			| X86_EFLAGS_ZF);
-	get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
-	/*
-	 * We don't need to force a shadow sync because
-	 * VM_INSTRUCTION_ERROR is not shadowed
-	 */
-}
-
 /* Emulate the VMCLEAR instruction */
 static int handle_vmclear(struct kvm_vcpu *vcpu)
 {
@@ -5972,8 +6027,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
	unsigned long field;
	u64 field_value;
	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
-	unsigned long *fields = (unsigned long *)shadow_read_write_fields;
-	int num_fields = max_shadow_read_write_fields;
+	const unsigned long *fields = shadow_read_write_fields;
+	const int num_fields = max_shadow_read_write_fields;
 
	vmcs_load(shadow_vmcs);
 
@@ -6002,12 +6057,11 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 
 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 {
-	unsigned long *fields[] = {
-		(unsigned long *)shadow_read_write_fields,
-		(unsigned long *)shadow_read_only_fields
+	const unsigned long *fields[] = {
+		shadow_read_write_fields,
+		shadow_read_only_fields
	};
-	int num_lists = ARRAY_SIZE(fields);
-	int max_fields[] = {
+	const int max_fields[] = {
		max_shadow_read_write_fields,
		max_shadow_read_only_fields
	};
@@ -6018,7 +6072,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 
	vmcs_load(shadow_vmcs);
 
-	for (q = 0; q < num_lists; q++) {
+	for (q = 0; q < ARRAY_SIZE(fields); q++) {
		for (i = 0; i < max_fields[q]; i++) {
			field = fields[q][i];
			vmcs12_read_any(&vmx->vcpu, field, &field_value);
@@ -6248,6 +6302,74 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
	return 1;
 }
 
+/* Emulate the INVEPT instruction */
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+	u32 vmx_instruction_info, types;
+	unsigned long type;
+	gva_t gva;
+	struct x86_exception e;
+	struct {
+		u64 eptp, gpa;
+	} operand;
+	u64 eptp_mask = ((1ull << 51) - 1) & PAGE_MASK;
+
+	if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) ||
+	    !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+	types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
+
+	if (!(types & (1UL << type))) {
+		nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		return 1;
+	}
+
+	/* According to the Intel VMX instruction reference, the memory
+	 * operand is read even if it isn't needed (e.g., for type==global)
+	 */
+	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+			vmx_instruction_info, &gva))
+		return 1;
+	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+				sizeof(operand), &e)) {
+		kvm_inject_page_fault(vcpu, &e);
+		return 1;
+	}
+
+	switch (type) {
+	case VMX_EPT_EXTENT_CONTEXT:
+		if ((operand.eptp & eptp_mask) !=
+				(nested_ept_get_cr3(vcpu) & eptp_mask))
+			break;
+	case VMX_EPT_EXTENT_GLOBAL:
+		kvm_mmu_sync_roots(vcpu);
+		kvm_mmu_flush_tlb(vcpu);
+		nested_vmx_succeed(vcpu);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+
+	skip_emulated_instruction(vcpu);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
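A detail worth spelling out in handle_invept() above: IA32_VMX_EPT_VPID_CAP reports support for single-context INVEPT in bit 25 and for global INVEPT in bit 26, so `(nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6` (a shift of 24) leaves bit 1 set for type 1 (single-context) and bit 2 set for type 2 (global), and `types & (1UL << type)` is the validity test. A standalone sketch of that check; the shift and bit positions are restated here from asm/vmx.h as an assumption.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EPT_EXTENT_SHIFT	24
#define EPT_EXTENT_CONTEXT_BIT	(1ull << 25)	/* single-context INVEPT supported */
#define EPT_EXTENT_GLOBAL_BIT	(1ull << 26)	/* global INVEPT supported         */

static bool invept_type_supported(uint64_t ept_caps, unsigned long type)
{
	uint32_t types = (ept_caps >> EPT_EXTENT_SHIFT) & 6;

	return type < 32 && (types & (1ul << type));
}

int main(void)
{
	uint64_t caps = EPT_EXTENT_CONTEXT_BIT | EPT_EXTENT_GLOBAL_BIT;

	/* Types 1 (single-context) and 2 (global) pass, type 0 does not. */
	printf("%d %d %d\n", invept_type_supported(caps, 1),
	       invept_type_supported(caps, 2), invept_type_supported(caps, 0));
	return 0;
}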
@@ -6292,6 +6414,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
	[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
	[EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op,
	[EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op,
+	[EXIT_REASON_INVEPT] = handle_invept,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -6518,6 +6641,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
	case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+	case EXIT_REASON_INVEPT:
		/*
		 * VMX instructions trap unconditionally. This allows L1 to
		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -6550,7 +6674,20 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
		return nested_cpu_has2(vmcs12,
			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
	case EXIT_REASON_EPT_VIOLATION:
+		/*
+		 * L0 always deals with the EPT violation. If nested EPT is
+		 * used, and the nested mmu code discovers that the address is
+		 * missing in the guest EPT table (EPT12), the EPT violation
+		 * will be injected with nested_ept_inject_page_fault()
+		 */
+		return 0;
	case EXIT_REASON_EPT_MISCONFIG:
+		/*
+		 * L2 never directly uses L1's EPT, but rather L0's own EPT
+		 * table (shadow on EPT) or a merged EPT table that L0 built
+		 * (EPT on EPT). So any problems with the structure of the
+		 * table are L0's fault.
+		 */
		return 0;
	case EXIT_REASON_PREEMPTION_TIMER:
		return vmcs12->pin_based_vm_exec_control &
@@ -6638,7 +6775,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 
	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
	    !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
-					get_vmcs12(vcpu), vcpu)))) {
+					get_vmcs12(vcpu))))) {
		if (vmx_interrupt_allowed(vcpu)) {
			vmx->soft_vnmi_blocked = 0;
		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
@@ -7326,6 +7463,48 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
		entry->ecx |= bit(X86_FEATURE_VMX);
 }
 
+static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
+		struct x86_exception *fault)
+{
+	struct vmcs12 *vmcs12;
+	nested_vmx_vmexit(vcpu);
+	vmcs12 = get_vmcs12(vcpu);
+
+	if (fault->error_code & PFERR_RSVD_MASK)
+		vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+	else
+		vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
+	vmcs12->exit_qualification = vcpu->arch.exit_qualification;
+	vmcs12->guest_physical_address = fault->address;
+}
+
+/* Callbacks for nested_ept_init_mmu_context: */
+
+static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
+{
+	/* return the page table to be shadowed - in our case, EPT12 */
+	return get_vmcs12(vcpu)->ept_pointer;
+}
+
+static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+{
+	int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
+			nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT);
+
+	vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
+	vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
+	vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
+
+	vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
+
+	return r;
+}
+
+static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
+}
+
 /*
  * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
  * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
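The functions above plug nested EPT into KVM's generic shadow-MMU machinery purely through callbacks: the walker asks get_cr3() for the guest root to shadow (here L1's EPT pointer, EPT12, rather than a CR3) and uses inject_page_fault() to reflect faults that L1's tables cannot satisfy back to L1 as EPT exits. A minimal standalone sketch of that callback shape, with every name invented for the illustration.

#include <stdint.h>
#include <stdio.h>

/* Invented types: a stand-in for the shadow walker's callback table. */
struct walker_ops {
	uint64_t (*get_root)(void *ctx);		/* which guest table to shadow */
	void (*report_fault)(void *ctx, uint64_t gpa);	/* how to reflect a miss to L1 */
};

struct nested_ctx {
	uint64_t ept12;		/* stands in for get_vmcs12(vcpu)->ept_pointer */
};

static uint64_t nested_get_root(void *ctx)
{
	return ((struct nested_ctx *)ctx)->ept12;
}

static void nested_report_fault(void *ctx, uint64_t gpa)
{
	(void)ctx;
	printf("reflect EPT violation at %#llx to L1\n", (unsigned long long)gpa);
}

int main(void)
{
	struct nested_ctx ctx = { .ept12 = 0x12345000ull };
	struct walker_ops ops = { nested_get_root, nested_report_fault };

	printf("shadowed root: %#llx\n", (unsigned long long)ops.get_root(&ctx));
	ops.report_fault(&ctx, 0x7f0000ull);
	return 0;
}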
@@ -7388,7 +7567,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
		vmcs12->guest_interruptibility_info);
	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
	kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
-	vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags);
+	vmx_set_rflags(vcpu, vmcs12->guest_rflags);
	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
		vmcs12->guest_pending_dbg_exceptions);
	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
@@ -7508,15 +7687,24 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
 
-	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer below */
-	vmcs_write32(VM_EXIT_CONTROLS,
-		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
-	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
+	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
+	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
+	 * bits are further modified by vmx_set_efer() below.
+	 */
+	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+
+	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
+	 * emulated by vmx_set_efer(), below.
+	 */
+	vmcs_write32(VM_ENTRY_CONTROLS,
+		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
+			~VM_ENTRY_IA32E_MODE) |
		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
 
-	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)
+	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) {
		vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
-	else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
+		vcpu->arch.pat = vmcs12->guest_ia32_pat;
+	} else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 
 
@@ -7538,6 +7726,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
		vmx_flush_tlb(vcpu);
	}
 
+	if (nested_cpu_has_ept(vmcs12)) {
+		kvm_mmu_unload(vcpu);
+		nested_ept_init_mmu_context(vcpu);
+	}
+
	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)
		vcpu->arch.efer = vmcs12->guest_ia32_efer;
	else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
@@ -7565,6 +7758,16 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
	kvm_set_cr3(vcpu, vmcs12->guest_cr3);
	kvm_mmu_reset_context(vcpu);
 
+	/*
+	 * L1 may access L2's PDPTRs, so save them to construct vmcs12
+	 */
+	if (enable_ept) {
+		vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
+		vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
+		vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
+		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+	}
+
	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
 }
@@ -7887,6 +8090,22 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
	vmcs12->guest_pending_dbg_exceptions =
		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
 
+	/*
+	 * In some cases (usually, nested EPT), L2 is allowed to change its
+	 * own CR3 without exiting. If it has changed it, we must keep it.
+	 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
+	 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
+	 *
+	 * Additionally, restore L2's PDPTRs to vmcs12.
+	 */
+	if (enable_ept) {
+		vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3);
+		vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
+		vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
+		vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
+		vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
+	}
+
	vmcs12->vm_entry_controls =
		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
		(vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
@@ -7948,6 +8167,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
				   struct vmcs12 *vmcs12)
 {
+	struct kvm_segment seg;
+
	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
		vcpu->arch.efer = vmcs12->host_ia32_efer;
	else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
@@ -7982,7 +8203,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
	kvm_set_cr4(vcpu, vmcs12->host_cr4);
 
-	/* shadow page tables on either EPT or shadow page tables */
+	if (nested_cpu_has_ept(vmcs12))
+		nested_ept_uninit_mmu_context(vcpu);
+
	kvm_set_cr3(vcpu, vmcs12->host_cr3);
	kvm_mmu_reset_context(vcpu);
 
@@ -8001,23 +8224,61 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
	vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
	vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
-	vmcs_writel(GUEST_TR_BASE, vmcs12->host_tr_base);
-	vmcs_writel(GUEST_GS_BASE, vmcs12->host_gs_base);
-	vmcs_writel(GUEST_FS_BASE, vmcs12->host_fs_base);
-	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->host_es_selector);
-	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->host_cs_selector);
-	vmcs_write16(GUEST_SS_SELECTOR, vmcs12->host_ss_selector);
-	vmcs_write16(GUEST_DS_SELECTOR, vmcs12->host_ds_selector);
-	vmcs_write16(GUEST_FS_SELECTOR, vmcs12->host_fs_selector);
-	vmcs_write16(GUEST_GS_SELECTOR, vmcs12->host_gs_selector);
-	vmcs_write16(GUEST_TR_SELECTOR, vmcs12->host_tr_selector);
-
-	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT)
+
+	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
		vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
+		vcpu->arch.pat = vmcs12->host_ia32_pat;
+	}
	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
		vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
			vmcs12->host_ia32_perf_global_ctrl);
 
+	/* Set L1 segment info according to Intel SDM
+	   27.5.2 Loading Host Segment and Descriptor-Table Registers */
+	seg = (struct kvm_segment) {
+		.base = 0,
+		.limit = 0xFFFFFFFF,
+		.selector = vmcs12->host_cs_selector,
+		.type = 11,
+		.present = 1,
+		.s = 1,
+		.g = 1
+	};
+	if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
+		seg.l = 1;
+	else
+		seg.db = 1;
+	vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
+	seg = (struct kvm_segment) {
+		.base = 0,
+		.limit = 0xFFFFFFFF,
+		.type = 3,
+		.present = 1,
+		.s = 1,
+		.db = 1,
+		.g = 1
+	};
+	seg.selector = vmcs12->host_ds_selector;
+	vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
+	seg.selector = vmcs12->host_es_selector;
+	vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
+	seg.selector = vmcs12->host_ss_selector;
+	vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
+	seg.selector = vmcs12->host_fs_selector;
+	seg.base = vmcs12->host_fs_base;
+	vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
+	seg.selector = vmcs12->host_gs_selector;
+	seg.base = vmcs12->host_gs_base;
+	vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
+	seg = (struct kvm_segment) {
+		.base = vmcs12->host_tr_base,
+		.limit = 0x67,
+		.selector = vmcs12->host_tr_selector,
+		.type = 11,
+		.present = 1
+	};
+	vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
+
	kvm_set_dr(vcpu, 7, 0x400);
	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 }