|
|
@@ -43,6 +43,7 @@
|
|
|
#include <linux/slab.h>
|
|
|
#include <linux/perf_event.h>
|
|
|
#include <linux/uaccess.h>
|
|
|
+#include <linux/hash.h>
|
|
|
#include <trace/events/kvm.h>
|
|
|
|
|
|
#define CREATE_TRACE_POINTS
|
|
|
@@ -155,6 +156,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
|
|
|
|
|
u64 __read_mostly host_xcr0;
|
|
|
|
|
|
+static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
|
|
|
+{
|
|
|
+ int i;
|
|
|
+ for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
|
|
|
+ vcpu->arch.apf.gfns[i] = ~0;
|
|
|
+}
|
|
|
+
|
|
|
static void kvm_on_user_return(struct user_return_notifier *urn)
|
|
|
{
|
|
|
unsigned slot;
|
|
|
@@ -326,23 +334,28 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(kvm_requeue_exception);
|
|
|
|
|
|
-void kvm_inject_page_fault(struct kvm_vcpu *vcpu)
|
|
|
+void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
|
|
|
{
|
|
|
- unsigned error_code = vcpu->arch.fault.error_code;
|
|
|
+ if (err)
|
|
|
+ kvm_inject_gp(vcpu, 0);
|
|
|
+ else
|
|
|
+ kvm_x86_ops->skip_emulated_instruction(vcpu);
|
|
|
+}
|
|
|
+EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
|
|
|
|
|
|
+void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
|
|
|
+{
|
|
|
++vcpu->stat.pf_guest;
|
|
|
- vcpu->arch.cr2 = vcpu->arch.fault.address;
|
|
|
- kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
|
|
|
+ vcpu->arch.cr2 = fault->address;
|
|
|
+ kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
|
|
|
}
|
|
|
|
|
|
-void kvm_propagate_fault(struct kvm_vcpu *vcpu)
|
|
|
+void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
|
|
|
{
|
|
|
- if (mmu_is_nested(vcpu) && !vcpu->arch.fault.nested)
|
|
|
- vcpu->arch.nested_mmu.inject_page_fault(vcpu);
|
|
|
+ if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
|
|
|
+ vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
|
|
|
else
|
|
|
- vcpu->arch.mmu.inject_page_fault(vcpu);
|
|
|
-
|
|
|
- vcpu->arch.fault.nested = false;
|
|
|
+ vcpu->arch.mmu.inject_page_fault(vcpu, fault);
|
|
|
}
|
|
|
|
|
|
void kvm_inject_nmi(struct kvm_vcpu *vcpu)
|
|
|
@@ -460,8 +473,8 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
|
|
|
(unsigned long *)&vcpu->arch.regs_avail))
|
|
|
return true;
|
|
|
|
|
|
- gfn = (vcpu->arch.cr3 & ~31u) >> PAGE_SHIFT;
|
|
|
- offset = (vcpu->arch.cr3 & ~31u) & (PAGE_SIZE - 1);
|
|
|
+ gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
|
|
|
+ offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
|
|
|
r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
|
|
|
PFERR_USER_MASK | PFERR_WRITE_MASK);
|
|
|
if (r < 0)
|
|
|
@@ -506,12 +519,15 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
|
|
} else
|
|
|
#endif
|
|
|
if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
|
|
|
- vcpu->arch.cr3))
|
|
|
+ kvm_read_cr3(vcpu)))
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
kvm_x86_ops->set_cr0(vcpu, cr0);
|
|
|
|
|
|
+ if ((cr0 ^ old_cr0) & X86_CR0_PG)
|
|
|
+ kvm_clear_async_pf_completion_queue(vcpu);
|
|
|
+
|
|
|
if ((cr0 ^ old_cr0) & update_bits)
|
|
|
kvm_mmu_reset_context(vcpu);
|
|
|
return 0;
|
|
|
@@ -595,7 +611,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
|
|
|
return 1;
|
|
|
} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
|
|
|
&& ((cr4 ^ old_cr4) & pdptr_bits)
|
|
|
- && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3))
|
|
|
+ && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
|
|
|
+ kvm_read_cr3(vcpu)))
|
|
|
return 1;
|
|
|
|
|
|
if (cr4 & X86_CR4_VMXE)
|
|
|
@@ -615,7 +632,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
|
|
|
|
|
|
int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
|
|
|
{
|
|
|
- if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
|
|
|
+ if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
|
|
|
kvm_mmu_sync_roots(vcpu);
|
|
|
kvm_mmu_flush_tlb(vcpu);
|
|
|
return 0;
|
|
|
@@ -650,12 +667,13 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
|
|
|
if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
|
|
|
return 1;
|
|
|
vcpu->arch.cr3 = cr3;
|
|
|
+ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
|
|
|
vcpu->arch.mmu.new_cr3(vcpu);
|
|
|
return 0;
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(kvm_set_cr3);
|
|
|
|
|
|
-int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
|
|
|
+int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
|
|
|
{
|
|
|
if (cr8 & CR8_RESERVED_BITS)
|
|
|
return 1;
|
|
|
@@ -665,12 +683,6 @@ int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
|
|
|
vcpu->arch.cr8 = cr8;
|
|
|
return 0;
|
|
|
}
|
|
|
-
|
|
|
-void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
|
|
|
-{
|
|
|
- if (__kvm_set_cr8(vcpu, cr8))
|
|
|
- kvm_inject_gp(vcpu, 0);
|
|
|
-}
|
|
|
EXPORT_SYMBOL_GPL(kvm_set_cr8);
|
|
|
|
|
|
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
|
|
|
@@ -775,12 +787,12 @@ EXPORT_SYMBOL_GPL(kvm_get_dr);
|
|
|
* kvm-specific. Those are put in the beginning of the list.
|
|
|
*/
|
|
|
|
|
|
-#define KVM_SAVE_MSRS_BEGIN 7
|
|
|
+#define KVM_SAVE_MSRS_BEGIN 8
|
|
|
static u32 msrs_to_save[] = {
|
|
|
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
|
|
|
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
|
|
|
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
|
|
|
- HV_X64_MSR_APIC_ASSIST_PAGE,
|
|
|
+ HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN,
|
|
|
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
|
|
|
MSR_STAR,
|
|
|
#ifdef CONFIG_X86_64
|
|
|
@@ -830,7 +842,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
|
|
kvm_x86_ops->set_efer(vcpu, efer);
|
|
|
|
|
|
vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
|
|
|
- kvm_mmu_reset_context(vcpu);
|
|
|
|
|
|
/* Update reserved bits */
|
|
|
if ((efer ^ old_efer) & EFER_NX)
|
|
|
@@ -1418,6 +1429,30 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
|
|
|
+{
|
|
|
+ gpa_t gpa = data & ~0x3f;
|
|
|
+
|
|
|
+ /* Bits 2:5 are reserved and must be zero */
|
|
|
+ if (data & 0x3c)
|
|
|
+ return 1;
|
|
|
+
|
|
|
+ vcpu->arch.apf.msr_val = data;
|
|
|
+
|
|
|
+ if (!(data & KVM_ASYNC_PF_ENABLED)) {
|
|
|
+ kvm_clear_async_pf_completion_queue(vcpu);
|
|
|
+ kvm_async_pf_hash_reset(vcpu);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa))
|
|
|
+ return 1;
|
|
|
+
|
|
|
+ vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
|
|
|
+ kvm_async_pf_wakeup_all(vcpu);
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|
|
{
|
|
|
switch (msr) {
|
|
|
@@ -1499,6 +1534,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|
|
}
|
|
|
break;
|
|
|
}
|
|
|
+ case MSR_KVM_ASYNC_PF_EN:
|
|
|
+ if (kvm_pv_enable_async_pf(vcpu, data))
|
|
|
+ return 1;
|
|
|
+ break;
|
|
|
case MSR_IA32_MCG_CTL:
|
|
|
case MSR_IA32_MCG_STATUS:
|
|
|
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
|
|
|
@@ -1775,6 +1814,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
|
|
case MSR_KVM_SYSTEM_TIME_NEW:
|
|
|
data = vcpu->arch.time;
|
|
|
break;
|
|
|
+ case MSR_KVM_ASYNC_PF_EN:
|
|
|
+ data = vcpu->arch.apf.msr_val;
|
|
|
+ break;
|
|
|
case MSR_IA32_P5_MC_ADDR:
|
|
|
case MSR_IA32_P5_MC_TYPE:
|
|
|
case MSR_IA32_MCG_CAP:
|
|
|
@@ -1904,6 +1946,7 @@ int kvm_dev_ioctl_check_extension(long ext)
|
|
|
case KVM_CAP_NOP_IO_DELAY:
|
|
|
case KVM_CAP_MP_STATE:
|
|
|
case KVM_CAP_SYNC_MMU:
|
|
|
+ case KVM_CAP_USER_NMI:
|
|
|
case KVM_CAP_REINJECT_CONTROL:
|
|
|
case KVM_CAP_IRQ_INJECT_STATUS:
|
|
|
case KVM_CAP_ASSIGN_DEV_IRQ:
|
|
|
@@ -1922,6 +1965,7 @@ int kvm_dev_ioctl_check_extension(long ext)
|
|
|
case KVM_CAP_DEBUGREGS:
|
|
|
case KVM_CAP_X86_ROBUST_SINGLESTEP:
|
|
|
case KVM_CAP_XSAVE:
|
|
|
+ case KVM_CAP_ASYNC_PF:
|
|
|
r = 1;
|
|
|
break;
|
|
|
case KVM_CAP_COALESCED_MMIO:
|
|
|
@@ -2185,6 +2229,11 @@ out:
|
|
|
return r;
|
|
|
}
|
|
|
|
|
|
+static void cpuid_mask(u32 *word, int wordnum)
|
|
|
+{
|
|
|
+ *word &= boot_cpu_data.x86_capability[wordnum];
|
|
|
+}
|
|
|
+
|
|
|
static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
|
|
u32 index)
|
|
|
{
|
|
|
@@ -2259,7 +2308,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
|
|
break;
|
|
|
case 1:
|
|
|
entry->edx &= kvm_supported_word0_x86_features;
|
|
|
+ cpuid_mask(&entry->edx, 0);
|
|
|
entry->ecx &= kvm_supported_word4_x86_features;
|
|
|
+ cpuid_mask(&entry->ecx, 4);
|
|
|
/* we support x2apic emulation even if host does not support
|
|
|
* it since we emulate x2apic in software */
|
|
|
entry->ecx |= F(X2APIC);
|
|
|
@@ -2350,7 +2401,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
|
|
break;
|
|
|
case 0x80000001:
|
|
|
entry->edx &= kvm_supported_word1_x86_features;
|
|
|
+ cpuid_mask(&entry->edx, 1);
|
|
|
entry->ecx &= kvm_supported_word6_x86_features;
|
|
|
+ cpuid_mask(&entry->ecx, 6);
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
@@ -3169,20 +3222,18 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
|
|
|
struct kvm_memslots *slots, *old_slots;
|
|
|
unsigned long *dirty_bitmap;
|
|
|
|
|
|
- r = -ENOMEM;
|
|
|
- dirty_bitmap = vmalloc(n);
|
|
|
- if (!dirty_bitmap)
|
|
|
- goto out;
|
|
|
+ dirty_bitmap = memslot->dirty_bitmap_head;
|
|
|
+ if (memslot->dirty_bitmap == dirty_bitmap)
|
|
|
+ dirty_bitmap += n / sizeof(long);
|
|
|
memset(dirty_bitmap, 0, n);
|
|
|
|
|
|
r = -ENOMEM;
|
|
|
slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
|
|
|
- if (!slots) {
|
|
|
- vfree(dirty_bitmap);
|
|
|
+ if (!slots)
|
|
|
goto out;
|
|
|
- }
|
|
|
memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
|
|
|
slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
|
|
|
+ slots->generation++;
|
|
|
|
|
|
old_slots = kvm->memslots;
|
|
|
rcu_assign_pointer(kvm->memslots, slots);
|
|
|
@@ -3195,11 +3246,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
|
|
|
spin_unlock(&kvm->mmu_lock);
|
|
|
|
|
|
r = -EFAULT;
|
|
|
- if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) {
|
|
|
- vfree(dirty_bitmap);
|
|
|
+ if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
|
|
|
goto out;
|
|
|
- }
|
|
|
- vfree(dirty_bitmap);
|
|
|
} else {
|
|
|
r = -EFAULT;
|
|
|
if (clear_user(log->dirty_bitmap, n))
|
|
|
@@ -3266,8 +3314,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|
|
if (vpic) {
|
|
|
r = kvm_ioapic_init(kvm);
|
|
|
if (r) {
|
|
|
+ mutex_lock(&kvm->slots_lock);
|
|
|
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
|
|
|
&vpic->dev);
|
|
|
+ mutex_unlock(&kvm->slots_lock);
|
|
|
kfree(vpic);
|
|
|
goto create_irqchip_unlock;
|
|
|
}
|
|
|
@@ -3278,10 +3328,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|
|
smp_wmb();
|
|
|
r = kvm_setup_default_irq_routing(kvm);
|
|
|
if (r) {
|
|
|
+ mutex_lock(&kvm->slots_lock);
|
|
|
mutex_lock(&kvm->irq_lock);
|
|
|
kvm_ioapic_destroy(kvm);
|
|
|
kvm_destroy_pic(kvm);
|
|
|
mutex_unlock(&kvm->irq_lock);
|
|
|
+ mutex_unlock(&kvm->slots_lock);
|
|
|
}
|
|
|
create_irqchip_unlock:
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
@@ -3557,63 +3609,63 @@ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
|
|
|
static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
|
|
|
{
|
|
|
gpa_t t_gpa;
|
|
|
- u32 error;
|
|
|
+ struct x86_exception exception;
|
|
|
|
|
|
BUG_ON(!mmu_is_nested(vcpu));
|
|
|
|
|
|
/* NPT walks are always user-walks */
|
|
|
access |= PFERR_USER_MASK;
|
|
|
- t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &error);
|
|
|
- if (t_gpa == UNMAPPED_GVA)
|
|
|
- vcpu->arch.fault.nested = true;
|
|
|
+ t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);
|
|
|
|
|
|
return t_gpa;
|
|
|
}
|
|
|
|
|
|
-gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
|
|
|
+gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
|
|
|
+ struct x86_exception *exception)
|
|
|
{
|
|
|
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
|
|
|
- return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error);
|
|
|
+ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
|
|
|
}
|
|
|
|
|
|
- gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
|
|
|
+ gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
|
|
|
+ struct x86_exception *exception)
|
|
|
{
|
|
|
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
|
|
|
access |= PFERR_FETCH_MASK;
|
|
|
- return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error);
|
|
|
+ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
|
|
|
}
|
|
|
|
|
|
-gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
|
|
|
+gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
|
|
|
+ struct x86_exception *exception)
|
|
|
{
|
|
|
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
|
|
|
access |= PFERR_WRITE_MASK;
|
|
|
- return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error);
|
|
|
+ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
|
|
|
}
|
|
|
|
|
|
/* uses this to access any guest's mapped memory without checking CPL */
|
|
|
-gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
|
|
|
+gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
|
|
|
+ struct x86_exception *exception)
|
|
|
{
|
|
|
- return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, error);
|
|
|
+ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
|
|
|
}
|
|
|
|
|
|
static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
|
|
|
struct kvm_vcpu *vcpu, u32 access,
|
|
|
- u32 *error)
|
|
|
+ struct x86_exception *exception)
|
|
|
{
|
|
|
void *data = val;
|
|
|
int r = X86EMUL_CONTINUE;
|
|
|
|
|
|
while (bytes) {
|
|
|
gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
|
|
|
- error);
|
|
|
+ exception);
|
|
|
unsigned offset = addr & (PAGE_SIZE-1);
|
|
|
unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
|
|
|
int ret;
|
|
|
|
|
|
- if (gpa == UNMAPPED_GVA) {
|
|
|
- r = X86EMUL_PROPAGATE_FAULT;
|
|
|
- goto out;
|
|
|
- }
|
|
|
+ if (gpa == UNMAPPED_GVA)
|
|
|
+ return X86EMUL_PROPAGATE_FAULT;
|
|
|
ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
|
|
|
if (ret < 0) {
|
|
|
r = X86EMUL_IO_NEEDED;
|
|
|
@@ -3630,31 +3682,35 @@ out:
|
|
|
|
|
|
/* used for instruction fetching */
|
|
|
static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
|
|
|
- struct kvm_vcpu *vcpu, u32 *error)
|
|
|
+ struct kvm_vcpu *vcpu,
|
|
|
+ struct x86_exception *exception)
|
|
|
{
|
|
|
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
|
|
|
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
|
|
|
- access | PFERR_FETCH_MASK, error);
|
|
|
+ access | PFERR_FETCH_MASK,
|
|
|
+ exception);
|
|
|
}
|
|
|
|
|
|
static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
|
|
|
- struct kvm_vcpu *vcpu, u32 *error)
|
|
|
+ struct kvm_vcpu *vcpu,
|
|
|
+ struct x86_exception *exception)
|
|
|
{
|
|
|
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
|
|
|
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
|
|
|
- error);
|
|
|
+ exception);
|
|
|
}
|
|
|
|
|
|
static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
|
|
|
- struct kvm_vcpu *vcpu, u32 *error)
|
|
|
+ struct kvm_vcpu *vcpu,
|
|
|
+ struct x86_exception *exception)
|
|
|
{
|
|
|
- return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
|
|
|
+ return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
|
|
|
}
|
|
|
|
|
|
static int kvm_write_guest_virt_system(gva_t addr, void *val,
|
|
|
unsigned int bytes,
|
|
|
struct kvm_vcpu *vcpu,
|
|
|
- u32 *error)
|
|
|
+ struct x86_exception *exception)
|
|
|
{
|
|
|
void *data = val;
|
|
|
int r = X86EMUL_CONTINUE;
|
|
|
@@ -3662,15 +3718,13 @@ static int kvm_write_guest_virt_system(gva_t addr, void *val,
|
|
|
while (bytes) {
|
|
|
gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
|
|
|
PFERR_WRITE_MASK,
|
|
|
- error);
|
|
|
+ exception);
|
|
|
unsigned offset = addr & (PAGE_SIZE-1);
|
|
|
unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
|
|
|
int ret;
|
|
|
|
|
|
- if (gpa == UNMAPPED_GVA) {
|
|
|
- r = X86EMUL_PROPAGATE_FAULT;
|
|
|
- goto out;
|
|
|
- }
|
|
|
+ if (gpa == UNMAPPED_GVA)
|
|
|
+ return X86EMUL_PROPAGATE_FAULT;
|
|
|
ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
|
|
|
if (ret < 0) {
|
|
|
r = X86EMUL_IO_NEEDED;
|
|
|
@@ -3688,7 +3742,7 @@ out:
|
|
|
static int emulator_read_emulated(unsigned long addr,
|
|
|
void *val,
|
|
|
unsigned int bytes,
|
|
|
- unsigned int *error_code,
|
|
|
+ struct x86_exception *exception,
|
|
|
struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
gpa_t gpa;
|
|
|
@@ -3701,7 +3755,7 @@ static int emulator_read_emulated(unsigned long addr,
|
|
|
return X86EMUL_CONTINUE;
|
|
|
}
|
|
|
|
|
|
- gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code);
|
|
|
+ gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, exception);
|
|
|
|
|
|
if (gpa == UNMAPPED_GVA)
|
|
|
return X86EMUL_PROPAGATE_FAULT;
|
|
|
@@ -3710,8 +3764,8 @@ static int emulator_read_emulated(unsigned long addr,
|
|
|
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
|
|
|
goto mmio;
|
|
|
|
|
|
- if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL)
|
|
|
- == X86EMUL_CONTINUE)
|
|
|
+ if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception)
|
|
|
+ == X86EMUL_CONTINUE)
|
|
|
return X86EMUL_CONTINUE;
|
|
|
|
|
|
mmio:
|
|
|
@@ -3735,7 +3789,7 @@ mmio:
|
|
|
}
|
|
|
|
|
|
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
|
|
|
- const void *val, int bytes)
|
|
|
+ const void *val, int bytes)
|
|
|
{
|
|
|
int ret;
|
|
|
|
|
|
@@ -3749,12 +3803,12 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
|
|
|
static int emulator_write_emulated_onepage(unsigned long addr,
|
|
|
const void *val,
|
|
|
unsigned int bytes,
|
|
|
- unsigned int *error_code,
|
|
|
+ struct x86_exception *exception,
|
|
|
struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
gpa_t gpa;
|
|
|
|
|
|
- gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code);
|
|
|
+ gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
|
|
|
|
|
|
if (gpa == UNMAPPED_GVA)
|
|
|
return X86EMUL_PROPAGATE_FAULT;
|
|
|
@@ -3787,7 +3841,7 @@ mmio:
|
|
|
int emulator_write_emulated(unsigned long addr,
|
|
|
const void *val,
|
|
|
unsigned int bytes,
|
|
|
- unsigned int *error_code,
|
|
|
+ struct x86_exception *exception,
|
|
|
struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
/* Crossing a page boundary? */
|
|
|
@@ -3795,7 +3849,7 @@ int emulator_write_emulated(unsigned long addr,
|
|
|
int rc, now;
|
|
|
|
|
|
now = -addr & ~PAGE_MASK;
|
|
|
- rc = emulator_write_emulated_onepage(addr, val, now, error_code,
|
|
|
+ rc = emulator_write_emulated_onepage(addr, val, now, exception,
|
|
|
vcpu);
|
|
|
if (rc != X86EMUL_CONTINUE)
|
|
|
return rc;
|
|
|
@@ -3803,7 +3857,7 @@ int emulator_write_emulated(unsigned long addr,
|
|
|
val += now;
|
|
|
bytes -= now;
|
|
|
}
|
|
|
- return emulator_write_emulated_onepage(addr, val, bytes, error_code,
|
|
|
+ return emulator_write_emulated_onepage(addr, val, bytes, exception,
|
|
|
vcpu);
|
|
|
}
|
|
|
|
|
|
@@ -3821,7 +3875,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
|
|
|
const void *old,
|
|
|
const void *new,
|
|
|
unsigned int bytes,
|
|
|
- unsigned int *error_code,
|
|
|
+ struct x86_exception *exception,
|
|
|
struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
gpa_t gpa;
|
|
|
@@ -3879,7 +3933,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
|
|
|
emul_write:
|
|
|
printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
|
|
|
|
|
|
- return emulator_write_emulated(addr, new, bytes, error_code, vcpu);
|
|
|
+ return emulator_write_emulated(addr, new, bytes, exception, vcpu);
|
|
|
}
|
|
|
|
|
|
static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
|
|
|
@@ -3904,7 +3958,7 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
|
|
|
if (vcpu->arch.pio.count)
|
|
|
goto data_avail;
|
|
|
|
|
|
- trace_kvm_pio(0, port, size, 1);
|
|
|
+ trace_kvm_pio(0, port, size, count);
|
|
|
|
|
|
vcpu->arch.pio.port = port;
|
|
|
vcpu->arch.pio.in = 1;
|
|
|
@@ -3932,7 +3986,7 @@ static int emulator_pio_out_emulated(int size, unsigned short port,
|
|
|
const void *val, unsigned int count,
|
|
|
struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
- trace_kvm_pio(1, port, size, 1);
|
|
|
+ trace_kvm_pio(1, port, size, count);
|
|
|
|
|
|
vcpu->arch.pio.port = port;
|
|
|
vcpu->arch.pio.in = 0;
|
|
|
@@ -3973,13 +4027,15 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
|
|
|
return X86EMUL_CONTINUE;
|
|
|
|
|
|
if (kvm_x86_ops->has_wbinvd_exit()) {
|
|
|
- preempt_disable();
|
|
|
+ int cpu = get_cpu();
|
|
|
+
|
|
|
+ cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
|
|
|
smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
|
|
|
wbinvd_ipi, NULL, 1);
|
|
|
- preempt_enable();
|
|
|
+ put_cpu();
|
|
|
cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
|
|
|
- }
|
|
|
- wbinvd();
|
|
|
+ } else
|
|
|
+ wbinvd();
|
|
|
return X86EMUL_CONTINUE;
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
|
|
|
@@ -4019,7 +4075,7 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
|
|
|
value = vcpu->arch.cr2;
|
|
|
break;
|
|
|
case 3:
|
|
|
- value = vcpu->arch.cr3;
|
|
|
+ value = kvm_read_cr3(vcpu);
|
|
|
break;
|
|
|
case 4:
|
|
|
value = kvm_read_cr4(vcpu);
|
|
|
@@ -4053,7 +4109,7 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
|
|
|
res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
|
|
|
break;
|
|
|
case 8:
|
|
|
- res = __kvm_set_cr8(vcpu, val & 0xfUL);
|
|
|
+ res = kvm_set_cr8(vcpu, val);
|
|
|
break;
|
|
|
default:
|
|
|
vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
|
|
|
@@ -4206,12 +4262,13 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
|
|
|
static void inject_emulated_exception(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
|
|
|
- if (ctxt->exception == PF_VECTOR)
|
|
|
- kvm_propagate_fault(vcpu);
|
|
|
- else if (ctxt->error_code_valid)
|
|
|
- kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code);
|
|
|
+ if (ctxt->exception.vector == PF_VECTOR)
|
|
|
+ kvm_propagate_fault(vcpu, &ctxt->exception);
|
|
|
+ else if (ctxt->exception.error_code_valid)
|
|
|
+ kvm_queue_exception_e(vcpu, ctxt->exception.vector,
|
|
|
+ ctxt->exception.error_code);
|
|
|
else
|
|
|
- kvm_queue_exception(vcpu, ctxt->exception);
|
|
|
+ kvm_queue_exception(vcpu, ctxt->exception.vector);
|
|
|
}
|
|
|
|
|
|
static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
|
|
|
@@ -4267,13 +4324,19 @@ EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
|
|
|
|
|
|
static int handle_emulation_failure(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
+ int r = EMULATE_DONE;
|
|
|
+
|
|
|
++vcpu->stat.insn_emulation_fail;
|
|
|
trace_kvm_emulate_insn_failed(vcpu);
|
|
|
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
|
|
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
|
|
|
- vcpu->run->internal.ndata = 0;
|
|
|
+ if (!is_guest_mode(vcpu)) {
|
|
|
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
|
|
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
|
|
|
+ vcpu->run->internal.ndata = 0;
|
|
|
+ r = EMULATE_FAIL;
|
|
|
+ }
|
|
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
|
|
- return EMULATE_FAIL;
|
|
|
+
|
|
|
+ return r;
|
|
|
}
|
|
|
|
|
|
static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
|
|
|
@@ -4302,10 +4365,11 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
-int emulate_instruction(struct kvm_vcpu *vcpu,
|
|
|
- unsigned long cr2,
|
|
|
- u16 error_code,
|
|
|
- int emulation_type)
|
|
|
+int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
|
|
+ unsigned long cr2,
|
|
|
+ int emulation_type,
|
|
|
+ void *insn,
|
|
|
+ int insn_len)
|
|
|
{
|
|
|
int r;
|
|
|
struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
|
|
|
@@ -4323,10 +4387,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
|
|
|
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
|
|
|
init_emulate_ctxt(vcpu);
|
|
|
vcpu->arch.emulate_ctxt.interruptibility = 0;
|
|
|
- vcpu->arch.emulate_ctxt.exception = -1;
|
|
|
+ vcpu->arch.emulate_ctxt.have_exception = false;
|
|
|
vcpu->arch.emulate_ctxt.perm_ok = false;
|
|
|
|
|
|
- r = x86_decode_insn(&vcpu->arch.emulate_ctxt);
|
|
|
+ r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len);
|
|
|
if (r == X86EMUL_PROPAGATE_FAULT)
|
|
|
goto done;
|
|
|
|
|
|
@@ -4389,7 +4453,7 @@ restart:
|
|
|
}
|
|
|
|
|
|
done:
|
|
|
- if (vcpu->arch.emulate_ctxt.exception >= 0) {
|
|
|
+ if (vcpu->arch.emulate_ctxt.have_exception) {
|
|
|
inject_emulated_exception(vcpu);
|
|
|
r = EMULATE_DONE;
|
|
|
} else if (vcpu->arch.pio.count) {
|
|
|
@@ -4413,7 +4477,7 @@ done:
|
|
|
|
|
|
return r;
|
|
|
}
|
|
|
-EXPORT_SYMBOL_GPL(emulate_instruction);
|
|
|
+EXPORT_SYMBOL_GPL(x86_emulate_instruction);
|
|
|
|
|
|
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
|
|
|
{
|
|
|
@@ -4653,7 +4717,6 @@ int kvm_arch_init(void *opaque)
|
|
|
|
|
|
kvm_x86_ops = ops;
|
|
|
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
|
|
|
- kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
|
|
|
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
|
|
|
PT_DIRTY_MASK, PT64_NX_MASK, 0);
|
|
|
|
|
|
@@ -5116,6 +5179,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|
|
vcpu->fpu_active = 0;
|
|
|
kvm_x86_ops->fpu_deactivate(vcpu);
|
|
|
}
|
|
|
+ if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
|
|
|
+ /* Page is swapped out. Do synthetic halt */
|
|
|
+ vcpu->arch.apf.halted = true;
|
|
|
+ r = 1;
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
r = kvm_mmu_reload(vcpu);
|
|
|
@@ -5244,7 +5313,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
|
|
|
|
|
r = 1;
|
|
|
while (r > 0) {
|
|
|
- if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
|
|
|
+ if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
|
|
|
+ !vcpu->arch.apf.halted)
|
|
|
r = vcpu_enter_guest(vcpu);
|
|
|
else {
|
|
|
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
|
|
|
@@ -5257,6 +5327,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
|
|
vcpu->arch.mp_state =
|
|
|
KVM_MP_STATE_RUNNABLE;
|
|
|
case KVM_MP_STATE_RUNNABLE:
|
|
|
+ vcpu->arch.apf.halted = false;
|
|
|
break;
|
|
|
case KVM_MP_STATE_SIPI_RECEIVED:
|
|
|
default:
|
|
|
@@ -5278,6 +5349,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
|
|
vcpu->run->exit_reason = KVM_EXIT_INTR;
|
|
|
++vcpu->stat.request_irq_exits;
|
|
|
}
|
|
|
+
|
|
|
+ kvm_check_async_pf_completion(vcpu);
|
|
|
+
|
|
|
if (signal_pending(current)) {
|
|
|
r = -EINTR;
|
|
|
vcpu->run->exit_reason = KVM_EXIT_INTR;
|
|
|
@@ -5302,6 +5376,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
int r;
|
|
|
sigset_t sigsaved;
|
|
|
|
|
|
+ if (!tsk_used_math(current) && init_fpu(current))
|
|
|
+ return -ENOMEM;
|
|
|
+
|
|
|
if (vcpu->sigset_active)
|
|
|
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
|
|
|
|
|
|
@@ -5313,8 +5390,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
}
|
|
|
|
|
|
/* re-sync apic's tpr */
|
|
|
- if (!irqchip_in_kernel(vcpu->kvm))
|
|
|
- kvm_set_cr8(vcpu, kvm_run->cr8);
|
|
|
+ if (!irqchip_in_kernel(vcpu->kvm)) {
|
|
|
+ if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
|
|
|
+ r = -EINVAL;
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
if (vcpu->arch.pio.count || vcpu->mmio_needed) {
|
|
|
if (vcpu->mmio_needed) {
|
|
|
@@ -5323,7 +5404,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
vcpu->mmio_needed = 0;
|
|
|
}
|
|
|
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
|
|
- r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);
|
|
|
+ r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
|
|
|
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
|
|
if (r != EMULATE_DONE) {
|
|
|
r = 0;
|
|
|
@@ -5436,7 +5517,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
|
|
|
|
|
|
sregs->cr0 = kvm_read_cr0(vcpu);
|
|
|
sregs->cr2 = vcpu->arch.cr2;
|
|
|
- sregs->cr3 = vcpu->arch.cr3;
|
|
|
+ sregs->cr3 = kvm_read_cr3(vcpu);
|
|
|
sregs->cr4 = kvm_read_cr4(vcpu);
|
|
|
sregs->cr8 = kvm_get_cr8(vcpu);
|
|
|
sregs->efer = vcpu->arch.efer;
|
|
|
@@ -5504,8 +5585,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
|
|
kvm_x86_ops->set_gdt(vcpu, &dt);
|
|
|
|
|
|
vcpu->arch.cr2 = sregs->cr2;
|
|
|
- mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
|
|
|
+ mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
|
|
|
vcpu->arch.cr3 = sregs->cr3;
|
|
|
+ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
|
|
|
|
|
|
kvm_set_cr8(vcpu, sregs->cr8);
|
|
|
|
|
|
@@ -5522,7 +5604,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
|
|
if (sregs->cr4 & X86_CR4_OSXSAVE)
|
|
|
update_cpuid(vcpu);
|
|
|
if (!is_long_mode(vcpu) && is_pae(vcpu)) {
|
|
|
- load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
|
|
|
+ load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
|
|
|
mmu_reset_needed = 1;
|
|
|
}
|
|
|
|
|
|
@@ -5773,6 +5855,8 @@ free_vcpu:
|
|
|
|
|
|
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
+ vcpu->arch.apf.msr_val = 0;
|
|
|
+
|
|
|
vcpu_load(vcpu);
|
|
|
kvm_mmu_unload(vcpu);
|
|
|
vcpu_put(vcpu);
|
|
|
@@ -5792,6 +5876,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
|
|
|
vcpu->arch.dr7 = DR7_FIXED_1;
|
|
|
|
|
|
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
|
|
+ vcpu->arch.apf.msr_val = 0;
|
|
|
+
|
|
|
+ kvm_clear_async_pf_completion_queue(vcpu);
|
|
|
+ kvm_async_pf_hash_reset(vcpu);
|
|
|
+ vcpu->arch.apf.halted = false;
|
|
|
|
|
|
return kvm_x86_ops->vcpu_reset(vcpu);
|
|
|
}
|
|
|
@@ -5881,6 +5970,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
|
|
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
|
|
|
goto fail_free_mce_banks;
|
|
|
|
|
|
+ kvm_async_pf_hash_reset(vcpu);
|
|
|
+
|
|
|
return 0;
|
|
|
fail_free_mce_banks:
|
|
|
kfree(vcpu->arch.mce_banks);
|
|
|
@@ -5906,13 +5997,8 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
|
|
|
free_page((unsigned long)vcpu->arch.pio_data);
|
|
|
}
|
|
|
|
|
|
-struct kvm *kvm_arch_create_vm(void)
|
|
|
+int kvm_arch_init_vm(struct kvm *kvm)
|
|
|
{
|
|
|
- struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
|
|
|
-
|
|
|
- if (!kvm)
|
|
|
- return ERR_PTR(-ENOMEM);
|
|
|
-
|
|
|
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
|
|
|
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
|
|
|
|
|
|
@@ -5921,7 +6007,7 @@ struct kvm *kvm_arch_create_vm(void)
|
|
|
|
|
|
spin_lock_init(&kvm->arch.tsc_write_lock);
|
|
|
|
|
|
- return kvm;
|
|
|
+ return 0;
|
|
|
}
|
|
|
|
|
|
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
|
|
|
@@ -5939,8 +6025,10 @@ static void kvm_free_vcpus(struct kvm *kvm)
|
|
|
/*
|
|
|
* Unpin any mmu pages first.
|
|
|
*/
|
|
|
- kvm_for_each_vcpu(i, vcpu, kvm)
|
|
|
+ kvm_for_each_vcpu(i, vcpu, kvm) {
|
|
|
+ kvm_clear_async_pf_completion_queue(vcpu);
|
|
|
kvm_unload_vcpu_mmu(vcpu);
|
|
|
+ }
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm)
|
|
|
kvm_arch_vcpu_free(vcpu);
|
|
|
|
|
|
@@ -5964,13 +6052,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
|
|
kfree(kvm->arch.vpic);
|
|
|
kfree(kvm->arch.vioapic);
|
|
|
kvm_free_vcpus(kvm);
|
|
|
- kvm_free_physmem(kvm);
|
|
|
if (kvm->arch.apic_access_page)
|
|
|
put_page(kvm->arch.apic_access_page);
|
|
|
if (kvm->arch.ept_identity_pagetable)
|
|
|
put_page(kvm->arch.ept_identity_pagetable);
|
|
|
- cleanup_srcu_struct(&kvm->srcu);
|
|
|
- kfree(kvm);
|
|
|
}
|
|
|
|
|
|
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|
|
@@ -6051,7 +6136,9 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
|
|
|
|
|
|
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
- return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
|
|
|
+ return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
|
|
|
+ !vcpu->arch.apf.halted)
|
|
|
+ || !list_empty_careful(&vcpu->async_pf.done)
|
|
|
|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
|
|
|
|| vcpu->arch.nmi_pending ||
|
|
|
(kvm_arch_interrupt_allowed(vcpu) &&
|
|
|
@@ -6110,6 +6197,147 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(kvm_set_rflags);
|
|
|
|
|
|
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
|
|
|
+{ /* Re-runs the MMU page fault handler for work->gva unless one of the checks below rejects the work item. */
|
|
|
+ int r;
|
|
|
+ /* Skip if the vcpu's direct_map mode differs from the one stored in the work item, or the page is an error page. */
|
|
|
+ if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
|
|
|
+ is_error_page(work->page))
|
|
|
+ return;
|
|
|
+ /* Give up silently if the MMU cannot be reloaded. */
|
|
|
+ r = kvm_mmu_reload(vcpu);
|
|
|
+ if (unlikely(r))
|
|
|
+ return;
|
|
|
+ /* Without direct map, skip if the current CR3 differs from the CR3 stored in the work item. */
|
|
|
+ if (!vcpu->arch.mmu.direct_map &&
|
|
|
+ work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
|
|
|
+ return;
|
|
|
+ /* Error code 0; the meaning of the final 'true' is defined by the page_fault callback (not visible here). */
|
|
|
+ vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
|
|
|
+}
|
|
|
+
|
|
|
+static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
|
|
|
+{ /* Hashes the low 32 bits of gfn to order_base_2(ASYNC_PF_PER_VCPU) bits; used as the first probe index into apf.gfns[]. */
|
|
|
+ return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
|
|
|
+}
|
|
|
+
|
|
|
+static inline u32 kvm_async_pf_next_probe(u32 key)
|
|
|
+{ /* Linear probing step: returns key + 1, wrapped to the table size roundup_pow_of_two(ASYNC_PF_PER_VCPU). */
|
|
|
+ return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
|
|
|
+}
|
|
|
+
|
|
|
+static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
|
|
|
+{ /* Stores gfn in the per-vcpu apf.gfns[] table; a slot holding ~0 is free. */
|
|
|
+ u32 key = kvm_async_pf_hash_fn(gfn);
|
|
|
+ /* Probe forward to the first free slot. The loop has no bound, so it never ends if no slot is free - NOTE(review): confirm callers cap the number of entries. */
|
|
|
+ while (vcpu->arch.apf.gfns[key] != ~0)
|
|
|
+ key = kvm_async_pf_next_probe(key);
|
|
|
+
|
|
|
+ vcpu->arch.apf.gfns[key] = gfn;
|
|
|
+}
|
|
|
+
|
|
|
+static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
|
|
|
+{ /* Returns the index where probing for gfn stops: the slot holding gfn, the first free (~0) slot, or the starting slot after one full pass. Callers must inspect the slot. */
|
|
|
+ int i;
|
|
|
+ u32 key = kvm_async_pf_hash_fn(gfn);
|
|
|
+ /* The counter i limits the search to one pass over the table, so this terminates even when no slot is free. */
|
|
|
+ for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
|
|
|
+ (vcpu->arch.apf.gfns[key] != gfn &&
|
|
|
+ vcpu->arch.apf.gfns[key] != ~0); i++)
|
|
|
+ key = kvm_async_pf_next_probe(key);
|
|
|
+
|
|
|
+ return key;
|
|
|
+}
|
|
|
+
|
|
|
+bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
|
|
|
+{ /* Returns true when the slot that probing for gfn ends on actually holds gfn. */
|
|
|
+ return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
|
|
|
+}
|
|
|
+
|
|
|
+static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
|
|
|
+{ /* Removes gfn from apf.gfns[] and refills the freed slot from later entries of the probe chain where the ]i,j] test below allows it. */
|
|
|
+ u32 i, j, k;
|
|
|
+ /* NOTE(review): the slot is used without checking that it holds gfn - confirm callers only delete gfns that were added. */
|
|
|
+ i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
|
|
|
+ while (true) {
|
|
|
+ vcpu->arch.apf.gfns[i] = ~0; /* slot i is now the hole */
|
|
|
+ do {
|
|
|
+ j = kvm_async_pf_next_probe(j);
|
|
|
+ if (vcpu->arch.apf.gfns[j] == ~0) /* reached a free slot: chain ends, done */
|
|
|
+ return;
|
|
|
+ k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]); /* k = first probe index of the entry at j */
|
|
|
+ /*
|
|
|
+ * k lies cyclically in ]i,j]: keep scanning, the entry at j stays where it is
|
|
|
+ * | i.k.j |
|
|
|
+ * |....j i.k.| or |.k..j i...|
|
|
|
+ */
|
|
|
+ } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
|
|
|
+ vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j]; /* move entry j into the hole at i */
|
|
|
+ i = j; /* j becomes the new hole, cleared on the next iteration */
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
|
|
|
+{ /* Writes the 32-bit val to the guest through the cache in vcpu->arch.apf.data and returns kvm_write_guest_cached()'s result; callers in this file treat 0 as success. */
|
|
|
+
|
|
|
+ return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
|
|
|
+ sizeof(val));
|
|
|
+}
|
|
|
+
|
|
|
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
|
|
|
+ struct kvm_async_pf *work)
|
|
|
+{ /* Records work's gfn in apf.gfns[], then either requests KVM_REQ_APF_HALT or injects a #PF that carries the token to the guest. */
|
|
|
+ struct x86_exception fault;
|
|
|
+
|
|
|
+ trace_kvm_async_pf_not_present(work->arch.token, work->gva);
|
|
|
+ kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
|
|
|
+ /* Request a halt when async PF is not enabled in msr_val, or when send_user_only is set and the guest is at CPL 0. */
|
|
|
+ if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
|
|
|
+ (vcpu->arch.apf.send_user_only &&
|
|
|
+ kvm_x86_ops->get_cpl(vcpu) == 0))
|
|
|
+ kvm_make_request(KVM_REQ_APF_HALT, vcpu);
|
|
|
+ else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) { /* inject only if the reason was written to the guest */
|
|
|
+ fault.vector = PF_VECTOR;
|
|
|
+ fault.error_code_valid = true;
|
|
|
+ fault.error_code = 0;
|
|
|
+ fault.nested_page_fault = false;
|
|
|
+ fault.address = work->arch.token; /* kvm_inject_page_fault() copies this address into CR2 */
|
|
|
+ kvm_inject_page_fault(vcpu, &fault);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
|
|
|
+ struct kvm_async_pf *work)
|
|
|
+{ /* Removes work's gfn from apf.gfns[] (or sets the token to ~0 for an error page), injects a "page ready" #PF if async PF is enabled, and clears apf.halted. */
|
|
|
+ struct x86_exception fault;
|
|
|
+ /* Note: the tracepoint records the token before it may be replaced by ~0 below. */
|
|
|
+ trace_kvm_async_pf_ready(work->arch.token, work->gva);
|
|
|
+ if (is_error_page(work->page))
|
|
|
+ work->arch.token = ~0; /* broadcast wakeup; the gfn is not removed from apf.gfns[] on this path */
|
|
|
+ else
|
|
|
+ kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
|
|
|
+ /* Inject only if async PF is enabled in msr_val and the reason was written to the guest. */
|
|
|
+ if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
|
|
|
+ !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
|
|
|
+ fault.vector = PF_VECTOR;
|
|
|
+ fault.error_code_valid = true;
|
|
|
+ fault.error_code = 0;
|
|
|
+ fault.nested_page_fault = false;
|
|
|
+ fault.address = work->arch.token; /* kvm_inject_page_fault() copies this address into CR2 */
|
|
|
+ kvm_inject_page_fault(vcpu, &fault);
|
|
|
+ }
|
|
|
+ vcpu->arch.apf.halted = false; /* cleared unconditionally, even when nothing was injected */
|
|
|
+}
|
|
|
+
|
|
|
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
|
|
|
+{ /* Returns true if async PF is disabled in msr_val; otherwise true only when no event needs reinjection and interrupts are allowed. */
|
|
|
+ if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
|
|
|
+ return true;
|
|
|
+ else
|
|
|
+ return !kvm_event_needs_reinjection(vcpu) &&
|
|
|
+ kvm_x86_ops->interrupt_allowed(vcpu);
|
|
|
+}
|
|
|
+
|
|
|
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
|
|
|
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
|
|
|
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
|