@@ -197,6 +197,47 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
 static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
+static u64 __read_mostly shadow_mmio_mask;
+
+static void mmu_spte_set(u64 *sptep, u64 spte);
+
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
+{
+        shadow_mmio_mask = mmio_mask;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
+
+static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access)
+{
+        access &= ACC_WRITE_MASK | ACC_USER_MASK;
+
+        mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT);
+}
+
+static bool is_mmio_spte(u64 spte)
+{
+        return (spte & shadow_mmio_mask) == shadow_mmio_mask;
+}
+
+static gfn_t get_mmio_spte_gfn(u64 spte)
+{
+        return (spte & ~shadow_mmio_mask) >> PAGE_SHIFT;
+}
+
+static unsigned get_mmio_spte_access(u64 spte)
+{
+        return (spte & ~shadow_mmio_mask) & ~PAGE_MASK;
+}
+
+static bool set_mmio_spte(u64 *sptep, gfn_t gfn, pfn_t pfn, unsigned access)
+{
+        if (unlikely(is_noslot_pfn(pfn))) {
+                mark_mmio_spte(sptep, gfn, access);
+                return true;
+        }
+
+        return false;
+}
+
 
 static inline u64 rsvd_bits(int s, int e)
 {
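
The helpers above pack everything an MMIO fault later needs into a single spte: the shadow_mmio_mask bits tag the entry, the bits below PAGE_SHIFT carry the ACC_WRITE_MASK/ACC_USER_MASK access bits, and the remaining bits carry the gfn. A minimal user-space sketch of that encode/decode round trip, assuming illustrative values for PAGE_SHIFT, the ACC_* bits and the mask (the real mask is supplied at runtime via kvm_mmu_set_mmio_spte_mask()):

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

#define PAGE_SHIFT      12
#define ACC_WRITE_MASK  (1u << 1)       /* placeholder values for the sketch */
#define ACC_USER_MASK   (1u << 2)

/* Illustrative mask only; KVM supplies the real one via kvm_mmu_set_mmio_spte_mask(). */
static const uint64_t shadow_mmio_mask = 0x7ull << 52;

static uint64_t mark_mmio(uint64_t gfn, unsigned access)
{
        access &= ACC_WRITE_MASK | ACC_USER_MASK;
        return shadow_mmio_mask | access | (gfn << PAGE_SHIFT);
}

int main(void)
{
        uint64_t spte = mark_mmio(0x12345, ACC_WRITE_MASK);

        /* Decode exactly as get_mmio_spte_gfn()/get_mmio_spte_access() do. */
        assert(((spte & ~shadow_mmio_mask) >> PAGE_SHIFT) == 0x12345);
        assert(((spte & ~shadow_mmio_mask) & ((1u << PAGE_SHIFT) - 1)) == ACC_WRITE_MASK);
        printf("mmio spte: %#llx\n", (unsigned long long)spte);
        return 0;
}
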
@@ -226,7 +267,7 @@ static int is_nx(struct kvm_vcpu *vcpu)
 
 static int is_shadow_present_pte(u64 pte)
 {
-        return pte & PT_PRESENT_MASK;
+        return pte & PT_PRESENT_MASK && !is_mmio_spte(pte);
 }
 
 static int is_large_pte(u64 pte)
@@ -285,6 +326,12 @@ static u64 __get_spte_lockless(u64 *sptep)
 {
         return ACCESS_ONCE(*sptep);
 }
+
+static bool __check_direct_spte_mmio_pf(u64 spte)
+{
+        /* It is valid if the spte is zapped. */
+        return spte == 0ull;
+}
 #else
 union split_spte {
         struct {
@@ -388,6 +435,23 @@ retry:
 
         return spte.spte;
 }
+
+static bool __check_direct_spte_mmio_pf(u64 spte)
+{
+        union split_spte sspte = (union split_spte)spte;
+        u32 high_mmio_mask = shadow_mmio_mask >> 32;
+
+        /* It is valid if the spte is zapped. */
+        if (spte == 0ull)
+                return true;
+
+        /* It is valid if the spte is being zapped. */
+        if (sspte.spte_low == 0ull &&
+            (sspte.spte_high & high_mmio_mask) == high_mmio_mask)
+                return true;
+
+        return false;
+}
 #endif
 
 static bool spte_has_volatile_bits(u64 spte)
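
On 32-bit PAE hosts a 64-bit spte is updated as two 32-bit halves, so the lockless walk can observe a half-cleared entry; the 32-bit variant above therefore also accepts the intermediate state where the low word is already zero while the high word still carries the mmio-mask bits. A stand-alone sketch of the same predicate, assuming the clear path writes the low word first (as the lockless-walk code in this file does); the union mirrors the split_spte definition shown in the surrounding context and the mask value is only illustrative:

#include <stdint.h>
#include <stdbool.h>
#include <assert.h>

union split_spte {
        struct {
                uint32_t spte_low;      /* cleared first when zapping */
                uint32_t spte_high;     /* still holds the mmio bits mid-zap */
        };
        uint64_t spte;
};

static const uint64_t shadow_mmio_mask = 0x7ull << 52; /* illustrative only */

static bool check_mid_zap(uint64_t spte)
{
        union split_spte sspte = { .spte = spte };
        uint32_t high_mmio_mask = shadow_mmio_mask >> 32;

        if (spte == 0)                          /* fully zapped */
                return true;
        return sspte.spte_low == 0 &&           /* low half already cleared ... */
               (sspte.spte_high & high_mmio_mask) == high_mmio_mask; /* ... high still tagged */
}

int main(void)
{
        uint64_t mmio_spte = shadow_mmio_mask | (0x12345ull << 12);

        assert(check_mid_zap(mmio_spte & ~0xffffffffull)); /* mid-zap state passes */
        assert(!check_mid_zap(mmio_spte));                 /* intact mmio spte does not */
        return 0;
}
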
@@ -1745,7 +1809,8 @@ static void mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
                         child = page_header(pte & PT64_BASE_ADDR_MASK);
                         drop_parent_pte(child, spte);
                 }
-        }
+        } else if (is_mmio_spte(pte))
+                mmu_spte_clear_no_track(spte);
 
         if (is_large_pte(pte))
                 --kvm->stat.lpages;
@@ -2120,6 +2185,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
         u64 spte, entry = *sptep;
         int ret = 0;
 
+        if (set_mmio_spte(sptep, gfn, pfn, pte_access))
+                return 0;
+
         /*
          * We don't set the accessed bit, since we sometimes want to see
          * whether the guest actually used the pte (in order to detect
@@ -2255,6 +2323,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                 kvm_mmu_flush_tlb(vcpu);
         }
 
+        if (unlikely(is_mmio_spte(*sptep) && emulate))
+                *emulate = 1;
+
         pgprintk("%s: setting spte %llx\n", __func__, *sptep);
         pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
                  is_large_pte(*sptep)? "2MB" : "4kB",
@@ -2481,7 +2552,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
 
 static bool mmu_invalid_pfn(pfn_t pfn)
 {
-        return unlikely(is_invalid_pfn(pfn) || is_noslot_pfn(pfn));
+        return unlikely(is_invalid_pfn(pfn));
 }
 
 static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
@@ -2495,11 +2566,8 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
                 goto exit;
         }
 
-        if (unlikely(is_noslot_pfn(pfn))) {
+        if (unlikely(is_noslot_pfn(pfn)))
                 vcpu_cache_mmio_info(vcpu, gva, gfn, access);
-                *ret_val = 1;
-                goto exit;
-        }
 
         ret = false;
 exit:
@@ -2813,6 +2881,92 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
         return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access);
 }
 
+static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+{
+        if (direct)
+                return vcpu_match_mmio_gpa(vcpu, addr);
+
+        return vcpu_match_mmio_gva(vcpu, addr);
+}
+
+
+/*
+ * On direct hosts, the last spte only allows two states
+ * for an mmio page fault:
+ * - It is the mmio spte
+ * - It is zapped or it is being zapped.
+ *
+ * This function completely checks the spte when the last spte
+ * is not the mmio spte.
+ */
+static bool check_direct_spte_mmio_pf(u64 spte)
+{
+        return __check_direct_spte_mmio_pf(spte);
+}
+
+static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
+{
+        struct kvm_shadow_walk_iterator iterator;
+        u64 spte = 0ull;
+
+        walk_shadow_page_lockless_begin(vcpu);
+        for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
+                if (!is_shadow_present_pte(spte))
+                        break;
+        walk_shadow_page_lockless_end(vcpu);
+
+        return spte;
+}
+
+/*
+ * If it is a real mmio page fault, return 1 and emulate the instruction
+ * directly, return 0 to let the CPU fault again on the address, -1 is
+ * returned if a bug is detected.
+ */
+int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+{
+        u64 spte;
+
+        if (quickly_check_mmio_pf(vcpu, addr, direct))
+                return 1;
+
+        spte = walk_shadow_page_get_mmio_spte(vcpu, addr);
+
+        if (is_mmio_spte(spte)) {
+                gfn_t gfn = get_mmio_spte_gfn(spte);
+                unsigned access = get_mmio_spte_access(spte);
+
+                if (direct)
+                        addr = 0;
+                vcpu_cache_mmio_info(vcpu, addr, gfn, access);
+                return 1;
+        }
+
+        /*
+         * It's ok if the gva is remapped by other cpus on a shadow guest,
+         * it's a BUG if the gfn is not a mmio page.
+         */
+        if (direct && !check_direct_spte_mmio_pf(spte))
+                return -1;
+
+        /*
+         * If the page table is zapped by other cpus, let the CPU fault again
+         * on the address.
+         */
+        return 0;
+}
+EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common);
+
+static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr,
+                                  u32 error_code, bool direct)
+{
+        int ret;
+
+        ret = handle_mmio_page_fault_common(vcpu, addr, direct);
+        WARN_ON(ret < 0);
+        return ret;
+}
+
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
                                 u32 error_code, bool prefault)
 {
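
handle_mmio_page_fault_common() is exported because the decision it encodes (1 = a genuine mmio access, go emulate; 0 = the spte was zapped concurrently, just re-fault; -1 = inconsistency, treat as a bug) is also useful outside mmu.c. A hedged sketch of how a caller might dispatch on that contract; the example_* helpers are placeholders, not KVM APIs:

/* Placeholders for illustration only -- not KVM functions. */
static int example_emulate_mmio(struct kvm_vcpu *vcpu);
static int example_report_bug(struct kvm_vcpu *vcpu, u64 gpa);

static int example_mmio_fault(struct kvm_vcpu *vcpu, u64 gpa)
{
        int ret = handle_mmio_page_fault_common(vcpu, gpa, true);

        if (ret > 0)                    /* a real mmio spte: emulate the access */
                return example_emulate_mmio(vcpu);
        if (ret == 0)                   /* zapped under us: let the guest retry */
                return 1;
        return example_report_bug(vcpu, gpa);   /* ret < 0: should never happen */
}
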
@@ -2820,6 +2974,10 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
         int r;
 
         pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
+
+        if (unlikely(error_code & PFERR_RSVD_MASK))
+                return handle_mmio_page_fault(vcpu, gva, error_code, true);
+
         r = mmu_topup_memory_caches(vcpu);
         if (r)
                 return r;
@@ -2896,6 +3054,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
         ASSERT(vcpu);
         ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
+        if (unlikely(error_code & PFERR_RSVD_MASK))
+                return handle_mmio_page_fault(vcpu, gpa, error_code, true);
+
         r = mmu_topup_memory_caches(vcpu);
         if (r)
                 return r;
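
Both fault paths now peel off PFERR_RSVD_MASK faults before touching the memory caches: an access through an mmio spte is expected to trip a reserved-bit violation rather than an ordinary not-present fault, because the mask handed to kvm_mmu_set_mmio_spte_mask() is meant to live in bits the hardware treats as reserved. The actual value is configured outside this file; the sketch below shows one mask choice that satisfies the constraint, assuming maxphyaddr <= 51, purely for illustration:

/* Illustrative only -- the real mask is chosen by the arch/vendor setup code. */
static u64 example_mmio_spte_mask(int maxphyaddr)
{
        /*
         * Physical-address bits above maxphyaddr are reserved in hardware
         * PTEs; the present bit is kept because reserved-bit violations are
         * only reported for present entries.
         */
        return rsvd_bits(maxphyaddr, 51) | 1ull;
}
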
@@ -2993,6 +3154,23 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
         return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
 }
 
+static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
+                           int *nr_present)
+{
+        if (unlikely(is_mmio_spte(*sptep))) {
+                if (gfn != get_mmio_spte_gfn(*sptep)) {
+                        mmu_spte_clear_no_track(sptep);
+                        return true;
+                }
+
+                (*nr_present)++;
+                mark_mmio_spte(sptep, gfn, access);
+                return true;
+        }
+
+        return false;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
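
sync_mmio_spte() is aimed at the shadow-page resync code generated from paging_tmpl.h: while re-walking a shadow page, a stale mmio spte whose gfn no longer matches is dropped, and a matching one is simply re-marked and counted as present so the page is not zapped. A hedged sketch of that intended call pattern; the example_* helpers are placeholders, and the real hook lands in the template's sync_page code, which is not part of this hunk:

/* Placeholder for illustration only -- not a KVM function. */
static void example_resync_spte(struct kvm_mmu_page *sp, int i,
                                gfn_t gfn, unsigned access, int *nr_present);

static void example_sync_one(struct kvm_mmu_page *sp, int i,
                             gfn_t gfn, unsigned access, int *nr_present)
{
        /* Mmio sptes are fully handled here: refreshed, or dropped if stale. */
        if (sync_mmio_spte(&sp->spt[i], gfn, access, nr_present))
                return;

        /* Everything else goes through the normal present-spte resync. */
        example_resync_spte(sp, i, gfn, access, nr_present);
}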