@@ -446,8 +446,22 @@ static bool __check_direct_spte_mmio_pf(u64 spte)
 }
 #endif
 
+static bool spte_is_locklessly_modifiable(u64 spte)
+{
+	return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE));
+}
+
 static bool spte_has_volatile_bits(u64 spte)
 {
+	/*
+	 * Always update the spte atomically if it can be updated
+	 * out of mmu-lock: this ensures the dirty bit is not lost
+	 * and gives us a stable is_writable_pte() so that no TLB
+	 * flush is missed.
+	 */
+	if (spte_is_locklessly_modifiable(spte))
+		return true;
+
 	if (!shadow_accessed_mask)
 		return false;
 
@@ -489,7 +503,7 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte)
  */
 static bool mmu_spte_update(u64 *sptep, u64 new_spte)
 {
-	u64 mask, old_spte = *sptep;
+	u64 old_spte = *sptep;
 	bool ret = false;
 
 	WARN_ON(!is_rmap_spte(new_spte));
@@ -499,17 +513,16 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
 		return ret;
 	}
 
-	new_spte |= old_spte & shadow_dirty_mask;
-
-	mask = shadow_accessed_mask;
-	if (is_writable_pte(old_spte))
-		mask |= shadow_dirty_mask;
-
-	if (!spte_has_volatile_bits(old_spte) || (new_spte & mask) == mask)
+	if (!spte_has_volatile_bits(old_spte))
 		__update_clear_spte_fast(sptep, new_spte);
 	else
 		old_spte = __update_clear_spte_slow(sptep, new_spte);
 
+	/*
+	 * Updating the spte out of mmu-lock is safe, since it is
+	 * always updated atomically; see the comments in
+	 * spte_has_volatile_bits().
+	 */
 	if (is_writable_pte(old_spte) && !is_writable_pte(new_spte))
 		ret = true;
 
@@ -1085,11 +1098,6 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
 		kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
-static bool spte_is_locklessly_modifiable(u64 spte)
-{
-	return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE));
-}
-
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
  * spte write-protection is caused by protecting shadow page table.
@@ -2677,18 +2685,114 @@ exit:
 	return ret;
 }
 
+static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code)
+{
+	/*
+	 * A #PF can be fast only if the shadow page table is present and
+	 * the fault is caused by write-protection; then we only need to
+	 * change the W bit of the spte, which can be done out of mmu-lock.
+	 */
+	if (!(error_code & PFERR_PRESENT_MASK) ||
+	      !(error_code & PFERR_WRITE_MASK))
+		return false;
+
+	return true;
+}
+
+static bool
+fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 spte)
+{
+	struct kvm_mmu_page *sp = page_header(__pa(sptep));
+	gfn_t gfn;
+
+	WARN_ON(!sp->role.direct);
+
+	/*
+	 * The gfn of a direct spte is stable since it is
+	 * calculated from sp->gfn.
+	 */
+	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
+
+	if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte)
+		mark_page_dirty(vcpu->kvm, gfn);
+
+	return true;
+}
+
+/*
+ * Return value:
+ * - true: let the vcpu access the same address again.
+ * - false: let the real page fault path fix it.
+ */
+static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
+			    u32 error_code)
+{
+	struct kvm_shadow_walk_iterator iterator;
+	bool ret = false;
+	u64 spte = 0ull;
+
+	if (!page_fault_can_be_fast(vcpu, error_code))
+		return false;
+
+	walk_shadow_page_lockless_begin(vcpu);
+	for_each_shadow_entry_lockless(vcpu, gva, iterator, spte)
+		if (!is_shadow_present_pte(spte) || iterator.level < level)
+			break;
+
+	/*
+	 * If the mapping has been changed, let the vcpu fault on the
+	 * same address again.
+	 */
+	if (!is_rmap_spte(spte)) {
+		ret = true;
+		goto exit;
+	}
+
+	if (!is_last_spte(spte, level))
+		goto exit;
+
+	/*
+	 * Check if this is a spurious fault caused by a lazily flushed TLB.
+	 *
+	 * There is no need to check the access of upper-level table entries
+	 * since they are always ACC_ALL.
+	 */
+	if (is_writable_pte(spte)) {
+		ret = true;
+		goto exit;
+	}
+
+	/*
+	 * Currently, to simplify the code, only sptes write-protected
+	 * by dirty logging can be fixed on the fast path.
+	 */
+	if (!spte_is_locklessly_modifiable(spte))
+		goto exit;
+
+	/*
+	 * Currently, fast page fault only works for direct mapping
+	 * since the gfn is not stable for indirect shadow pages.
+	 * See Documentation/virtual/kvm/locking.txt for more details.
+	 */
+	ret = fast_pf_fix_direct_spte(vcpu, iterator.sptep, spte);
+exit:
+	walk_shadow_page_lockless_end(vcpu);
+
+	return ret;
+}
+
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 			 gva_t gva, pfn_t *pfn, bool write, bool *writable);
 
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn,
-			 bool prefault)
+static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
+			 gfn_t gfn, bool prefault)
 {
 	int r;
 	int level;
 	int force_pt_level;
 	pfn_t pfn;
 	unsigned long mmu_seq;
-	bool map_writable;
+	bool map_writable, write = error_code & PFERR_WRITE_MASK;
 
 	force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn);
 	if (likely(!force_pt_level)) {
@@ -2705,6 +2809,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn,
 	} else
 		level = PT_PAGE_TABLE_LEVEL;
 
+	if (fast_page_fault(vcpu, v, level, error_code))
+		return 0;
+
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
@@ -3093,7 +3200,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 	gfn = gva >> PAGE_SHIFT;
 
 	return nonpaging_map(vcpu, gva & PAGE_MASK,
-			     error_code & PFERR_WRITE_MASK, gfn, prefault);
+			     error_code, gfn, prefault);
 }
 
 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
@@ -3173,6 +3280,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	} else
 		level = PT_PAGE_TABLE_LEVEL;
 
+	if (fast_page_fault(vcpu, gpa, level, error_code))
+		return 0;
+
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 