@@ -554,14 +554,18 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 	return ret;
 }
 
-static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
+static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 {
 	struct kvm_memory_slot *slot;
-	int host_level, level, max_level;
-
 	slot = gfn_to_memslot(vcpu->kvm, large_gfn);
 	if (slot && slot->dirty_bitmap)
-		return PT_PAGE_TABLE_LEVEL;
+		return true;
+	return false;
+}
+
+static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
+{
+	int host_level, level, max_level;
 
 	host_level = host_mapping_level(vcpu->kvm, large_gfn);
 
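The hunk above splits the old mapping_level() in two: mapping_level_dirty_bitmap() only answers whether the gfn sits in a memslot with dirty logging enabled (such pages must stay at 4k so every write is tracked per page), while mapping_level() keeps the host-side level computation. A rough sketch of how a fault path is expected to combine the two after this split (illustration only, not part of the patch; pick_fault_level() is a made-up name):

static int pick_fault_level(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	/* dirty logging active: force 4k ptes so each write is logged */
	if (mapping_level_dirty_bitmap(vcpu, gfn))
		return PT_PAGE_TABLE_LEVEL;
	/* otherwise use the largest level the host mapping allows */
	return mapping_level(vcpu, gfn);
}

The real callers below keep the two results separate (force_pt_level plus level) because they also need the flag later, when deciding whether to run transparent_hugepage_adjust().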
@@ -2281,6 +2285,48 @@ static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn)
 	return 1;
 }
 
+static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
+					gfn_t *gfnp, pfn_t *pfnp, int *levelp)
+{
+	pfn_t pfn = *pfnp;
+	gfn_t gfn = *gfnp;
+	int level = *levelp;
+
+	/*
+	 * Check if it's a transparent hugepage. If this would be an
+	 * hugetlbfs page, level wouldn't be set to
+	 * PT_PAGE_TABLE_LEVEL and there would be no adjustment done
+	 * here.
+	 */
+	if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
+	    level == PT_PAGE_TABLE_LEVEL &&
+	    PageTransCompound(pfn_to_page(pfn)) &&
+	    !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) {
+		unsigned long mask;
+		/*
+		 * mmu_notifier_retry was successful and we hold the
+		 * mmu_lock here, so the pmd can't become splitting
+		 * from under us, and in turn
+		 * __split_huge_page_refcount() can't run from under
+		 * us and we can safely transfer the refcount from
+		 * PG_tail to PG_head as we switch the pfn to tail to
+		 * head.
+		 */
+		*levelp = level = PT_DIRECTORY_LEVEL;
+		mask = KVM_PAGES_PER_HPAGE(level) - 1;
+		VM_BUG_ON((gfn & mask) != (pfn & mask));
+		if (pfn & mask) {
+			gfn &= ~mask;
+			*gfnp = gfn;
+			kvm_release_pfn_clean(pfn);
+			pfn &= ~mask;
+			if (!get_page_unless_zero(pfn_to_page(pfn)))
+				BUG();
+			*pfnp = pfn;
+		}
+	}
+}
+
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 			 gva_t gva, pfn_t *pfn, bool write, bool *writable);
 
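transparent_hugepage_adjust() promotes a 4k fault to a 2MB mapping when the backing page turns out to be part of a transparent hugepage. The arithmetic is easier to follow with concrete numbers; on x86, KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) is 512, so mask is 0x1ff (the values below are invented for illustration):

	unsigned long mask = 512 - 1;	/* 0x1ff */
	gfn_t gfn = 0x1234;		/* faulting guest frame              */
	pfn_t pfn = 0x8a34;		/* host frame backing it, a THP tail */

	/* offsets inside the 2MB region agree, 0x34 == 0x34 (the VM_BUG_ON) */
	gfn &= ~mask;			/* 0x1200: start of the guest 2MB region */
	pfn &= ~mask;			/* 0x8a00: the THP head page             */

The reference held on the tail page (0x8a34) is then exchanged for one on the head page (0x8a00) via kvm_release_pfn_clean() plus get_page_unless_zero(), so the pin follows the frame that the large spte will actually map.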
@@ -2289,20 +2335,25 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn,
 {
 	int r;
 	int level;
+	int force_pt_level;
 	pfn_t pfn;
 	unsigned long mmu_seq;
 	bool map_writable;
 
-	level = mapping_level(vcpu, gfn);
-
-	/*
-	 * This path builds a PAE pagetable - so we can map 2mb pages at
-	 * maximum. Therefore check if the level is larger than that.
-	 */
-	if (level > PT_DIRECTORY_LEVEL)
-		level = PT_DIRECTORY_LEVEL;
+	force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn);
+	if (likely(!force_pt_level)) {
+		level = mapping_level(vcpu, gfn);
+		/*
+		 * This path builds a PAE pagetable - so we can map
+		 * 2mb pages at maximum. Therefore check if the level
+		 * is larger than that.
+		 */
+		if (level > PT_DIRECTORY_LEVEL)
+			level = PT_DIRECTORY_LEVEL;
 
-	gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
+		gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
+	} else
+		level = PT_PAGE_TABLE_LEVEL;
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
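For reference, the level constants used in this path correspond to the following page sizes (values as defined in the KVM x86 headers of this era, quoted here as a reminder, not part of the patch):

#define PT_PAGE_TABLE_LEVEL	1	/* 4k pte                               */
#define PT_DIRECTORY_LEVEL	2	/* 2MB pde - the PAE cap enforced above */
#define PT_PDPE_LEVEL		3	/* 1GB pdpte                            */

nonpaging_map() builds a PAE page table, which tops out at 2MB entries, hence the clamp to PT_DIRECTORY_LEVEL; the force_pt_level branch skips the whole computation and pins the mapping at 4k.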
@@ -2318,6 +2369,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn,
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
+	if (likely(!force_pt_level))
+		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
 	r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
 			 prefault);
 	spin_unlock(&vcpu->kvm->mmu_lock);
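Note the ordering here (and in the matching tdp_page_fault() hunk further down): the hugepage adjustment runs only after mmu_notifier_retry() has passed and while mmu_lock is held, which is exactly the precondition the comment inside transparent_hugepage_adjust() relies on - the pmd cannot start splitting underneath us. Condensed, both call sites follow the same skeleton (paraphrased from the hunks, not literal code):

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq))
		goto out_unlock;
	kvm_mmu_free_some_pages(vcpu);
	if (likely(!force_pt_level))
		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
	r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, prefault);
	spin_unlock(&vcpu->kvm->mmu_lock);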
@@ -2655,6 +2708,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	pfn_t pfn;
 	int r;
 	int level;
+	int force_pt_level;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	unsigned long mmu_seq;
 	int write = error_code & PFERR_WRITE_MASK;
@@ -2667,9 +2721,12 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	if (r)
 		return r;
 
-	level = mapping_level(vcpu, gfn);
-
-	gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
+	force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn);
+	if (likely(!force_pt_level)) {
+		level = mapping_level(vcpu, gfn);
+		gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
+	} else
+		level = PT_PAGE_TABLE_LEVEL;
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
@@ -2684,6 +2741,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
+	if (likely(!force_pt_level))
+		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
 	r = __direct_map(vcpu, gpa, write, map_writable,
 			 level, gfn, pfn, prefault);
 	spin_unlock(&vcpu->kvm->mmu_lock);