|
@@ -63,10 +63,12 @@
|
|
*/
|
|
*/
|
|
struct guest_walker {
|
|
struct guest_walker {
|
|
int level;
|
|
int level;
|
|
|
|
+ unsigned max_level;
|
|
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
|
|
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
|
|
pt_element_t ptes[PT_MAX_FULL_LEVELS];
|
|
pt_element_t ptes[PT_MAX_FULL_LEVELS];
|
|
pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
|
|
pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
|
|
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
|
|
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
|
|
|
|
+ pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
|
|
unsigned pt_access;
|
|
unsigned pt_access;
|
|
unsigned pte_access;
|
|
unsigned pte_access;
|
|
gfn_t gfn;
|
|
gfn_t gfn;
|
|
@@ -101,38 +103,41 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
|
return (ret != orig_pte);
|
|
return (ret != orig_pte);
|
|
}
|
|
}
|
|
|
|
|
|
-static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte,
|
|
|
|
- bool last)
|
|
|
|
|
|
+static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
|
|
|
|
+ struct kvm_mmu *mmu,
|
|
|
|
+ struct guest_walker *walker,
|
|
|
|
+ int write_fault)
|
|
{
|
|
{
|
|
- unsigned access;
|
|
|
|
-
|
|
|
|
- access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
|
|
|
|
- if (last && !is_dirty_gpte(gpte))
|
|
|
|
- access &= ~ACC_WRITE_MASK;
|
|
|
|
-
|
|
|
|
-#if PTTYPE == 64
|
|
|
|
- if (vcpu->arch.mmu.nx)
|
|
|
|
- access &= ~(gpte >> PT64_NX_SHIFT);
|
|
|
|
-#endif
|
|
|
|
- return access;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-static bool FNAME(is_last_gpte)(struct guest_walker *walker,
|
|
|
|
- struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
|
|
|
- pt_element_t gpte)
|
|
|
|
-{
|
|
|
|
- if (walker->level == PT_PAGE_TABLE_LEVEL)
|
|
|
|
- return true;
|
|
|
|
-
|
|
|
|
- if ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(gpte) &&
|
|
|
|
- (PTTYPE == 64 || is_pse(vcpu)))
|
|
|
|
- return true;
|
|
|
|
|
|
+ unsigned level, index;
|
|
|
|
+ pt_element_t pte, orig_pte;
|
|
|
|
+ pt_element_t __user *ptep_user;
|
|
|
|
+ gfn_t table_gfn;
|
|
|
|
+ int ret;
|
|
|
|
+
|
|
|
|
+ for (level = walker->max_level; level >= walker->level; --level) {
|
|
|
|
+ pte = orig_pte = walker->ptes[level - 1];
|
|
|
|
+ table_gfn = walker->table_gfn[level - 1];
|
|
|
|
+ ptep_user = walker->ptep_user[level - 1];
|
|
|
|
+ index = offset_in_page(ptep_user) / sizeof(pt_element_t);
|
|
|
|
+ if (!(pte & PT_ACCESSED_MASK)) {
|
|
|
|
+ trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte));
|
|
|
|
+ pte |= PT_ACCESSED_MASK;
|
|
|
|
+ }
|
|
|
|
+ if (level == walker->level && write_fault && !is_dirty_gpte(pte)) {
|
|
|
|
+ trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
|
|
|
|
+ pte |= PT_DIRTY_MASK;
|
|
|
|
+ }
|
|
|
|
+ if (pte == orig_pte)
|
|
|
|
+ continue;
|
|
|
|
|
|
- if ((walker->level == PT_PDPE_LEVEL) && is_large_pte(gpte) &&
|
|
|
|
- (mmu->root_level == PT64_ROOT_LEVEL))
|
|
|
|
- return true;
|
|
|
|
|
|
+ ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
|
|
|
|
+ if (ret)
|
|
|
|
+ return ret;
|
|
|
|
|
|
- return false;
|
|
|
|
|
|
+ mark_page_dirty(vcpu->kvm, table_gfn);
|
|
|
|
+ walker->ptes[level - 1] = pte; /* ptes[] is indexed by level - 1, as in the read above */
|
|
|
|
+ }
|
|
|
|
+ return 0;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
@@ -142,21 +147,22 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
|
|
struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
|
struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
|
gva_t addr, u32 access)
|
|
gva_t addr, u32 access)
|
|
{
|
|
{
|
|
|
|
+ int ret;
|
|
pt_element_t pte;
|
|
pt_element_t pte;
|
|
pt_element_t __user *uninitialized_var(ptep_user);
|
|
pt_element_t __user *uninitialized_var(ptep_user);
|
|
gfn_t table_gfn;
|
|
gfn_t table_gfn;
|
|
- unsigned index, pt_access, uninitialized_var(pte_access);
|
|
|
|
|
|
+ unsigned index, pt_access, pte_access, accessed_dirty, shift;
|
|
gpa_t pte_gpa;
|
|
gpa_t pte_gpa;
|
|
- bool eperm, last_gpte;
|
|
|
|
int offset;
|
|
int offset;
|
|
const int write_fault = access & PFERR_WRITE_MASK;
|
|
const int write_fault = access & PFERR_WRITE_MASK;
|
|
const int user_fault = access & PFERR_USER_MASK;
|
|
const int user_fault = access & PFERR_USER_MASK;
|
|
const int fetch_fault = access & PFERR_FETCH_MASK;
|
|
const int fetch_fault = access & PFERR_FETCH_MASK;
|
|
u16 errcode = 0;
|
|
u16 errcode = 0;
|
|
|
|
+ gpa_t real_gpa;
|
|
|
|
+ gfn_t gfn;
|
|
|
|
|
|
trace_kvm_mmu_pagetable_walk(addr, access);
|
|
trace_kvm_mmu_pagetable_walk(addr, access);
|
|
retry_walk:
|
|
retry_walk:
|
|
- eperm = false;
|
|
|
|
walker->level = mmu->root_level;
|
|
walker->level = mmu->root_level;
|
|
pte = mmu->get_cr3(vcpu);
|
|
pte = mmu->get_cr3(vcpu);
|
|
|
|
|
|
@@ -169,15 +175,21 @@ retry_walk:
|
|
--walker->level;
|
|
--walker->level;
|
|
}
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
+ walker->max_level = walker->level;
|
|
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
|
|
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
|
|
(mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
|
|
(mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
|
|
|
|
|
|
- pt_access = ACC_ALL;
|
|
|
|
|
|
+ accessed_dirty = PT_ACCESSED_MASK;
|
|
|
|
+ pt_access = pte_access = ACC_ALL;
|
|
|
|
+ ++walker->level;
|
|
|
|
|
|
- for (;;) {
|
|
|
|
|
|
+ do {
|
|
gfn_t real_gfn;
|
|
gfn_t real_gfn;
|
|
unsigned long host_addr;
|
|
unsigned long host_addr;
|
|
|
|
|
|
|
|
+ pt_access &= pte_access;
|
|
|
|
+ --walker->level;
|
|
|
|
+
|
|
index = PT_INDEX(addr, walker->level);
|
|
index = PT_INDEX(addr, walker->level);
|
|
|
|
|
|
table_gfn = gpte_to_gfn(pte);
|
|
table_gfn = gpte_to_gfn(pte);
|
|
@@ -199,6 +211,7 @@ retry_walk:
|
|
ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
|
|
ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
|
|
if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
|
|
if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
|
|
goto error;
|
|
goto error;
|
|
|
|
+ walker->ptep_user[walker->level - 1] = ptep_user;
|
|
|
|
|
|
trace_kvm_mmu_paging_element(pte, walker->level);
|
|
trace_kvm_mmu_paging_element(pte, walker->level);
|
|
|
|
|
|
@@ -211,92 +224,48 @@ retry_walk:
|
|
goto error;
|
|
goto error;
|
|
}
|
|
}
|
|
|
|
|
|
- if (!check_write_user_access(vcpu, write_fault, user_fault,
|
|
|
|
- pte))
|
|
|
|
- eperm = true;
|
|
|
|
-
|
|
|
|
-#if PTTYPE == 64
|
|
|
|
- if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
|
|
|
|
- eperm = true;
|
|
|
|
-#endif
|
|
|
|
-
|
|
|
|
- last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
|
|
|
|
- if (last_gpte) {
|
|
|
|
- pte_access = pt_access &
|
|
|
|
- FNAME(gpte_access)(vcpu, pte, true);
|
|
|
|
- /* check if the kernel is fetching from user page */
|
|
|
|
- if (unlikely(pte_access & PT_USER_MASK) &&
|
|
|
|
- kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
|
|
|
|
- if (fetch_fault && !user_fault)
|
|
|
|
- eperm = true;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
|
|
|
|
- int ret;
|
|
|
|
- trace_kvm_mmu_set_accessed_bit(table_gfn, index,
|
|
|
|
- sizeof(pte));
|
|
|
|
- ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
|
|
|
|
- pte, pte|PT_ACCESSED_MASK);
|
|
|
|
- if (unlikely(ret < 0))
|
|
|
|
- goto error;
|
|
|
|
- else if (ret)
|
|
|
|
- goto retry_walk;
|
|
|
|
-
|
|
|
|
- mark_page_dirty(vcpu->kvm, table_gfn);
|
|
|
|
- pte |= PT_ACCESSED_MASK;
|
|
|
|
- }
|
|
|
|
|
|
+ accessed_dirty &= pte;
|
|
|
|
+ pte_access = pt_access & gpte_access(vcpu, pte);
|
|
|
|
|
|
walker->ptes[walker->level - 1] = pte;
|
|
walker->ptes[walker->level - 1] = pte;
|
|
|
|
+ } while (!is_last_gpte(mmu, walker->level, pte));
|
|
|
|
|
|
- if (last_gpte) {
|
|
|
|
- int lvl = walker->level;
|
|
|
|
- gpa_t real_gpa;
|
|
|
|
- gfn_t gfn;
|
|
|
|
- u32 ac;
|
|
|
|
-
|
|
|
|
- gfn = gpte_to_gfn_lvl(pte, lvl);
|
|
|
|
- gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
|
|
|
|
-
|
|
|
|
- if (PTTYPE == 32 &&
|
|
|
|
- walker->level == PT_DIRECTORY_LEVEL &&
|
|
|
|
- is_cpuid_PSE36())
|
|
|
|
- gfn += pse36_gfn_delta(pte);
|
|
|
|
-
|
|
|
|
- ac = write_fault | fetch_fault | user_fault;
|
|
|
|
|
|
+ if (unlikely(permission_fault(mmu, pte_access, access))) {
|
|
|
|
+ errcode |= PFERR_PRESENT_MASK;
|
|
|
|
+ goto error;
|
|
|
|
+ }
|
|
|
|
|
|
- real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn),
|
|
|
|
- ac);
|
|
|
|
- if (real_gpa == UNMAPPED_GVA)
|
|
|
|
- return 0;
|
|
|
|
|
|
+ gfn = gpte_to_gfn_lvl(pte, walker->level);
|
|
|
|
+ gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;
|
|
|
|
|
|
- walker->gfn = real_gpa >> PAGE_SHIFT;
|
|
|
|
|
|
+ if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
|
|
|
|
+ gfn += pse36_gfn_delta(pte);
|
|
|
|
|
|
- break;
|
|
|
|
- }
|
|
|
|
|
|
+ real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access);
|
|
|
|
+ if (real_gpa == UNMAPPED_GVA)
|
|
|
|
+ return 0;
|
|
|
|
|
|
- pt_access &= FNAME(gpte_access)(vcpu, pte, false);
|
|
|
|
- --walker->level;
|
|
|
|
- }
|
|
|
|
|
|
+ walker->gfn = real_gpa >> PAGE_SHIFT;
|
|
|
|
|
|
- if (unlikely(eperm)) {
|
|
|
|
- errcode |= PFERR_PRESENT_MASK;
|
|
|
|
- goto error;
|
|
|
|
- }
|
|
|
|
|
|
+ if (!write_fault)
|
|
|
|
+ protect_clean_gpte(&pte_access, pte);
|
|
|
|
|
|
- if (write_fault && unlikely(!is_dirty_gpte(pte))) {
|
|
|
|
- int ret;
|
|
|
|
|
|
+ /*
|
|
|
|
+ * On a write fault, fold the dirty bit into accessed_dirty by shifting it one
|
|
|
|
+ * place right.
|
|
|
|
+ *
|
|
|
|
+ * On a read fault, do nothing.
|
|
|
|
+ */
|
|
|
|
+ shift = write_fault >> ilog2(PFERR_WRITE_MASK);
|
|
|
|
+ shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;
|
|
|
|
+ accessed_dirty &= pte >> shift;
|
|
|
|
|
|
- trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
|
|
|
|
- ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
|
|
|
|
- pte, pte|PT_DIRTY_MASK);
|
|
|
|
|
|
+ if (unlikely(!accessed_dirty)) {
|
|
|
|
+ ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
|
|
if (unlikely(ret < 0))
|
|
if (unlikely(ret < 0))
|
|
goto error;
|
|
goto error;
|
|
else if (ret)
|
|
else if (ret)
|
|
goto retry_walk;
|
|
goto retry_walk;
|
|
-
|
|
|
|
- mark_page_dirty(vcpu->kvm, table_gfn);
|
|
|
|
- pte |= PT_DIRTY_MASK;
|
|
|
|
- walker->ptes[walker->level - 1] = pte;
|
|
|
|
}
|
|
}
|
|
|
|
|
|
walker->pt_access = pt_access;
|
|
walker->pt_access = pt_access;
|
|
@@ -368,7 +337,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
|
return;
|
|
return;
|
|
|
|
|
|
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
|
|
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
|
|
- pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true);
|
|
|
|
|
|
+ pte_access = sp->role.access & gpte_access(vcpu, gpte);
|
|
|
|
+ protect_clean_gpte(&pte_access, gpte);
|
|
pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
|
|
pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
|
|
if (mmu_invalid_pfn(pfn))
|
|
if (mmu_invalid_pfn(pfn))
|
|
return;
|
|
return;
|
|
@@ -441,8 +411,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
|
|
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
|
|
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
|
|
continue;
|
|
continue;
|
|
|
|
|
|
- pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte,
|
|
|
|
- true);
|
|
|
|
|
|
+ pte_access = sp->role.access & gpte_access(vcpu, gpte);
|
|
|
|
+ protect_clean_gpte(&pte_access, gpte);
|
|
gfn = gpte_to_gfn(gpte);
|
|
gfn = gpte_to_gfn(gpte);
|
|
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
|
|
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
|
|
pte_access & ACC_WRITE_MASK);
|
|
pte_access & ACC_WRITE_MASK);
|
|
@@ -794,7 +764,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
|
|
|
|
|
gfn = gpte_to_gfn(gpte);
|
|
gfn = gpte_to_gfn(gpte);
|
|
pte_access = sp->role.access;
|
|
pte_access = sp->role.access;
|
|
- pte_access &= FNAME(gpte_access)(vcpu, gpte, true);
|
|
|
|
|
|
+ pte_access &= gpte_access(vcpu, gpte);
|
|
|
|
+ protect_clean_gpte(&pte_access, gpte);
|
|
|
|
|
|
if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
|
|
if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
|
|
continue;
|
|
continue;
|