Эх сурвалжийг харах

KVM: Fix write protection race during dirty logging

This patch fixes a race introduced by:

  commit 95d4c16ce78cb6b7549a09159c409d52ddd18dae
  KVM: Optimize dirty logging by rmap_write_protect()

During protecting pages for dirty logging, other threads may also try
to protect a page in mmu_sync_children() or kvm_mmu_get_page().

In such a case, because get_dirty_log releases mmu_lock before flushing
TLB's, the following race condition can happen:

  A (get_dirty_log)     B (another thread)

  lock(mmu_lock)
  clear pte.w
  unlock(mmu_lock)
                        lock(mmu_lock)
                        pte.w is already cleared
                        unlock(mmu_lock)
                        skip TLB flush
                        return
  ...
  TLB flush

Though thread B assumes the page has already been protected when it
returns, the remaining TLB entry will break that assumption.

This patch fixes this problem by making get_dirty_log hold the mmu_lock
until it flushes the TLB's.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Takuya Yoshikawa 13 жил өмнө
parent
commit
6dbf79e716

+ 5 - 6
arch/x86/kvm/x86.c

@@ -3065,6 +3065,8 @@ static void write_protect_slot(struct kvm *kvm,
 			       unsigned long *dirty_bitmap,
 			       unsigned long *dirty_bitmap,
 			       unsigned long nr_dirty_pages)
 			       unsigned long nr_dirty_pages)
 {
 {
+	spin_lock(&kvm->mmu_lock);
+
 	/* Not many dirty pages compared to # of shadow pages. */
 	/* Not many dirty pages compared to # of shadow pages. */
 	if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
 	if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
 		unsigned long gfn_offset;
 		unsigned long gfn_offset;
@@ -3072,16 +3074,13 @@ static void write_protect_slot(struct kvm *kvm,
 		for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
 		for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
 			unsigned long gfn = memslot->base_gfn + gfn_offset;
 			unsigned long gfn = memslot->base_gfn + gfn_offset;
 
 
-			spin_lock(&kvm->mmu_lock);
 			kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
 			kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
-			spin_unlock(&kvm->mmu_lock);
 		}
 		}
 		kvm_flush_remote_tlbs(kvm);
 		kvm_flush_remote_tlbs(kvm);
-	} else {
-		spin_lock(&kvm->mmu_lock);
+	} else
 		kvm_mmu_slot_remove_write_access(kvm, memslot->id);
 		kvm_mmu_slot_remove_write_access(kvm, memslot->id);
-		spin_unlock(&kvm->mmu_lock);
-	}
+
+	spin_unlock(&kvm->mmu_lock);
 }
 }
 
 
 /*
 /*