@@ -466,9 +466,20 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
/*
 * The idea of using a lightweight way to get the spte on x86_32 guests comes from
* gup_get_pte(arch/x86/mm/gup.c).
- * The difference is we can not catch the spte tlb flush if we leave
- * guest mode, so we emulate it by increase clear_spte_count when spte
- * is cleared.
+ *
+ * An spte tlb flush may be pending, because kvm_set_pte_rmapp
+ * coalesces them and we are running outside of the MMU lock. Therefore
+ * we need to protect against in-progress updates of the spte.
+ *
+ * Reading the spte while an update is in progress may get the old value
+ * for the high part of the spte. The race is fine for a present->non-present
+ * change (because the high part of the spte is ignored for non-present spte),
+ * but for a present->present change we must reread the spte.
+ *
+ * All such changes are done in two steps (present->non-present and
+ * non-present->present), hence it is enough to count the number of
+ * present->non-present updates: if it changed while reading the spte,
+ * we might have hit the race. This is done using clear_spte_count.
*/
static u64 __get_spte_lockless(u64 *sptep)
{
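
For reference, here is a self-contained userspace sketch of the retry
protocol the new comment describes. The names split_spte and
clear_spte_count mirror the kernel's, but everything else is assumed:
the GCC __atomic builtins stand in for the kernel's xchg()/smp_wmb()/
smp_rmb() primitives, and the per-shadow-page bookkeeping is reduced to
a bare counter. This is an illustration of the scheme, not the mmu.c
implementation.

#include <stdint.h>

/* A 64-bit spte that a 32-bit host can only access 32 bits at a time. */
union split_spte {
	struct {
		uint32_t spte_low;	/* holds the present bit */
		uint32_t spte_high;
	};
	uint64_t spte;
};

/* Stand-in for the shadow-page structure that owns the counter. */
struct shadow_page {
	unsigned int clear_spte_count;	/* present->non-present updates */
};

/*
 * Writer: a present->non-present update.  The low half (with the
 * present bit) is cleared first, so a racing reader sees the spte
 * become non-present before the high half changes; the counter is
 * bumped last, after both halves are written.
 */
static void clear_spte(struct shadow_page *sp, union split_spte *sptep)
{
	__atomic_store_n(&sptep->spte_low, 0, __ATOMIC_RELEASE);
	__atomic_store_n(&sptep->spte_high, 0, __ATOMIC_RELEASE);
	__atomic_fetch_add(&sp->clear_spte_count, 1, __ATOMIC_RELEASE);
}

/*
 * Lockless reader: retry if the low half changed under us, or if a
 * present->non-present update completed meanwhile (the counter moved),
 * because the high half we read may then be stale.
 */
static uint64_t get_spte_lockless(struct shadow_page *sp,
				  union split_spte *sptep)
{
	union split_spte spte;
	unsigned int count;

retry:
	count = __atomic_load_n(&sp->clear_spte_count, __ATOMIC_ACQUIRE);

	spte.spte_low = __atomic_load_n(&sptep->spte_low, __ATOMIC_ACQUIRE);
	spte.spte_high = __atomic_load_n(&sptep->spte_high, __ATOMIC_ACQUIRE);

	if (__atomic_load_n(&sptep->spte_low, __ATOMIC_ACQUIRE) != spte.spte_low ||
	    __atomic_load_n(&sp->clear_spte_count, __ATOMIC_ACQUIRE) != count)
		goto retry;

	return spte.spte;
}

int main(void)
{
	struct shadow_page sp = { 0 };
	union split_spte pte = { .spte = 0x00000003deadb001ull };

	uint64_t v = get_spte_lockless(&sp, &pte);	/* full 64-bit read */
	clear_spte(&sp, &pte);		/* clear_spte_count becomes 1 */
	return v == 0x00000003deadb001ull ? 0 : 1;
}

In the kernel's slow-path update the low half is written with an
xchg(), whose implied barrier plays the role of the release ordering
used in the sketch above.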