
Merge commit 'origin/next' into kvm-ppc-next

Alexander Graf, 12 years ago
Parent commit: 899f7b26bc

+ 15 - 3
arch/s390/kvm/interrupt.c

@@ -55,6 +55,13 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static u64 int_word_to_isc_bits(u32 int_word)
+{
+	u8 isc = (int_word & 0x38000000) >> 27;
+
+	return (0x80 >> isc) << 24;
+}
+
 static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
 				      struct kvm_s390_interrupt_info *inti)
 {
@@ -96,7 +103,8 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
 		if (psw_ioint_disabled(vcpu))
 			return 0;
-		if (vcpu->arch.sie_block->gcr[6] & inti->io.io_int_word)
+		if (vcpu->arch.sie_block->gcr[6] &
+		    int_word_to_isc_bits(inti->io.io_int_word))
 			return 1;
 		return 0;
 	default:
@@ -724,7 +732,8 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 	list_for_each_entry(iter, &fi->list, list) {
 		if (!is_ioint(iter->type))
 			continue;
-		if (cr6 && ((cr6 & iter->io.io_int_word) == 0))
+		if (cr6 &&
+		    ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0))
 			continue;
 		if (schid) {
 			if (((schid & 0x00000000ffff0000) >> 16) !=
@@ -811,11 +820,14 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 	if (!is_ioint(inti->type))
 		list_add_tail(&inti->list, &fi->list);
 	else {
+		u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
+
 		/* Keep I/O interrupts sorted in isc order. */
 		list_for_each_entry(iter, &fi->list, list) {
 			if (!is_ioint(iter->type))
 				continue;
-			if (iter->io.io_int_word <= inti->io.io_int_word)
+			if (int_word_to_isc_bits(iter->io.io_int_word)
+			    <= isc_bits)
 				continue;
 			break;
 		}

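The new int_word_to_isc_bits() helper extracts the 3-bit interruption subclass (ISC) from the I/O interruption word and turns it into the matching bit of CR6's I/O interruption subclass mask, so the gcr[6] test and the sorted insert above compare against the ISC bit rather than the raw interruption word. A minimal user-space sketch of that mapping (not part of the patch; the explicit cast is added here only to keep the standalone demo's shift well defined):

```c
#include <stdio.h>
#include <stdint.h>

/* Mirrors the arithmetic of the new int_word_to_isc_bits() helper above. */
static uint64_t isc_bits_demo(uint32_t int_word)
{
	uint8_t isc = (int_word & 0x38000000) >> 27;

	return (uint64_t)(0x80 >> isc) << 24;
}

int main(void)
{
	/* Show how each ISC value selects one bit of CR6's subclass mask. */
	for (uint32_t isc = 0; isc < 8; isc++) {
		uint32_t int_word = isc << 27;	/* ISC field only */

		printf("ISC %u -> CR6 mask bit 0x%08llx\n", isc,
		       (unsigned long long)isc_bits_demo(int_word));
	}
	return 0;
}
```

ISC 0 maps to the most significant mask bit (0x80000000) and ISC 7 to 0x01000000, which is also why sorting pending I/O interrupts by the converted value keeps them in ISC order.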
+ 5 - 8
arch/x86/kvm/emulate.c

@@ -2995,14 +2995,11 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
 
 	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
 
-	ctxt->eflags &= ~(X86_EFLAGS_PF | X86_EFLAGS_SF | X86_EFLAGS_ZF);
-
-	if (!al)
-		ctxt->eflags |= X86_EFLAGS_ZF;
-	if (!(al & 1))
-		ctxt->eflags |= X86_EFLAGS_PF;
-	if (al & 0x80)
-		ctxt->eflags |= X86_EFLAGS_SF;
+	/* Set PF, ZF, SF */
+	ctxt->src.type = OP_IMM;
+	ctxt->src.val = 0;
+	ctxt->src.bytes = 1;
+	fastop(ctxt, em_or);
 
 	return X86EMUL_CONTINUE;
 }

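Here the open-coded PF/ZF/SF updates are replaced by pushing a one-byte OR of the result with an immediate 0 through fastop(): the OR leaves AL unchanged but the host instruction sets the result flags for it. As a visible side effect, PF now follows the architectural definition (parity of the whole low byte) instead of the bit-0 test in the removed lines. A standalone sketch of the flags such an OR produces (user-space demo, not kernel code; assumes GCC-style __builtin_popcount):

```c
#include <stdio.h>
#include <stdint.h>

#define X86_EFLAGS_PF 0x0004
#define X86_EFLAGS_ZF 0x0040
#define X86_EFLAGS_SF 0x0080

/*
 * Flags after "or al, 0": the OR does not change al, so PF/ZF/SF simply
 * describe al itself.  PF is set for an even number of one bits in the
 * low byte; the removed code only tested bit 0.
 */
static uint32_t or_zero_flags(uint8_t al)
{
	uint32_t eflags = 0;

	if (al == 0)
		eflags |= X86_EFLAGS_ZF;
	if (!(__builtin_popcount(al) & 1))
		eflags |= X86_EFLAGS_PF;
	if (al & 0x80)
		eflags |= X86_EFLAGS_SF;
	return eflags;
}

int main(void)
{
	/* al = 0x03 has two set bits, so PF is set; a bit-0 test would miss it. */
	printf("flags(0x03) = %#x\n", or_zero_flags(0x03));
	printf("flags(0x00) = %#x\n", or_zero_flags(0x00));
	printf("flags(0x80) = %#x\n", or_zero_flags(0x80));
	return 0;
}
```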
+ 19 - 36
arch/x86/kvm/mmu.c

@@ -832,8 +832,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 	if (host_level == PT_PAGE_TABLE_LEVEL)
 		return host_level;
 
-	max_level = kvm_x86_ops->get_lpage_level() < host_level ?
-		kvm_x86_ops->get_lpage_level() : host_level;
+	max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
 
 	for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
 		if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
@@ -1106,8 +1105,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
 
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
- * spte writ-protection is caused by protecting shadow page table.
- * @flush indicates whether tlb need be flushed.
+ * spte write-protection is caused by protecting shadow page table.
  *
 * Note: write protection is difference between dirty logging and spte
  * protection:
@@ -1116,10 +1114,9 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
  * - for spte protection, the spte can be writable only after unsync-ing
  *   shadow page.
  *
- * Return true if the spte is dropped.
+ * Return true if tlb need be flushed.
  */
-static bool
-spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
+static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
 {
 	u64 spte = *sptep;
 
@@ -1129,17 +1126,11 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
 
 	rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
 
-	if (__drop_large_spte(kvm, sptep)) {
-		*flush |= true;
-		return true;
-	}
-
 	if (pt_protect)
 		spte &= ~SPTE_MMU_WRITEABLE;
 	spte = spte & ~PT_WRITABLE_MASK;
 
-	*flush |= mmu_spte_update(sptep, spte);
-	return false;
+	return mmu_spte_update(sptep, spte);
 }
 
 static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
@@ -1151,11 +1142,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
 		BUG_ON(!(*sptep & PT_PRESENT_MASK));
-		if (spte_write_protect(kvm, sptep, &flush, pt_protect)) {
-			sptep = rmap_get_first(*rmapp, &iter);
-			continue;
-		}
 
+		flush |= spte_write_protect(kvm, sptep, pt_protect);
 		sptep = rmap_get_next(&iter);
 	}
 
@@ -1959,9 +1947,9 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
 {
 	u64 spte;
 
-	spte = __pa(sp->spt)
-		| PT_PRESENT_MASK | PT_ACCESSED_MASK
-		| PT_WRITABLE_MASK | PT_USER_MASK;
+	spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
+	       shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
+
 	mmu_spte_set(sptep, spte);
 }
 
@@ -2400,16 +2388,15 @@ done:
 }
 
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
-			 unsigned pt_access, unsigned pte_access,
-			 int write_fault, int *emulate, int level, gfn_t gfn,
-			 pfn_t pfn, bool speculative, bool host_writable)
+			 unsigned pte_access, int write_fault, int *emulate,
+			 int level, gfn_t gfn, pfn_t pfn, bool speculative,
+			 bool host_writable)
 {
 	int was_rmapped = 0;
 	int rmap_count;
 
-	pgprintk("%s: spte %llx access %x write_fault %d gfn %llx\n",
-		 __func__, *sptep, pt_access,
-		 write_fault, gfn);
+	pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
+		 *sptep, write_fault, gfn);
 
 	if (is_rmap_spte(*sptep)) {
 		/*
@@ -2525,7 +2512,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
 		return -1;
 
 	for (i = 0; i < ret; i++, gfn++, start++)
-		mmu_set_spte(vcpu, start, ACC_ALL, access, 0, NULL,
+		mmu_set_spte(vcpu, start, access, 0, NULL,
 			     sp->role.level, gfn, page_to_pfn(pages[i]),
 			     true, true);
 
@@ -2586,9 +2573,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
 		if (iterator.level == level) {
-			unsigned pte_access = ACC_ALL;
-
-			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
+			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
 				     write, &emulate, level, gfn, pfn,
 				     prefault, map_writable);
 			direct_pte_prefetch(vcpu, iterator.sptep);
@@ -2596,6 +2581,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			break;
 		}
 
+		drop_large_spte(vcpu, iterator.sptep);
+
 		if (!is_shadow_present_pte(*iterator.sptep)) {
 			u64 base_addr = iterator.addr;
 
@@ -2605,11 +2592,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 					      iterator.level - 1,
 					      1, ACC_ALL, iterator.sptep);
 
-			mmu_spte_set(iterator.sptep,
-				     __pa(sp->spt)
-				     | PT_PRESENT_MASK | PT_WRITABLE_MASK
-				     | shadow_user_mask | shadow_x_mask
-				     | shadow_accessed_mask);
+			link_shadow_page(iterator.sptep, sp);
 		}
 	}
 	return emulate;

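The spte_write_protect() refactor above changes its contract from "true if the spte was dropped" (which forced __rmap_write_protect() to restart the walk from rmap_get_first()) to "true if the TLB needs to be flushed", with the large-spte drop moved out to __direct_map(). The caller now just ORs the per-spte results together and flushes once. A hypothetical sketch of that accumulate-then-flush pattern (write_protect_one() and tlb_flush_all() are stand-ins for spte_write_protect() and the remote TLB flush, not kernel APIs):

```c
#include <stdbool.h>

#define FAKE_WRITABLE_BIT 0x2ULL	/* stand-in for PT_WRITABLE_MASK */

/* Report whether this entry changed; the caller decides about flushing. */
static bool write_protect_one(unsigned long long *sptep)
{
	unsigned long long old = *sptep;

	*sptep = old & ~FAKE_WRITABLE_BIT;
	return *sptep != old;
}

static void tlb_flush_all(void)
{
	/* stand-in for the real remote TLB flush */
}

static bool write_protect_range(unsigned long long *sptes, int n)
{
	bool flush = false;
	int i;

	/* Accumulate the flush requirement, then flush at most once. */
	for (i = 0; i < n; i++)
		flush |= write_protect_one(&sptes[i]);

	if (flush)
		tlb_flush_all();
	return flush;
}
```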
+ 4 - 5
arch/x86/kvm/paging_tmpl.h

@@ -326,8 +326,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	 * we call mmu_set_spte() with host_writable = true because
 	 * pte_prefetch_gfn_to_pfn always gets a writable pfn.
 	 */
-	mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0,
-		     NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true);
+	mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL,
+		     gfn, pfn, true, true);
 
 	return true;
 }
@@ -470,9 +470,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	}
 
 	clear_sp_write_flooding_count(it.sptep);
-	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
-		     write_fault, &emulate, it.level,
-		     gw->gfn, pfn, prefault, map_writable);
+	mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate,
+		     it.level, gw->gfn, pfn, prefault, map_writable);
 	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
 
 	return emulate;

+ 7 - 12
arch/x86/kvm/vmx.c

@@ -84,8 +84,7 @@ module_param(vmm_exclusive, bool, S_IRUGO);
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
-static bool __read_mostly enable_apicv_reg_vid = 1;
-module_param(enable_apicv_reg_vid, bool, S_IRUGO);
+static bool __read_mostly enable_apicv_reg_vid;
 
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
@@ -95,12 +94,8 @@ module_param(enable_apicv_reg_vid, bool, S_IRUGO);
 static bool __read_mostly nested = 0;
 module_param(nested, bool, S_IRUGO);
 
-#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST				\
-	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
-#define KVM_GUEST_CR0_MASK						\
-	(KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
-#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST				\
-	(X86_CR0_WP | X86_CR0_NE)
+#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
+#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
 #define KVM_VM_CR0_ALWAYS_ON						\
 	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS				      \
@@ -3137,11 +3132,11 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long hw_cr0;
 
+	hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK);
 	if (enable_unrestricted_guest)
-		hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST)
-			| KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
+		hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
 	else {
-		hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
+		hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
 
 		if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
 			enter_pmode(vcpu);
@@ -5925,7 +5920,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
 	u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
 	gpa_t bitmap;
 
-	if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS))
+	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
 		return 1;
 
 	/*

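With KVM_GUEST_CR0_MASK reduced to NW|CD, vmx_set_cr0() now strips only the cache-control bits from the guest value and then ORs in the always-on bits, which differ only by PG|PE between the unrestricted and restricted cases. A standalone sketch of the resulting hw_cr0 values (hw_cr0_for() is a hypothetical helper; the CR0 bit values are the standard architectural ones):

```c
#include <stdio.h>

#define X86_CR0_PE 0x00000001UL
#define X86_CR0_NE 0x00000020UL
#define X86_CR0_WP 0x00010000UL
#define X86_CR0_NW 0x20000000UL
#define X86_CR0_CD 0x40000000UL
#define X86_CR0_PG 0x80000000UL

/* Same shape as the simplified masks in the diff above. */
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
#define KVM_VM_CR0_ALWAYS_ON \
	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)

static unsigned long hw_cr0_for(unsigned long cr0, int unrestricted)
{
	/* Mask the cache-control bits, then force the always-on bits. */
	unsigned long hw_cr0 = cr0 & ~KVM_GUEST_CR0_MASK;

	hw_cr0 |= unrestricted ? KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST
			       : KVM_VM_CR0_ALWAYS_ON;
	return hw_cr0;
}

int main(void)
{
	/* A guest that has disabled caching. */
	unsigned long guest_cr0 = X86_CR0_CD | X86_CR0_NW;

	printf("unrestricted guest: hw_cr0 = %#lx\n", hw_cr0_for(guest_cr0, 1));
	printf("restricted guest:   hw_cr0 = %#lx\n", hw_cr0_for(guest_cr0, 0));
	return 0;
}
```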
+ 16 - 21
arch/x86/kvm/x86.c

@@ -6897,33 +6897,28 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				bool user_alloc)
 {
 	int npages = memslot->npages;
-	int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
 
-	/* Prevent internal slot pages from being moved by fork()/COW. */
-	if (memslot->id >= KVM_USER_MEM_SLOTS)
-		map_flags = MAP_SHARED | MAP_ANONYMOUS;
-
-	/*To keep backward compatibility with older userspace,
-	 *x86 needs to handle !user_alloc case.
+	/*
+	 * Only private memory slots need to be mapped here since
+	 * KVM_SET_MEMORY_REGION ioctl is no longer supported.
 	 */
-	if (!user_alloc) {
-		if (npages && !old.npages) {
-			unsigned long userspace_addr;
+	if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
+		unsigned long userspace_addr;
 
-			userspace_addr = vm_mmap(NULL, 0,
-						 npages * PAGE_SIZE,
-						 PROT_READ | PROT_WRITE,
-						 map_flags,
-						 0);
+		/*
+		 * MAP_SHARED to prevent internal slot pages from being moved
+		 * by fork()/COW.
+		 */
+		userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,
+					 PROT_READ | PROT_WRITE,
+					 MAP_SHARED | MAP_ANONYMOUS, 0);
 
-			if (IS_ERR((void *)userspace_addr))
-				return PTR_ERR((void *)userspace_addr);
+		if (IS_ERR((void *)userspace_addr))
+			return PTR_ERR((void *)userspace_addr);
 
-			memslot->userspace_addr = userspace_addr;
-		}
+		memslot->userspace_addr = userspace_addr;
 	}
 
-
 	return 0;
 }
 
@@ -6935,7 +6930,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 
 	int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
 
-	if (!user_alloc && !old.user_alloc && old.npages && !npages) {
+	if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
 		int ret;
 
 		ret = vm_munmap(old.userspace_addr,

+ 0 - 1
include/linux/kvm_host.h

@@ -273,7 +273,6 @@ struct kvm_memory_slot {
 	unsigned long userspace_addr;
 	u32 flags;
 	short id;
-	bool user_alloc;
 };
 
 static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)

+ 0 - 1
virt/kvm/kvm_main.c

@@ -839,7 +839,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	r = -ENOMEM;
 	if (change == KVM_MR_CREATE) {
-		new.user_alloc = user_alloc;
 		new.userspace_addr = mem->userspace_addr;
 
 		if (kvm_arch_create_memslot(&new, npages))