Browse Source

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm:
  x86: disable KVM for Voyager and friends
  KVM: VMX: Avoid rearranging switched guest msrs while they are loaded
  KVM: MMU: Fix race when instantiating a shadow pte
  KVM: Route irq 0 to vcpu 0 exclusively
  KVM: Avoid infinite-frequency local apic timer
  KVM: make MMU_DEBUG compile again
  KVM: move alloc_apic_access_page() outside of non-preemptable region
  KVM: SVM: fix Windows XP 64 bit installation crash
  KVM: remove the usage of the mmap_sem for the protection of the memory slots.
  KVM: emulate access to MSR_IA32_MCG_CTL
  KVM: Make the supported cpuid list a host property rather than a vm property
  KVM: Fix kvm_arch_vcpu_ioctl_set_sregs so that set_cr0 works properly
  KVM: SVM: set NM intercept when enabling CR0.TS in the guest
  KVM: SVM: Fix lazy FPU switching
Linus Torvalds 17 years ago
parent
commit
67171a3f03

+ 1 - 1
arch/x86/Kconfig

@@ -21,7 +21,7 @@ config X86
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
-	select HAVE_KVM
+	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 
 
 config GENERIC_LOCKBREAK

+ 4 - 0
arch/x86/kvm/lapic.c

@@ -647,6 +647,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
 	apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
 		    APIC_BUS_CYCLE_NS * apic->timer.divide_count;
 	atomic_set(&apic->timer.pending, 0);
+
+	if (!apic->timer.period)
+		return;
+
 	hrtimer_start(&apic->timer.dev,
 		      ktime_add_ns(now, apic->timer.period),
 		      HRTIMER_MODE_ABS);

+ 25 - 13
arch/x86/kvm/mmu.c

@@ -681,8 +681,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     unsigned level,
 					     int metaphysical,
 					     unsigned access,
-					     u64 *parent_pte,
-					     bool *new_page)
+					     u64 *parent_pte)
 {
 	union kvm_mmu_page_role role;
 	unsigned index;
@@ -722,8 +721,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	vcpu->arch.mmu.prefetch_page(vcpu, sp);
 	if (!metaphysical)
 		rmap_write_protect(vcpu->kvm, gfn);
-	if (new_page)
-		*new_page = 1;
 	return sp;
 }
 
@@ -876,11 +873,18 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
 
 struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 {
+	struct page *page;
+
 	gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
 
 	if (gpa == UNMAPPED_GVA)
 		return NULL;
-	return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+
+	down_read(&current->mm->mmap_sem);
+	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+	up_read(&current->mm->mmap_sem);
+
+	return page;
 }
 
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
@@ -999,8 +1003,7 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
 				>> PAGE_SHIFT;
 			new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
 						     v, level - 1,
-						     1, ACC_ALL, &table[index],
-						     NULL);
+						     1, ACC_ALL, &table[index]);
 			if (!new_table) {
 				pgprintk("nonpaging_map: ENOMEM\n");
 				kvm_release_page_clean(page);
@@ -1020,15 +1023,18 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 
 	struct page *page;
 
+	down_read(&vcpu->kvm->slots_lock);
+
 	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, gfn);
+	up_read(&current->mm->mmap_sem);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	r = __nonpaging_map(vcpu, v, write, gfn, page);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	return r;
 }
@@ -1090,7 +1096,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
 		ASSERT(!VALID_PAGE(root));
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
-				      PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL);
+				      PT64_ROOT_LEVEL, 0, ACC_ALL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		vcpu->arch.mmu.root_hpa = root;
@@ -1111,7 +1117,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 			root_gfn = 0;
 		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
 				      PT32_ROOT_LEVEL, !is_paging(vcpu),
-				      ACC_ALL, NULL, NULL);
+				      ACC_ALL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
@@ -1172,7 +1178,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
-	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
+	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3);
 	mmu_free_roots(vcpu);
 }
 
@@ -1362,6 +1368,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	gfn_t gfn;
 	int r;
 	u64 gpte = 0;
+	struct page *page;
 
 	if (bytes != 4 && bytes != 8)
 		return;
@@ -1389,6 +1396,11 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	if (!is_present_pte(gpte))
 		return;
 	gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+
+	down_read(&current->mm->mmap_sem);
+	page = gfn_to_page(vcpu->kvm, gfn);
+	up_read(&current->mm->mmap_sem);
+
 	vcpu->arch.update_pte.gfn = gfn;
 	vcpu->arch.update_pte.page = gfn_to_page(vcpu->kvm, gfn);
 }
@@ -1496,9 +1508,9 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 	gpa_t gpa;
 	int r;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);

+ 12 - 8
arch/x86/kvm/paging_tmpl.h

@@ -91,7 +91,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
 	pt_element_t *table;
 	struct page *page;
 
+	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(kvm, table_gfn);
+	up_read(&current->mm->mmap_sem);
+
 	table = kmap_atomic(page, KM_USER0);
 
 	ret = CMPXCHG(&table[index], orig_pte, new_pte);
@@ -140,7 +143,7 @@ walk:
 	}
 #endif
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
-	       (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
+	       (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
 
 	pt_access = ACC_ALL;
 
@@ -297,7 +300,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		u64 shadow_pte;
 		int metaphysical;
 		gfn_t table_gfn;
-		bool new_page = 0;
 
 		shadow_ent = ((u64 *)__va(shadow_addr)) + index;
 		if (level == PT_PAGE_TABLE_LEVEL)
@@ -319,8 +321,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		}
 		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
 					       metaphysical, access,
-					       shadow_ent, &new_page);
-		if (new_page && !metaphysical) {
+					       shadow_ent);
+		if (!metaphysical) {
 			int r;
 			pt_element_t curr_pte;
 			r = kvm_read_guest_atomic(vcpu->kvm,
@@ -378,7 +380,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	if (r)
 		return r;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	/*
 	 * Look up the shadow pte for the faulting address.
 	 */
@@ -392,11 +394,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 		pgprintk("%s: guest page fault\n", __FUNCTION__);
 		inject_page_fault(vcpu, addr, walker.error_code);
 		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 		return 0;
 	}
 
+	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, walker.gfn);
+	up_read(&current->mm->mmap_sem);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
@@ -413,14 +417,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	 */
 	if (shadow_pte && is_io_pte(*shadow_pte)) {
 		spin_unlock(&vcpu->kvm->mmu_lock);
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 		return 1;
 	}
 
 	++vcpu->stat.pf_fixed;
 	kvm_mmu_audit(vcpu, "post page fault (fixed)");
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	return write_pt;
 }

+ 26 - 0
arch/x86/kvm/svm.c

@@ -792,6 +792,10 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	vcpu->arch.cr0 = cr0;
 	cr0 |= X86_CR0_PG | X86_CR0_WP;
 	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+	if (!vcpu->fpu_active) {
+		svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
+		cr0 |= X86_CR0_TS;
+	}
 	svm->vmcb->save.cr0 = cr0;
 }
 
@@ -1096,6 +1100,24 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 	case MSR_IA32_SYSENTER_ESP:
 		*data = svm->vmcb->save.sysenter_esp;
 		break;
+	/* Nobody will change the following 5 values in the VMCB so
+	   we can safely return them on rdmsr. They will always be 0
+	   until LBRV is implemented. */
+	case MSR_IA32_DEBUGCTLMSR:
+		*data = svm->vmcb->save.dbgctl;
+		break;
+	case MSR_IA32_LASTBRANCHFROMIP:
+		*data = svm->vmcb->save.br_from;
+		break;
+	case MSR_IA32_LASTBRANCHTOIP:
+		*data = svm->vmcb->save.br_to;
+		break;
+	case MSR_IA32_LASTINTFROMIP:
+		*data = svm->vmcb->save.last_excp_from;
+		break;
+	case MSR_IA32_LASTINTTOIP:
+		*data = svm->vmcb->save.last_excp_to;
+		break;
 	default:
 		return kvm_get_msr_common(vcpu, ecx, data);
 	}
@@ -1156,6 +1178,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 	case MSR_IA32_SYSENTER_ESP:
 		svm->vmcb->save.sysenter_esp = data;
 		break;
+	case MSR_IA32_DEBUGCTLMSR:
+		pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
+				__FUNCTION__, data);
+		break;
 	case MSR_K7_EVNTSEL0:
 	case MSR_K7_EVNTSEL1:
 	case MSR_K7_EVNTSEL2:

+ 9 - 5
arch/x86/kvm/vmx.c

@@ -638,6 +638,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 {
 	int save_nmsrs;
 
+	vmx_load_host_state(vmx);
 	save_nmsrs = 0;
 #ifdef CONFIG_X86_64
 	if (is_long_mode(&vmx->vcpu)) {
@@ -1477,7 +1478,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 	if (kvm->arch.apic_access_page)
 		goto out;
 	kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@@ -1487,9 +1488,12 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
 	if (r)
 		goto out;
+
+	down_read(&current->mm->mmap_sem);
 	kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
+	up_read(&current->mm->mmap_sem);
 out:
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 	return r;
 }
 
@@ -1602,9 +1606,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
 
-	if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
-		if (alloc_apic_access_page(vmx->vcpu.kvm) != 0)
-			return -ENOMEM;
 
 	return 0;
 }
@@ -2534,6 +2535,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	put_cpu();
 	if (err)
 		goto free_vmcs;
+	if (vm_need_virtualize_apic_accesses(kvm))
+		if (alloc_apic_access_page(kvm) != 0)
+			goto free_vmcs;
 
 	return &vmx->vcpu;
 

+ 65 - 49
arch/x86/kvm/x86.c

@@ -46,6 +46,9 @@
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
+static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
+				    struct kvm_cpuid_entry2 __user *entries);
+
 struct kvm_x86_ops *kvm_x86_ops;
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
@@ -181,7 +184,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 	int ret;
 	u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
 				  offset * sizeof(u64), sizeof(pdpte));
 	if (ret < 0) {
@@ -198,7 +201,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 	memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
 out:
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	return ret;
 }
@@ -212,13 +215,13 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
 	if (is_long_mode(vcpu) || !is_pae(vcpu))
 		return false;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
 	if (r < 0)
 		goto out;
 	changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
 out:
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	return changed;
 }
@@ -356,7 +359,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		 */
 	}
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	/*
 	 * Does the new cr3 value map to physical memory? (Note, we
 	 * catch an invalid cr3 even in real-mode, because it would
@@ -372,7 +375,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		vcpu->arch.cr3 = cr3;
 		vcpu->arch.mmu.new_cr3(vcpu);
 	}
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 }
 EXPORT_SYMBOL_GPL(set_cr3);
 
@@ -484,6 +487,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
 			__FUNCTION__, data);
 		break;
+	case MSR_IA32_MCG_CTL:
+		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
+			__FUNCTION__, data);
+		break;
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_UCODE_WRITE:
 	case 0x200 ... 0x2ff: /* MTRRs */
@@ -526,6 +533,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_MC0_CTL:
 	case MSR_IA32_MCG_STATUS:
 	case MSR_IA32_MCG_CAP:
+	case MSR_IA32_MCG_CTL:
 	case MSR_IA32_MC0_MISC:
 	case MSR_IA32_MC0_MISC+4:
 	case MSR_IA32_MC0_MISC+8:
@@ -727,6 +735,24 @@ long kvm_arch_dev_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_GET_SUPPORTED_CPUID: {
+		struct kvm_cpuid2 __user *cpuid_arg = argp;
+		struct kvm_cpuid2 cpuid;
+
+		r = -EFAULT;
+		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+			goto out;
+		r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
+			cpuid_arg->entries);
+		if (r)
+			goto out;
+
+		r = -EFAULT;
+		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
@@ -974,8 +1000,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	put_cpu();
 }
 
-static int kvm_vm_ioctl_get_supported_cpuid(struct kvm *kvm,
-				    struct kvm_cpuid2 *cpuid,
+static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 				    struct kvm_cpuid_entry2 __user *entries)
 {
 	struct kvm_cpuid_entry2 *cpuid_entries;
@@ -1207,12 +1232,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
 	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
 		return -EINVAL;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 
 	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
 	kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 	return 0;
 }
 
@@ -1261,7 +1286,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 	    < alias->target_phys_addr)
 		goto out;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 
 	p = &kvm->arch.aliases[alias->slot];
 	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -1275,7 +1300,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 
 	kvm_mmu_zap_all(kvm);
 
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 
 	return 0;
 
@@ -1351,7 +1376,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	struct kvm_memory_slot *memslot;
 	int is_dirty = 0;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 
 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
 	if (r)
@@ -1367,7 +1392,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	}
 	r = 0;
 out:
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 	return r;
 }
 
@@ -1487,24 +1512,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
-	case KVM_GET_SUPPORTED_CPUID: {
-		struct kvm_cpuid2 __user *cpuid_arg = argp;
-		struct kvm_cpuid2 cpuid;
-
-		r = -EFAULT;
-		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
-			goto out;
-		r = kvm_vm_ioctl_get_supported_cpuid(kvm, &cpuid,
-			cpuid_arg->entries);
-		if (r)
-			goto out;
-
-		r = -EFAULT;
-		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
-			goto out;
-		r = 0;
-		break;
-	}
 	default:
 		;
 	}
@@ -1563,7 +1570,7 @@ int emulator_read_std(unsigned long addr,
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	while (bytes) {
 		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 		unsigned offset = addr & (PAGE_SIZE-1);
@@ -1585,7 +1592,7 @@ int emulator_read_std(unsigned long addr,
 		addr += tocopy;
 	}
 out:
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(emulator_read_std);
@@ -1604,9 +1611,9 @@ static int emulator_read_emulated(unsigned long addr,
 		return X86EMUL_CONTINUE;
 	}
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	/* For APIC access vmexit */
 	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -1644,14 +1651,14 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 {
 	int ret;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
 	if (ret < 0) {
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 		return 0;
 	}
 	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 	return 1;
 }
 
@@ -1663,9 +1670,9 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 	struct kvm_io_device *mmio_dev;
 	gpa_t                 gpa;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	if (gpa == UNMAPPED_GVA) {
 		kvm_inject_page_fault(vcpu, addr, 2);
@@ -1742,7 +1749,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 		char *kaddr;
 		u64 val;
 
-		down_read(&current->mm->mmap_sem);
+		down_read(&vcpu->kvm->slots_lock);
 		gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 
 		if (gpa == UNMAPPED_GVA ||
@@ -1753,13 +1760,17 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 			goto emul_write;
 
 		val = *(u64 *)new;
+
+		down_read(&current->mm->mmap_sem);
 		page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+		up_read(&current->mm->mmap_sem);
+
 		kaddr = kmap_atomic(page, KM_USER0);
 		set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
 		kunmap_atomic(kaddr, KM_USER0);
 		kvm_release_page_dirty(page);
 	emul_write:
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 	}
 #endif
 
@@ -2152,10 +2163,10 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 		kvm_x86_ops->skip_emulated_instruction(vcpu);
 
 	for (i = 0; i < nr_pages; ++i) {
-		down_read(&current->mm->mmap_sem);
+		down_read(&vcpu->kvm->slots_lock);
 		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
 		vcpu->arch.pio.guest_pages[i] = page;
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 		if (!page) {
 			kvm_inject_gp(vcpu, 0);
 			free_pio_guest_pages(vcpu);
@@ -2478,8 +2489,9 @@ static void vapic_enter(struct kvm_vcpu *vcpu)
 
 	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
-	vcpu->arch.apic->vapic_page = page;
 	up_read(&current->mm->mmap_sem);
+
+	vcpu->arch.apic->vapic_page = page;
 }
 
 static void vapic_exit(struct kvm_vcpu *vcpu)
@@ -2861,8 +2873,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
 
 	mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
-	vcpu->arch.cr0 = sregs->cr0;
 	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
+	vcpu->arch.cr0 = sregs->cr0;
 
 	mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
 	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
@@ -2952,9 +2964,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	gpa_t gpa;
 
 	vcpu_load(vcpu);
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 	tr->physical_address = gpa;
 	tr->valid = gpa != UNMAPPED_GVA;
 	tr->writeable = 1;
@@ -3227,11 +3239,13 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	 */
 	if (!user_alloc) {
 		if (npages && !old.rmap) {
+			down_write(&current->mm->mmap_sem);
 			memslot->userspace_addr = do_mmap(NULL, 0,
 						     npages * PAGE_SIZE,
 						     PROT_READ | PROT_WRITE,
 						     MAP_SHARED | MAP_ANONYMOUS,
 						     0);
+			up_write(&current->mm->mmap_sem);
 
 			if (IS_ERR((void *)memslot->userspace_addr))
 				return PTR_ERR((void *)memslot->userspace_addr);
@@ -3239,8 +3253,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 			if (!old.user_alloc && old.rmap) {
 				int ret;
 
+				down_write(&current->mm->mmap_sem);
 				ret = do_munmap(current->mm, old.userspace_addr,
 						old.npages * PAGE_SIZE);
+				up_write(&current->mm->mmap_sem);
 				if (ret < 0)
 					printk(KERN_WARNING
 				       "kvm_vm_ioctl_set_memory_region: "

+ 2 - 2
include/linux/kvm.h

@@ -221,6 +221,7 @@ struct kvm_vapic_addr {
  * Get size for mmap(vcpu_fd)
  */
 #define KVM_GET_VCPU_MMAP_SIZE    _IO(KVMIO,   0x04) /* in bytes */
+#define KVM_GET_SUPPORTED_CPUID   _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
 
 /*
  * Extension capability list.
@@ -230,8 +231,8 @@ struct kvm_vapic_addr {
 #define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2
 #define KVM_CAP_USER_MEMORY 3
 #define KVM_CAP_SET_TSS_ADDR 4
-#define KVM_CAP_EXT_CPUID 5
 #define KVM_CAP_VAPIC 6
+#define KVM_CAP_EXT_CPUID 7
 
 /*
  * ioctls for VM fds
@@ -249,7 +250,6 @@ struct kvm_vapic_addr {
 #define KVM_CREATE_VCPU           _IO(KVMIO,  0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
 #define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
-#define KVM_GET_SUPPORTED_CPUID   _IOWR(KVMIO, 0x48, struct kvm_cpuid2)
 /* Device model IOC */
 #define KVM_CREATE_IRQCHIP	  _IO(KVMIO,  0x60)
 #define KVM_IRQ_LINE		  _IOW(KVMIO, 0x61, struct kvm_irq_level)

+ 1 - 0
include/linux/kvm_host.h

@@ -107,6 +107,7 @@ struct kvm_memory_slot {
 struct kvm {
 	struct mutex lock; /* protects the vcpus array and APIC accesses */
 	spinlock_t mmu_lock;
+	struct rw_semaphore slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	int nmemslots;
 	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +

+ 8 - 0
virt/kvm/ioapic.c

@@ -211,6 +211,10 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 	case IOAPIC_LOWEST_PRIORITY:
 		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
 				deliver_bitmask);
+#ifdef CONFIG_X86
+		if (irq == 0)
+			vcpu = ioapic->kvm->vcpus[0];
+#endif
 		if (vcpu != NULL)
 			ioapic_inj_irq(ioapic, vcpu, vector,
 				       trig_mode, delivery_mode);
@@ -220,6 +224,10 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 				     deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY);
 		break;
 	case IOAPIC_FIXED:
+#ifdef CONFIG_X86
+		if (irq == 0)
+			deliver_bitmask = 1;
+#endif
 		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
 			if (!(deliver_bitmask & (1 << vcpu_id)))
 				continue;

+ 3 - 2
virt/kvm/kvm_main.c

@@ -169,6 +169,7 @@ static struct kvm *kvm_create_vm(void)
 	kvm_io_bus_init(&kvm->pio_bus);
 	mutex_init(&kvm->lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
+	init_rwsem(&kvm->slots_lock);
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
@@ -339,9 +340,9 @@ int kvm_set_memory_region(struct kvm *kvm,
 {
 	int r;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 	r = __kvm_set_memory_region(kvm, mem, user_alloc);
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);