
Merge branch 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (84 commits)
  KVM: VMX: Fix comparison of guest efer with stale host value
  KVM: s390: Fix prefix register checking in arch/s390/kvm/sigp.c
  KVM: Drop user return notifier when disabling virtualization on a cpu
  KVM: VMX: Disable unrestricted guest when EPT disabled
  KVM: x86 emulator: limit instructions to 15 bytes
  KVM: s390: Make psw available on all exits, not just a subset
  KVM: x86: Add KVM_GET/SET_VCPU_EVENTS
  KVM: VMX: Report unexpected simultaneous exceptions as internal errors
  KVM: Allow internal errors reported to userspace to carry extra data
  KVM: Reorder IOCTLs in main kvm.h
  KVM: x86: Polish exception injection via KVM_SET_GUEST_DEBUG
  KVM: only clear irq_source_id if irqchip is present
  KVM: x86: disallow KVM_{SET,GET}_LAPIC without allocated in-kernel lapic
  KVM: x86: disallow multiple KVM_CREATE_IRQCHIP
  KVM: VMX: Remove vmx->msr_offset_efer
  KVM: MMU: update invlpg handler comment
  KVM: VMX: move CR3/PDPTR update to vmx_set_cr3
  KVM: remove duplicated task_switch check
  KVM: powerpc: Fix BUILD_BUG_ON condition
  KVM: VMX: Use shared msr infrastructure
  ...

Trivial conflicts due to new Kconfig options in arch/Kconfig and kernel/Makefile
Linus Torvalds, 15 years ago
parent revision ed9216c171
45 changed files with 2836 additions and 1554 deletions
  1. Documentation/kvm/api.txt (+109 / -0)
  2. arch/Kconfig (+9 / -0)
  3. arch/ia64/include/asm/kvm.h (+1 / -0)
  4. arch/ia64/include/asm/kvm_host.h (+0 / -1)
  5. arch/ia64/kvm/Makefile (+1 / -1)
  6. arch/ia64/kvm/kvm-ia64.c (+8 / -11)
  7. arch/powerpc/kvm/powerpc.c (+3 / -2)
  8. arch/powerpc/kvm/timing.h (+1 / -1)
  9. arch/s390/include/asm/kvm.h (+2 / -1)
  10. arch/s390/kvm/kvm-s390.c (+20 / -10)
  11. arch/s390/kvm/sigp.c (+3 / -3)
  12. arch/x86/Kconfig (+1 / -0)
  13. arch/x86/include/asm/kvm.h (+30 / -0)
  14. arch/x86/include/asm/kvm_emulate.h (+1 / -1)
  15. arch/x86/include/asm/kvm_host.h (+23 / -11)
  16. arch/x86/include/asm/svm.h (+2 / -1)
  17. arch/x86/include/asm/thread_info.h (+5 / -2)
  18. arch/x86/include/asm/vmx.h (+4 / -0)
  19. arch/x86/kernel/process.c (+2 / -0)
  20. arch/x86/kernel/signal.c (+3 / -0)
  21. arch/x86/kvm/Kconfig (+1 / -0)
  22. arch/x86/kvm/Makefile (+2 / -1)
  23. arch/x86/kvm/emulate.c (+145 / -14)
  24. arch/x86/kvm/i8254.c (+0 / -2)
  25. arch/x86/kvm/i8259.c (+25 / -19)
  26. arch/x86/kvm/irq.h (+6 / -1)
  27. arch/x86/kvm/lapic.c (+1 / -7)
  28. arch/x86/kvm/mmu.c (+2 / -1)
  29. arch/x86/kvm/paging_tmpl.h (+0 / -1)
  30. arch/x86/kvm/svm.c (+203 / -128)
  31. arch/x86/kvm/trace.h (+165 / -0)
  32. arch/x86/kvm/vmx.c (+236 / -181)
  33. arch/x86/kvm/x86.c (+413 / -109)
  34. include/linux/kvm.h (+156 / -116)
  35. include/linux/kvm_host.h (+48 / -5)
  36. include/linux/user-return-notifier.h (+49 / -0)
  37. kernel/Makefile (+1 / -0)
  38. kernel/fork.c (+2 / -0)
  39. kernel/user-return-notifier.c (+46 / -0)
  40. virt/kvm/assigned-dev.c (+818 / -0)
  41. virt/kvm/eventfd.c (+0 / -2)
  42. virt/kvm/ioapic.c (+61 / -19)
  43. virt/kvm/ioapic.h (+5 / -0)
  44. virt/kvm/irq_comm.c (+136 / -95)
  45. virt/kvm/kvm_main.c (+87 / -808)

+ 109 - 0
Documentation/kvm/api.txt

@@ -593,6 +593,115 @@ struct kvm_irqchip {
 	} chip;
 };
 
+4.27 KVM_XEN_HVM_CONFIG
+
+Capability: KVM_CAP_XEN_HVM
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_xen_hvm_config (in)
+Returns: 0 on success, -1 on error
+
+Sets the MSR that the Xen HVM guest uses to initialize its hypercall
+page, and provides the starting address and size of the hypercall
+blobs in userspace.  When the guest writes the MSR, kvm copies one
+page of a blob (32- or 64-bit, depending on the vcpu mode) to guest
+memory.
+
+struct kvm_xen_hvm_config {
+	__u32 flags;
+	__u32 msr;
+	__u64 blob_addr_32;
+	__u64 blob_addr_64;
+	__u8 blob_size_32;
+	__u8 blob_size_64;
+	__u8 pad2[30];
+};
+
+4.28 KVM_GET_CLOCK
+
+Capability: KVM_CAP_ADJUST_CLOCK
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_clock_data (out)
+Returns: 0 on success, -1 on error
+
+Gets the current timestamp of kvmclock as seen by the current guest. In
+conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity in scenarios
+such as migration.
+
+struct kvm_clock_data {
+	__u64 clock;  /* kvmclock current value */
+	__u32 flags;
+	__u32 pad[9];
+};
+
+4.29 KVM_SET_CLOCK
+
+Capability: KVM_CAP_ADJUST_CLOCK
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_clock_data (in)
+Returns: 0 on success, -1 on error
+
+Sets the current timestamp of kvmclock to the value specified in its parameter.
+In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity in scenarios
+such as migration.
+
+struct kvm_clock_data {
+	__u64 clock;  /* kvmclock current value */
+	__u32 flags;
+	__u32 pad[9];
+};
+
+4.30 KVM_GET_VCPU_EVENTS
+
+Capability: KVM_CAP_VCPU_EVENTS
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_vcpu_events (out)
+Returns: 0 on success, -1 on error
+
+Gets currently pending exceptions, interrupts, and NMIs as well as related
+states of the vcpu.
+
+struct kvm_vcpu_events {
+	struct {
+		__u8 injected;
+		__u8 nr;
+		__u8 has_error_code;
+		__u8 pad;
+		__u32 error_code;
+	} exception;
+	struct {
+		__u8 injected;
+		__u8 nr;
+		__u8 soft;
+		__u8 pad;
+	} interrupt;
+	struct {
+		__u8 injected;
+		__u8 pending;
+		__u8 masked;
+		__u8 pad;
+	} nmi;
+	__u32 sipi_vector;
+	__u32 flags;   /* must be zero */
+};
+
+4.31 KVM_SET_VCPU_EVENTS
+
+Capability: KVM_CAP_VCPU_EVENTS
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_vcpu_events (in)
+Returns: 0 on success, -1 on error
+
+Sets pending exceptions, interrupts, and NMIs as well as related states of the
+vcpu.
+
+See KVM_GET_VCPU_EVENTS for the data structure.
+
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by

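The clock and vcpu-events ioctls documented above are aimed at migration. As a hedged illustration (not part of the patch), a userspace tool might drive them roughly like this, assuming fds obtained via KVM_CREATE_VM and KVM_CREATE_VCPU and with error handling reduced to a minimum:

	#include <linux/kvm.h>
	#include <string.h>
	#include <sys/ioctl.h>

	/* Save kvmclock and pending-event state from the source VM and
	 * restore both on the destination, as a migration tool might. */
	static int migrate_kvm_state(int src_vm, int dst_vm,
				     int src_vcpu, int dst_vcpu)
	{
		struct kvm_clock_data clock;
		struct kvm_vcpu_events events;

		memset(&clock, 0, sizeof(clock));
		if (ioctl(src_vm, KVM_GET_CLOCK, &clock) < 0)
			return -1;
		clock.flags = 0;	/* flags must be zero */
		if (ioctl(dst_vm, KVM_SET_CLOCK, &clock) < 0)
			return -1;

		memset(&events, 0, sizeof(events));
		if (ioctl(src_vcpu, KVM_GET_VCPU_EVENTS, &events) < 0)
			return -1;
		events.flags = 0;	/* flags must be zero */
		if (ioctl(dst_vcpu, KVM_SET_VCPU_EVENTS, &events) < 0)
			return -1;

		return 0;
	}
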
+ 9 - 0
arch/Kconfig

@@ -83,6 +83,13 @@ config KRETPROBES
 	def_bool y
 	depends on KPROBES && HAVE_KRETPROBES
 
+config USER_RETURN_NOTIFIER
+	bool
+	depends on HAVE_USER_RETURN_NOTIFIER
+	help
+	  Provide a kernel-internal notification when a cpu is about to
+	  switch to user mode.
+
 config HAVE_IOREMAP_PROT
 	bool
 
@@ -132,5 +139,7 @@ config HAVE_HW_BREAKPOINT
 	select ANON_INODES
 	select PERF_EVENTS
 
+config HAVE_USER_RETURN_NOTIFIER
+	bool
 
 source "kernel/gcov/Kconfig"
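
The option's help text is terse; below is a hedged sketch of how a subsystem is expected to use the interface (the header added by this merge, include/linux/user-return-notifier.h, is not reproduced in this excerpt, so names are inferred from the calls visible elsewhere in the diff):

	#include <linux/user-return-notifier.h>

	/* The callback runs on the arming cpu just before it returns to
	 * user mode; a typical use is lazily restoring host MSR values. */
	static void example_on_user_return(struct user_return_notifier *urn)
	{
		/* ... undo whatever per-cpu state was clobbered ... */
		user_return_notifier_unregister(urn);	/* one-shot use */
	}

	static struct user_return_notifier example_urn = {
		.on_user_return = example_on_user_return,
	};

	static void example_clobber_cpu_state(void)
	{
		/* ... load guest values into shared registers ... */
		user_return_notifier_register(&example_urn);
	}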

+ 1 - 0
arch/ia64/include/asm/kvm.h

@@ -60,6 +60,7 @@ struct kvm_ioapic_state {
 #define KVM_IRQCHIP_PIC_MASTER   0
 #define KVM_IRQCHIP_PIC_SLAVE    1
 #define KVM_IRQCHIP_IOAPIC       2
+#define KVM_NR_IRQCHIPS          3
 
 #define KVM_CONTEXT_SIZE	8*1024
 

+ 0 - 1
arch/ia64/include/asm/kvm_host.h

@@ -475,7 +475,6 @@ struct kvm_arch {
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
 	int iommu_flags;
-	struct hlist_head irq_ack_notifier_list;
 
 	unsigned long irq_sources_bitmap;
 	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];

+ 1 - 1
arch/ia64/kvm/Makefile

@@ -49,7 +49,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-		coalesced_mmio.o irq_comm.o)
+		coalesced_mmio.o irq_comm.o assigned-dev.o)
 
 ifeq ($(CONFIG_IOMMU_API),y)
 common-objs += $(addprefix ../../../virt/kvm/, iommu.o)

+ 8 - 11
arch/ia64/kvm/kvm-ia64.c

@@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
 
 static  DEFINE_SPINLOCK(vp_lock);
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
 	long  status;
 	long  tmp_base;
@@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage)
 	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
 	local_irq_restore(saved_psr);
 	if (slot < 0)
-		return;
+		return -EINVAL;
 
 	spin_lock(&vp_lock);
 	status = ia64_pal_vp_init_env(kvm_vsa_base ?
@@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage)
 			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
 	if (status != 0) {
 		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
-		return ;
+		return -EINVAL;
 	}
 
 	if (!kvm_vsa_base) {
@@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage)
 	}
 	spin_unlock(&vp_lock);
 	ia64_ptr_entry(0x3, slot);
+
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
@@ -851,8 +853,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
 	r = 0;
 	switch (chip->chip_id) {
 	case KVM_IRQCHIP_IOAPIC:
-		memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
-				sizeof(struct kvm_ioapic_state));
+		r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
 		break;
 	default:
 		r = -EINVAL;
@@ -868,9 +869,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 	r = 0;
 	switch (chip->chip_id) {
 	case KVM_IRQCHIP_IOAPIC:
-		memcpy(ioapic_irqchip(kvm),
-				&chip->chip.ioapic,
-				sizeof(struct kvm_ioapic_state));
+		r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
 		break;
 	default:
 		r = -EINVAL;
@@ -944,7 +943,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 {
 	struct kvm *kvm = filp->private_data;
 	void __user *argp = (void __user *)arg;
-	int r = -EINVAL;
+	int r = -ENOTTY;
 
 	switch (ioctl) {
 	case KVM_SET_MEMORY_REGION: {
@@ -985,10 +984,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			__s32 status;
-			mutex_lock(&kvm->irq_lock);
 			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 				    irq_event.irq, irq_event.level);
-			mutex_unlock(&kvm->irq_lock);
 			if (ioctl == KVM_IRQ_LINE_STATUS) {
 				irq_event.status = status;
 				if (copy_to_user(argp, &irq_event,

+ 3 - 2
arch/powerpc/kvm/powerpc.c

@@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	return r;
 }
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
@@ -421,7 +422,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 	switch (ioctl) {
 	default:
-		r = -EINVAL;
+		r = -ENOTTY;
 	}
 
 	return r;

+ 1 - 1
arch/powerpc/kvm/timing.h

@@ -51,7 +51,7 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
 
 	/* The BUILD_BUG_ON below breaks in funny ways, commented out
 	 * for now ... -BenH
-	BUILD_BUG_ON(__builtin_constant_p(type));
+	BUILD_BUG_ON(!__builtin_constant_p(type));
 	*/
 	switch (type) {
 	case EXT_INTR_EXITS:

+ 2 - 1
arch/s390/include/asm/kvm.h

@@ -1,6 +1,5 @@
 #ifndef __LINUX_KVM_S390_H
 #define __LINUX_KVM_S390_H
-
 /*
  * asm-s390/kvm.h - KVM s390 specific structures and definitions
  *
@@ -15,6 +14,8 @@
  */
 #include <linux/types.h>
 
+#define __KVM_S390
+
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 	/* general purpose regs for s390 */

+ 20 - 10
arch/s390/kvm/kvm-s390.c

@@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 static unsigned long long *facilities;
 
 /* Section: not file related */
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
 	/* every s390 is virtualization enabled ;-) */
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
@@ -116,10 +117,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
 
 int kvm_dev_ioctl_check_extension(long ext)
 {
+	int r;
+
 	switch (ext) {
+	case KVM_CAP_S390_PSW:
+		r = 1;
+		break;
 	default:
-		return 0;
+		r = 0;
 	}
+	return r;
 }
 
 /* Section: vm related */
@@ -150,7 +157,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		break;
 	}
 	default:
-		r = -EINVAL;
+		r = -ENOTTY;
 	}
 
 	return r;
@@ -419,8 +426,10 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
 	vcpu_load(vcpu);
 	if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
 		rc = -EBUSY;
-	else
-		vcpu->arch.sie_block->gpsw = psw;
+	else {
+		vcpu->run->psw_mask = psw.mask;
+		vcpu->run->psw_addr = psw.addr;
+	}
 	vcpu_put(vcpu);
 	return rc;
 }
@@ -508,9 +517,6 @@ rerun_vcpu:
 
 	switch (kvm_run->exit_reason) {
 	case KVM_EXIT_S390_SIEIC:
-		vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
-		vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
-		break;
 	case KVM_EXIT_UNKNOWN:
 	case KVM_EXIT_INTR:
 	case KVM_EXIT_S390_RESET:
@@ -519,6 +525,9 @@ rerun_vcpu:
 		BUG();
 	}
 
+	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
+	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
+
 	might_fault();
 
 	do {
@@ -538,8 +547,6 @@ rerun_vcpu:
 		/* intercept cannot be handled in-kernel, prepare kvm-run */
 		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
 		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
-		kvm_run->s390_sieic.mask     = vcpu->arch.sie_block->gpsw.mask;
-		kvm_run->s390_sieic.addr     = vcpu->arch.sie_block->gpsw.addr;
 		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
 		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
 		rc = 0;
@@ -551,6 +558,9 @@ rerun_vcpu:
 		rc = 0;
 	}
 
+	kvm_run->psw_mask     = vcpu->arch.sie_block->gpsw.mask;
+	kvm_run->psw_addr     = vcpu->arch.sie_block->gpsw.addr;
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 

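With KVM_CAP_S390_PSW advertised above, userspace no longer has to special-case SIEIC exits to learn the guest PSW: psw_mask and psw_addr are copied into kvm_run on every exit. A hedged userspace fragment (s390 only; vcpu fd and the mmap()ed kvm_run area are assumed to be set up by the caller):

	#include <linux/kvm.h>
	#include <stdio.h>
	#include <sys/ioctl.h>

	static int run_and_dump_psw(int vcpu_fd, struct kvm_run *run)
	{
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
			return -1;
		/* valid on every exit, not only KVM_EXIT_S390_SIEIC */
		fprintf(stderr, "guest psw %016llx:%016llx\n",
			(unsigned long long)run->psw_mask,
			(unsigned long long)run->psw_addr);
		return 0;
	}
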
+ 3 - 3
arch/s390/kvm/sigp.c

@@ -188,9 +188,9 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 
 	/* make sure that the new value is valid memory */
 	address = address & 0x7fffe000u;
-	if ((copy_from_guest(vcpu, &tmp,
-		(u64) (address + vcpu->arch.sie_block->gmsor) , 1)) ||
-	   (copy_from_guest(vcpu, &tmp, (u64) (address +
+	if ((copy_from_user(&tmp, (void __user *)
+		(address + vcpu->arch.sie_block->gmsor) , 1)) ||
+	   (copy_from_user(&tmp, (void __user *)(address +
 			vcpu->arch.sie_block->gmsor + PAGE_SIZE), 1))) {
 		*reg |= SIGP_STAT_INVALID_PARAMETER;
 		return 1; /* invalid parameter */

+ 1 - 0
arch/x86/Kconfig

@@ -51,6 +51,7 @@ config X86
 	select HAVE_KERNEL_LZMA
 	select HAVE_HW_BREAKPOINT
 	select HAVE_ARCH_KMEMCHECK
+	select HAVE_USER_RETURN_NOTIFIER
 
 config OUTPUT_FORMAT
 	string

+ 30 - 0
arch/x86/include/asm/kvm.h

@@ -19,6 +19,8 @@
 #define __KVM_HAVE_MSIX
 #define __KVM_HAVE_MCE
 #define __KVM_HAVE_PIT_STATE2
+#define __KVM_HAVE_XEN_HVM
+#define __KVM_HAVE_VCPU_EVENTS
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
@@ -79,6 +81,7 @@ struct kvm_ioapic_state {
 #define KVM_IRQCHIP_PIC_MASTER   0
 #define KVM_IRQCHIP_PIC_SLAVE    1
 #define KVM_IRQCHIP_IOAPIC       2
+#define KVM_NR_IRQCHIPS          3
 
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
@@ -250,4 +253,31 @@ struct kvm_reinject_control {
 	__u8 pit_reinject;
 	__u8 reserved[31];
 };
+
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+	struct {
+		__u8 injected;
+		__u8 nr;
+		__u8 has_error_code;
+		__u8 pad;
+		__u32 error_code;
+	} exception;
+	struct {
+		__u8 injected;
+		__u8 nr;
+		__u8 soft;
+		__u8 pad;
+	} interrupt;
+	struct {
+		__u8 injected;
+		__u8 pending;
+		__u8 masked;
+		__u8 pad;
+	} nmi;
+	__u32 sipi_vector;
+	__u32 flags;
+	__u32 reserved[10];
+};
+
 #endif /* _ASM_X86_KVM_H */

+ 1 - 1
arch/x86/include/asm/kvm_emulate.h

@@ -129,7 +129,7 @@ struct decode_cache {
 	u8 seg_override;
 	unsigned int d;
 	unsigned long regs[NR_VCPU_REGS];
-	unsigned long eip;
+	unsigned long eip, eip_orig;
 	/* modrm */
 	u8 modrm;
 	u8 modrm_mod;

+ 23 - 11
arch/x86/include/asm/kvm_host.h

@@ -354,7 +354,6 @@ struct kvm_vcpu_arch {
 	unsigned int time_offset;
 	struct page *time_page;
 
-	bool singlestep; /* guest is single stepped by KVM */
 	bool nmi_pending;
 	bool nmi_injected;
 
@@ -371,6 +370,10 @@ struct kvm_vcpu_arch {
 	u64 mcg_status;
 	u64 mcg_ctl;
 	u64 *mce_banks;
+
+	/* used for guest single stepping over the given code position */
+	u16 singlestep_cs;
+	unsigned long singlestep_rip;
 };
 
 struct kvm_mem_alias {
@@ -397,7 +400,6 @@ struct kvm_arch{
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
-	struct hlist_head irq_ack_notifier_list;
 	int vapics_in_nmi_mode;
 
 	unsigned int tss_addr;
@@ -410,8 +412,10 @@ struct kvm_arch{
 	gpa_t ept_identity_map_addr;
 
 	unsigned long irq_sources_bitmap;
-	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
 	u64 vm_init_tsc;
+	s64 kvmclock_offset;
+
+	struct kvm_xen_hvm_config xen_hvm_config;
 };
 
 struct kvm_vm_stat {
@@ -461,7 +465,7 @@ struct descriptor_table {
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
-	void (*hardware_enable)(void *dummy);      /* __init */
+	int (*hardware_enable)(void *dummy);
 	void (*hardware_disable)(void *dummy);
 	void (*check_processor_compatibility)(void *rtn);
 	int (*hardware_setup)(void);               /* __init */
@@ -477,8 +481,8 @@ struct kvm_x86_ops {
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);
 
-	int (*set_guest_debug)(struct kvm_vcpu *vcpu,
-			       struct kvm_guest_debug *dbg);
+	void (*set_guest_debug)(struct kvm_vcpu *vcpu,
+				struct kvm_guest_debug *dbg);
 	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
 	int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 	u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -506,8 +510,8 @@ struct kvm_x86_ops {
 
 	void (*tlb_flush)(struct kvm_vcpu *vcpu);
 
-	void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
-	int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+	void (*run)(struct kvm_vcpu *vcpu);
+	int (*handle_exit)(struct kvm_vcpu *vcpu);
 	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
 	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
 	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
@@ -519,6 +523,8 @@ struct kvm_x86_ops {
 				bool has_error_code, u32 error_code);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
 	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
+	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
 	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
 	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
@@ -568,7 +574,7 @@ enum emulation_result {
 #define EMULTYPE_NO_DECODE	    (1 << 0)
 #define EMULTYPE_TRAP_UD	    (1 << 1)
 #define EMULTYPE_SKIP		    (1 << 2)
-int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
+int emulate_instruction(struct kvm_vcpu *vcpu,
 			unsigned long cr2, u16 error_code, int emulation_type);
 void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
@@ -585,9 +591,9 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 
 struct x86_emulate_ctxt;
 
-int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in,
 		     int size, unsigned port);
-int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
 			   int size, unsigned long count, int down,
 			    gva_t address, int rep, unsigned port);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
@@ -616,6 +622,9 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 
+unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
+void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
+
 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
@@ -802,4 +811,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 
+void kvm_define_shared_msr(unsigned index, u32 msr);
+void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+
 #endif /* _ASM_X86_KVM_HOST_H */
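
The two shared-MSR declarations above tie into the user-return-notifier work. A hedged sketch of the intended calling pattern, inferred from the declarations (the implementation lives in the x86.c changes; EXAMPLE_SLOT is a hypothetical slot index):

	#include <linux/kvm_host.h>

	#define EXAMPLE_SLOT 0	/* hypothetical slot index */

	/* Register the MSR once at module setup ... */
	static void example_shared_msr_setup(void)
	{
		kvm_define_shared_msr(EXAMPLE_SLOT, MSR_STAR);
	}

	/* ... then, before entering the guest, install the guest value.
	 * The host value is restored lazily via the user-return notifier
	 * instead of on every vmexit. ~0ULL replaces every bit. */
	static void example_prepare_guest(u64 guest_val)
	{
		kvm_set_shared_msr(EXAMPLE_SLOT, guest_val, ~0ULL);
	}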

+ 2 - 1
arch/x86/include/asm/svm.h

@@ -57,7 +57,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 	u16 intercept_dr_write;
 	u32 intercept_exceptions;
 	u64 intercept;
-	u8 reserved_1[44];
+	u8 reserved_1[42];
+	u16 pause_filter_count;
 	u64 iopm_base_pa;
 	u64 msrpm_base_pa;
 	u64 tsc_offset;

+ 5 - 2
arch/x86/include/asm/thread_info.h

@@ -83,6 +83,7 @@ struct thread_info {
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
+#define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* 32bit process */
 #define TIF_FORK		18	/* ret_from_fork */
@@ -107,6 +108,7 @@ struct thread_info {
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
+#define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
@@ -142,13 +144,14 @@ struct thread_info {
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
+	(_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME |	\
+	 _TIF_USER_RETURN_NOTIFY)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
 	(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
 
-#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
 
 #define PREEMPT_ACTIVE		0x10000000

+ 4 - 0
arch/x86/include/asm/vmx.h

@@ -56,6 +56,7 @@
 #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
 #define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
 
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
@@ -144,6 +145,8 @@ enum vmcs_field {
 	VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
 	TPR_THRESHOLD                   = 0x0000401c,
 	SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
+	PLE_GAP                         = 0x00004020,
+	PLE_WINDOW                      = 0x00004022,
 	VM_INSTRUCTION_ERROR            = 0x00004400,
 	VM_EXIT_REASON                  = 0x00004402,
 	VM_EXIT_INTR_INFO               = 0x00004404,
@@ -248,6 +251,7 @@ enum vmcs_field {
 #define EXIT_REASON_MSR_READ            31
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_PAUSE_INSTRUCTION   40
 #define EXIT_REASON_MCE_DURING_VMENTRY	 41
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
 #define EXIT_REASON_APIC_ACCESS         44

+ 2 - 0
arch/x86/kernel/process.c

@@ -9,6 +9,7 @@
 #include <linux/pm.h>
 #include <linux/clockchips.h>
 #include <linux/random.h>
+#include <linux/user-return-notifier.h>
 #include <trace/events/power.h>
 #include <linux/hw_breakpoint.h>
 #include <asm/system.h>
@@ -209,6 +210,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		 */
 		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 	}
+	propagate_user_return_notify(prev_p, next_p);
 }
 
 int sys_fork(struct pt_regs *regs)

+ 3 - 0
arch/x86/kernel/signal.c

@@ -19,6 +19,7 @@
 #include <linux/stddef.h>
 #include <linux/personality.h>
 #include <linux/uaccess.h>
+#include <linux/user-return-notifier.h>
 
 #include <asm/processor.h>
 #include <asm/ucontext.h>
@@ -863,6 +864,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 		if (current->replacement_session_keyring)
 			key_replace_session_keyring();
 	}
+	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
+		fire_user_return_notifiers();
 
 #ifdef CONFIG_X86_32
 	clear_thread_flag(TIF_IRET);

+ 1 - 0
arch/x86/kvm/Kconfig

@@ -28,6 +28,7 @@ config KVM
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_EVENTFD
 	select KVM_APIC_ARCHITECTURE
+	select USER_RETURN_NOTIFIER
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent

+ 2 - 1
arch/x86/kvm/Makefile

@@ -6,7 +6,8 @@ CFLAGS_svm.o := -I.
 CFLAGS_vmx.o := -I.
 
 kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-				coalesced_mmio.o irq_comm.o eventfd.o)
+				coalesced_mmio.o irq_comm.o eventfd.o \
+				assigned-dev.o)
 kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \

+ 145 - 14
arch/x86/kvm/emulate.c

@@ -75,6 +75,8 @@
 #define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
 #define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
 #define GroupMask   0xff        /* Group number stored in bits 0:7 */
+/* Misc flags */
+#define No64	    (1<<28)
 /* Source 2 operand type */
 #define Src2None    (0<<29)
 #define Src2CL      (1<<29)
@@ -92,19 +94,23 @@ static u32 opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
+	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
 	/* 0x08 - 0x0F */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
+	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+	ImplicitOps | Stack | No64, 0,
 	/* 0x10 - 0x17 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
+	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
 	/* 0x18 - 0x1F */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
+	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
 	/* 0x20 - 0x27 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -133,7 +139,8 @@ static u32 opcode_table[256] = {
 	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
 	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
 	/* 0x60 - 0x67 */
-	0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
+	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
+	0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
 	0, 0, 0, 0,
 	/* 0x68 - 0x6F */
 	SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
@@ -158,7 +165,7 @@ static u32 opcode_table[256] = {
 	/* 0x90 - 0x97 */
 	DstReg, DstReg, DstReg, DstReg,	DstReg, DstReg, DstReg, DstReg,
 	/* 0x98 - 0x9F */
-	0, 0, SrcImm | Src2Imm16, 0,
+	0, 0, SrcImm | Src2Imm16 | No64, 0,
 	ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
 	/* 0xA0 - 0xA7 */
 	ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
@@ -185,7 +192,7 @@ static u32 opcode_table[256] = {
 	ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
 	/* 0xC8 - 0xCF */
 	0, 0, 0, ImplicitOps | Stack,
-	ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps,
+	ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
 	/* 0xD0 - 0xD7 */
 	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
 	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -198,7 +205,7 @@ static u32 opcode_table[256] = {
 	ByteOp | SrcImmUByte, SrcImmUByte,
 	/* 0xE8 - 0xEF */
 	SrcImm | Stack, SrcImm | ImplicitOps,
-	SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps,
+	SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	/* 0xF0 - 0xF7 */
@@ -244,11 +251,13 @@ static u32 twobyte_table[256] = {
 	/* 0x90 - 0x9F */
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xA0 - 0xA7 */
-	0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+	ImplicitOps | Stack, ImplicitOps | Stack,
+	0, DstMem | SrcReg | ModRM | BitOp,
 	DstMem | SrcReg | Src2ImmByte | ModRM,
 	DstMem | SrcReg | Src2CL | ModRM, 0, 0,
 	/* 0xA8 - 0xAF */
-	0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+	ImplicitOps | Stack, ImplicitOps | Stack,
+	0, DstMem | SrcReg | ModRM | BitOp,
 	DstMem | SrcReg | Src2ImmByte | ModRM,
 	DstMem | SrcReg | Src2CL | ModRM,
 	ModRM, 0,
@@ -613,6 +622,9 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
 {
 	int rc = 0;
 
+	/* x86 instructions are limited to 15 bytes. */
+	if (eip + size - ctxt->decode.eip_orig > 15)
+		return X86EMUL_UNHANDLEABLE;
 	eip += ctxt->cs_base;
 	while (size--) {
 		rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
@@ -871,7 +883,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 	/* Shadow copy of register state. Committed on successful emulation. */
 
 	memset(c, 0, sizeof(struct decode_cache));
-	c->eip = kvm_rip_read(ctxt->vcpu);
+	c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu);
 	ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
 	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
 
@@ -962,6 +974,11 @@ done_prefixes:
 		}
 	}
 
+	if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
+		kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");
+		return -1;
+	}
+
 	if (c->d & Group) {
 		group = c->d & GroupMask;
 		c->modrm = insn_fetch(u8, 1, c->eip);
@@ -1186,6 +1203,69 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
 	return rc;
 }
 
+static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
+{
+	struct decode_cache *c = &ctxt->decode;
+	struct kvm_segment segment;
+
+	kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);
+
+	c->src.val = segment.selector;
+	emulate_push(ctxt);
+}
+
+static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
+			     struct x86_emulate_ops *ops, int seg)
+{
+	struct decode_cache *c = &ctxt->decode;
+	unsigned long selector;
+	int rc;
+
+	rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
+	if (rc != 0)
+		return rc;
+
+	rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg);
+	return rc;
+}
+
+static void emulate_pusha(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	unsigned long old_esp = c->regs[VCPU_REGS_RSP];
+	int reg = VCPU_REGS_RAX;
+
+	while (reg <= VCPU_REGS_RDI) {
+		(reg == VCPU_REGS_RSP) ?
+		(c->src.val = old_esp) : (c->src.val = c->regs[reg]);
+
+		emulate_push(ctxt);
+		++reg;
+	}
+}
+
+static int emulate_popa(struct x86_emulate_ctxt *ctxt,
+			struct x86_emulate_ops *ops)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc = 0;
+	int reg = VCPU_REGS_RDI;
+
+	while (reg >= VCPU_REGS_RAX) {
+		if (reg == VCPU_REGS_RSP) {
+			register_address_increment(c, &c->regs[VCPU_REGS_RSP],
+							c->op_bytes);
+			--reg;
+		}
+
+		rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
+		if (rc != 0)
+			break;
+		--reg;
+	}
+	return rc;
+}
+
 static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
 				struct x86_emulate_ops *ops)
 {
@@ -1707,18 +1787,45 @@ special_insn:
 	      add:		/* add */
 		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
 		break;
+	case 0x06:		/* push es */
+		emulate_push_sreg(ctxt, VCPU_SREG_ES);
+		break;
+	case 0x07:		/* pop es */
+		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
+		if (rc != 0)
+			goto done;
+		break;
 	case 0x08 ... 0x0d:
 	      or:		/* or */
 		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
 		break;
+	case 0x0e:		/* push cs */
+		emulate_push_sreg(ctxt, VCPU_SREG_CS);
+		break;
 	case 0x10 ... 0x15:
 	      adc:		/* adc */
 		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
 		break;
+	case 0x16:		/* push ss */
+		emulate_push_sreg(ctxt, VCPU_SREG_SS);
+		break;
+	case 0x17:		/* pop ss */
+		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
+		if (rc != 0)
+			goto done;
+		break;
 	case 0x18 ... 0x1d:
 	      sbb:		/* sbb */
 		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
 		break;
+	case 0x1e:		/* push ds */
+		emulate_push_sreg(ctxt, VCPU_SREG_DS);
+		break;
+	case 0x1f:		/* pop ds */
+		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
+		if (rc != 0)
+			goto done;
+		break;
 	case 0x20 ... 0x25:
 	      and:		/* and */
 		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
@@ -1750,6 +1857,14 @@ special_insn:
 		if (rc != 0)
 			goto done;
 		break;
+	case 0x60:	/* pusha */
+		emulate_pusha(ctxt);
+		break;
+	case 0x61:	/* popa */
+		rc = emulate_popa(ctxt, ops);
+		if (rc != 0)
+			goto done;
+		break;
 	case 0x63:		/* movsxd */
 		if (ctxt->mode != X86EMUL_MODE_PROT64)
 			goto cannot_emulate;
@@ -1761,7 +1876,7 @@ special_insn:
 		break;
 	case 0x6c:		/* insb */
 	case 0x6d:		/* insw/insd */
-		 if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
+		 if (kvm_emulate_pio_string(ctxt->vcpu,
 				1,
 				(c->d & ByteOp) ? 1 : c->op_bytes,
 				c->rep_prefix ?
@@ -1777,7 +1892,7 @@ special_insn:
 		return 0;
 	case 0x6e:		/* outsb */
 	case 0x6f:		/* outsw/outsd */
-		if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
+		if (kvm_emulate_pio_string(ctxt->vcpu,
 				0,
 				(c->d & ByteOp) ? 1 : c->op_bytes,
 				c->rep_prefix ?
@@ -2070,7 +2185,7 @@ special_insn:
 	case 0xef: /* out (e/r)ax,dx */
 		port = c->regs[VCPU_REGS_RDX];
 		io_dir_in = 0;
-	do_io:	if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in,
+	do_io:	if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
 				   (c->d & ByteOp) ? 1 : c->op_bytes,
 				   port) != 0) {
 			c->eip = saved_eip;
@@ -2297,6 +2412,14 @@ twobyte_insn:
 			jmp_rel(c, c->src.val);
 		c->dst.type = OP_NONE;
 		break;
+	case 0xa0:	  /* push fs */
+		emulate_push_sreg(ctxt, VCPU_SREG_FS);
+		break;
+	case 0xa1:	 /* pop fs */
+		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
+		if (rc != 0)
+			goto done;
+		break;
 	case 0xa3:
 	      bt:		/* bt */
 		c->dst.type = OP_NONE;
@@ -2308,6 +2431,14 @@ twobyte_insn:
 	case 0xa5: /* shld cl, r, r/m */
 		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
 		break;
+	case 0xa8:	/* push gs */
+		emulate_push_sreg(ctxt, VCPU_SREG_GS);
+		break;
+	case 0xa9:	/* pop gs */
+		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
+		if (rc != 0)
+			goto done;
+		break;
 	case 0xab:
 	      bts:		/* bts */
 		/* only subword offset */
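
The 15-byte limit check added above is easy to misread because do_insn_fetch() is called repeatedly as decode proceeds; a worked example (values invented for illustration):

	/*
	 *	eip_orig = 0x1000	decode of this instruction began here
	 *	eip      = 0x100c	current fetch position
	 *	size     = 4		bytes requested by this fetch
	 *
	 *	0x100c + 4 - 0x1000 = 16 > 15  ->  X86EMUL_UNHANDLEABLE
	 *
	 * Comparing against eip_orig rather than the running eip bounds the
	 * cumulative instruction length across incremental fetches, which
	 * is why the commit adds eip_orig to the decode cache.
	 */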

+ 0 - 2
arch/x86/kvm/i8254.c

@@ -688,10 +688,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
 	struct kvm_vcpu *vcpu;
 	int i;
 
-	mutex_lock(&kvm->irq_lock);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
-	mutex_unlock(&kvm->irq_lock);
 
 	/*
 	 * Provides NMI watchdog support via Virtual Wire mode.

+ 25 - 19
arch/x86/kvm/i8259.c

@@ -38,7 +38,15 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
 	s->isr_ack |= (1 << irq);
 	if (s != &s->pics_state->pics[0])
 		irq += 8;
+	/*
+	 * We are dropping lock while calling ack notifiers since ack
+	 * notifier callbacks for assigned devices call into PIC recursively.
+	 * Other interrupts may be delivered to the PIC while the lock is
+	 * dropped, but that should be safe since PIC state is already
+	 * updated at this stage.
+	 */
+	spin_unlock(&s->pics_state->lock);
 	kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
+	spin_lock(&s->pics_state->lock);
 }
 
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
@@ -176,16 +184,18 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
 static inline void pic_intack(struct kvm_kpic_state *s, int irq)
 {
 	s->isr |= 1 << irq;
-	if (s->auto_eoi) {
-		if (s->rotate_on_auto_eoi)
-			s->priority_add = (irq + 1) & 7;
-		pic_clear_isr(s, irq);
-	}
 	/*
 	 * We don't clear a level sensitive interrupt here
 	 */
 	if (!(s->elcr & (1 << irq)))
 		s->irr &= ~(1 << irq);
+
+	if (s->auto_eoi) {
+		if (s->rotate_on_auto_eoi)
+			s->priority_add = (irq + 1) & 7;
+		pic_clear_isr(s, irq);
+	}
+
 }
 
 int kvm_pic_read_irq(struct kvm *kvm)
@@ -225,22 +235,11 @@ int kvm_pic_read_irq(struct kvm *kvm)
 
 void kvm_pic_reset(struct kvm_kpic_state *s)
 {
-	int irq, irqbase, n;
+	int irq;
 	struct kvm *kvm = s->pics_state->irq_request_opaque;
 	struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu;
+	u8 irr = s->irr, isr = s->isr;
 
-	if (s == &s->pics_state->pics[0])
-		irqbase = 0;
-	else
-		irqbase = 8;
-
-	for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
-		if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
-			if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
-				n = irq + irqbase;
-				kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
-			}
-	}
 	s->last_irr = 0;
 	s->irr = 0;
 	s->imr = 0;
@@ -256,6 +255,13 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 	s->rotate_on_auto_eoi = 0;
 	s->special_fully_nested_mode = 0;
 	s->init4 = 0;
+
+	for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
+		if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
+			if (irr & (1 << irq) || isr & (1 << irq)) {
+				pic_clear_isr(s, irq);
+			}
+	}
 }
 
 static void pic_ioport_write(void *opaque, u32 addr, u32 val)
@@ -298,9 +304,9 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
 				priority = get_priority(s, s->isr);
 				if (priority != 8) {
 					irq = (priority + s->priority_add) & 7;
-					pic_clear_isr(s, irq);
 					if (cmd == 5)
 						s->priority_add = (irq + 1) & 7;
+					pic_clear_isr(s, irq);
 					pic_update_irq(s->pics_state);
 				}
 				break;

+ 6 - 1
arch/x86/kvm/irq.h

@@ -71,6 +71,7 @@ struct kvm_pic {
 	int output;		/* intr from master PIC */
 	struct kvm_io_device dev;
 	void (*ack_notifier)(void *opaque, int irq);
+	unsigned long irq_states[16];
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
@@ -85,7 +86,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
 
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
-	return pic_irqchip(kvm) != NULL;
+	int ret;
+
+	ret = (pic_irqchip(kvm) != NULL);
+	smp_rmb();
+	return ret;
 }
 
 void kvm_pic_reset(struct kvm_kpic_state *s);
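
The smp_rmb() added to irqchip_in_kernel() above is one half of a pairing. A hedged sketch of the matching publish side (the real code is in the KVM_CREATE_IRQCHIP handler in x86.c, which this excerpt does not show):

	/* Initialize irqchip state fully, then publish the pointer, so a
	 * reader that observes pic_irqchip() != NULL via the smp_rmb()
	 * above also observes fully initialized state. */
	static int example_create_irqchip(struct kvm *kvm)
	{
		struct kvm_pic *vpic = kvm_create_pic(kvm);

		if (!vpic)
			return -ENOMEM;
		/* ... create the ioapic, install default routing ... */
		smp_wmb();	/* pairs with smp_rmb() in irqchip_in_kernel() */
		kvm->arch.vpic = vpic;
		return 0;
	}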

+ 1 - 7
arch/x86/kvm/lapic.c

@@ -32,7 +32,6 @@
 #include <asm/current.h>
 #include <asm/apicdef.h>
 #include <asm/atomic.h>
-#include <asm/apicdef.h>
 #include "kvm_cache_regs.h"
 #include "irq.h"
 #include "trace.h"
@@ -471,11 +470,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
 		trigger_mode = IOAPIC_LEVEL_TRIG;
 	else
 		trigger_mode = IOAPIC_EDGE_TRIG;
-	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
-		mutex_lock(&apic->vcpu->kvm->irq_lock);
+	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
 		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
-		mutex_unlock(&apic->vcpu->kvm->irq_lock);
-	}
 }
 
 static void apic_send_ipi(struct kvm_lapic *apic)
@@ -504,9 +500,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 		   irq.vector);
 
-	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
-	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)

+ 2 - 1
arch/x86/kvm/mmu.c

@@ -2789,7 +2789,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
 	if (r)
 		goto out;
 
-	er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
+	er = emulate_instruction(vcpu, cr2, error_code, 0);
 
 	switch (er) {
 	case EMULATE_DONE:
@@ -2800,6 +2800,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
 	case EMULATE_FAIL:
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
 		return 0;
 	default:
 		BUG();

+ 0 - 1
arch/x86/kvm/paging_tmpl.h

@@ -467,7 +467,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		level = iterator.level;
 		sptep = iterator.sptep;
 
-		/* FIXME: properly handle invlpg on large guest pages */
 		if (level == PT_PAGE_TABLE_LEVEL  ||
 		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
 		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {

+ 203 - 128
arch/x86/kvm/svm.c

@@ -46,6 +46,7 @@ MODULE_LICENSE("GPL");
 #define SVM_FEATURE_NPT  (1 << 0)
 #define SVM_FEATURE_LBRV (1 << 1)
 #define SVM_FEATURE_SVML (1 << 2)
+#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
 
 #define NESTED_EXIT_HOST	0	/* Exit handled on host level */
 #define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
@@ -53,15 +54,6 @@ MODULE_LICENSE("GPL");
 
 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
 
-/* Turn on to get debugging output*/
-/* #define NESTED_DEBUG */
-
-#ifdef NESTED_DEBUG
-#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
-#else
-#define nsvm_printk(fmt, args...) do {} while(0)
-#endif
-
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
 	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@ -85,6 +77,9 @@ struct nested_state {
 	/* gpa pointers to the real vectors */
 	u64 vmcb_msrpm;
 
+	/* A VMEXIT is required but not yet emulated */
+	bool exit_required;
+
 	/* cache for intercepts of the guest */
 	u16 intercept_cr_read;
 	u16 intercept_cr_write;
@@ -112,6 +107,8 @@ struct vcpu_svm {
 	u32 *msrpm;
 
 	struct nested_state nested;
+
+	bool nmi_singlestep;
 };
 
 /* enable NPT for AMD64 and X86 with PAE */
@@ -286,7 +283,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (!svm->next_rip) {
-		if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
+		if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
 				EMULATE_DONE)
 			printk(KERN_DEBUG "%s: NOP\n", __func__);
 		return;
@@ -316,7 +313,7 @@ static void svm_hardware_disable(void *garbage)
 	cpu_svm_disable();
 }
 
-static void svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void *garbage)
 {
 
 	struct svm_cpu_data *svm_data;
@@ -325,16 +322,21 @@ static void svm_hardware_enable(void *garbage)
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
+	rdmsrl(MSR_EFER, efer);
+	if (efer & EFER_SVME)
+		return -EBUSY;
+
 	if (!has_svm()) {
-		printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
-		return;
+		printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
+		       me);
+		return -EINVAL;
 	}
 	svm_data = per_cpu(svm_data, me);
 
 	if (!svm_data) {
-		printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
+		printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
 		       me);
-		return;
+		return -EINVAL;
 	}
 
 	svm_data->asid_generation = 1;
@@ -345,11 +347,12 @@ static void svm_hardware_enable(void *garbage)
 	gdt = (struct desc_struct *)gdt_descr.base;
 	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
-	rdmsrl(MSR_EFER, efer);
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
 
 	wrmsrl(MSR_VM_HSAVE_PA,
 	       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
+
+	return 0;
 }
 
 static void svm_cpu_uninit(int cpu)
@@ -476,7 +479,7 @@ static __init int svm_hardware_setup(void)
 		kvm_enable_efer_bits(EFER_SVME);
 	}
 
-	for_each_online_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		r = svm_cpu_init(cpu);
 		if (r)
 			goto err;
@@ -510,7 +513,7 @@ static __exit void svm_hardware_unsetup(void)
 {
 	int cpu;
 
-	for_each_online_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		svm_cpu_uninit(cpu);
 
 	__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
@@ -625,11 +628,12 @@ static void init_vmcb(struct vcpu_svm *svm)
 	save->rip = 0x0000fff0;
 	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
 
-	/*
-	 * cr0 val on cpu init should be 0x60000010, we enable cpu
-	 * cache by default. the orderly way is to enable cache in bios.
+	/* This is the guest-visible cr0 value.
+	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
 	 */
-	save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
+	svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+	kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
+
 	save->cr4 = X86_CR4_PAE;
 	/* rdx = ?? */
 
@@ -644,8 +648,6 @@ static void init_vmcb(struct vcpu_svm *svm)
 		control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
 						 INTERCEPT_CR3_MASK);
 		save->g_pat = 0x0007040600070406ULL;
-		/* enable caching because the QEMU Bios doesn't enable it */
-		save->cr0 = X86_CR0_ET;
 		save->cr3 = 0;
 		save->cr4 = 0;
 	}
@@ -654,6 +656,11 @@ static void init_vmcb(struct vcpu_svm *svm)
 	svm->nested.vmcb = 0;
 	svm->vcpu.arch.hflags = 0;
 
+	if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
+		control->pause_filter_count = 3000;
+		control->intercept |= (1ULL << INTERCEPT_PAUSE);
+	}
+
 	enable_gif(svm);
 }
 
@@ -758,14 +765,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	int i;
 
 	if (unlikely(cpu != vcpu->cpu)) {
-		u64 tsc_this, delta;
+		u64 delta;
 
 		/*
 		 * Make sure that the guest sees a monotonically
 		 * increasing TSC.
 		 */
-		rdtscll(tsc_this);
-		delta = vcpu->arch.host_tsc - tsc_this;
+		delta = vcpu->arch.host_tsc - native_read_tsc();
 		svm->vmcb->control.tsc_offset += delta;
 		if (is_nested(svm))
 			svm->nested.hsave->control.tsc_offset += delta;
@@ -787,7 +793,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
 		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 
-	rdtscll(vcpu->arch.host_tsc);
+	vcpu->arch.host_tsc = native_read_tsc();
 }
 
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@ -1045,7 +1051,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
 	svm->vmcb->control.intercept_exceptions &=
 		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
 
-	if (vcpu->arch.singlestep)
+	if (svm->nmi_singlestep)
 		svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
 
 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
@@ -1060,26 +1066,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
 		vcpu->guest_debug = 0;
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 {
-	int old_debug = vcpu->guest_debug;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	vcpu->guest_debug = dbg->control;
-
-	update_db_intercept(vcpu);
-
 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
 		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
 	else
 		svm->vmcb->save.dr7 = vcpu->arch.dr7;
 
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
-	else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
-		svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-
-	return 0;
+	update_db_intercept(vcpu);
 }
 
 static void load_host_msrs(struct kvm_vcpu *vcpu)
@@ -1180,7 +1176,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 	}
 }
 
-static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int pf_interception(struct vcpu_svm *svm)
 {
 	u64 fault_address;
 	u32 error_code;
@@ -1194,17 +1190,19 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
-static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int db_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	if (!(svm->vcpu.guest_debug &
 	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
-		!svm->vcpu.arch.singlestep) {
+		!svm->nmi_singlestep) {
 		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
 		return 1;
 	}
 
-	if (svm->vcpu.arch.singlestep) {
-		svm->vcpu.arch.singlestep = false;
+	if (svm->nmi_singlestep) {
+		svm->nmi_singlestep = false;
 		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
 			svm->vmcb->save.rflags &=
 				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
@@ -1223,25 +1221,27 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int bp_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	kvm_run->exit_reason = KVM_EXIT_DEBUG;
 	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
 	kvm_run->debug.arch.exception = BP_VECTOR;
 	return 0;
 }
 
-static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int ud_interception(struct vcpu_svm *svm)
 {
 	int er;
 
-	er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
+	er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
 	if (er != EMULATE_DONE)
 		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 	return 1;
 }
 
-static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nm_interception(struct vcpu_svm *svm)
 {
 	svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
 	if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
@@ -1251,7 +1251,7 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int mc_interception(struct vcpu_svm *svm)
 {
 	/*
 	 * On an #MC intercept the MCE handler is not called automatically in
@@ -1264,8 +1264,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int shutdown_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	/*
 	 * VMCB is undefined after a SHUTDOWN intercept
 	 * so reinitialize it.
@@ -1277,7 +1279,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 0;
 }
 
-static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int io_interception(struct vcpu_svm *svm)
 {
 	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
 	int size, in, string;
@@ -1291,7 +1293,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 
 	if (string) {
 		if (emulate_instruction(&svm->vcpu,
-					kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
+					0, 0, 0) == EMULATE_DO_MMIO)
 			return 0;
 		return 1;
 	}
@@ -1301,33 +1303,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
 
 	skip_emulated_instruction(&svm->vcpu);
-	return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
+	return kvm_emulate_pio(&svm->vcpu, in, size, port);
 }
 
-static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nmi_interception(struct vcpu_svm *svm)
 {
 	return 1;
 }
 
-static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int intr_interception(struct vcpu_svm *svm)
 {
 	++svm->vcpu.stat.irq_exits;
 	return 1;
 }
 
-static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nop_on_interception(struct vcpu_svm *svm)
 {
 	return 1;
 }
 
-static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int halt_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
 	skip_emulated_instruction(&svm->vcpu);
 	return kvm_emulate_halt(&svm->vcpu);
 }
 
-static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmmcall_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	skip_emulated_instruction(&svm->vcpu);
@@ -1378,8 +1380,15 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
 
 	svm->vmcb->control.exit_code = SVM_EXIT_INTR;
 
-	if (nested_svm_exit_handled(svm)) {
-		nsvm_printk("VMexit -> INTR\n");
+	if (svm->nested.intercept & 1ULL) {
+		/*
+		 * The #vmexit can't be emulated here directly because this
+	 * code path runs with irqs and preemption disabled. A
+	 * #vmexit emulation might sleep. Only signal a request for
+		 * the #vmexit here.
+		 */
+		svm->nested.exit_required = true;
+		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
 		return 1;
 	}
 
@@ -1390,10 +1399,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)
 {
 	struct page *page;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
-	up_read(&current->mm->mmap_sem);
-
 	if (is_error_page(page))
 		goto error;
 
@@ -1532,14 +1538,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
 	}
 	default: {
 		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
-		nsvm_printk("exit code: 0x%x\n", exit_code);
 		if (svm->nested.intercept & exit_bits)
 			vmexit = NESTED_EXIT_DONE;
 	}
 	}
 
 	if (vmexit == NESTED_EXIT_DONE) {
-		nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
 		nested_svm_vmexit(svm);
 	}
 
@@ -1584,6 +1588,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	struct vmcb *hsave = svm->nested.hsave;
 	struct vmcb *vmcb = svm->vmcb;
 
+	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+				       vmcb->control.exit_info_1,
+				       vmcb->control.exit_info_2,
+				       vmcb->control.exit_int_info,
+				       vmcb->control.exit_int_info_err);
+
 	nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
 	if (!nested_vmcb)
 		return 1;
@@ -1617,6 +1627,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
 	nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
 	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+
+	/*
+	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
+	 * to make sure that we do not lose injected events. So check event_inj
+	 * here and copy it to exit_int_info if it is valid.
+	 * Exit_int_info and event_inj can't both be valid because the case
+	 * below only happens on a VMRUN instruction intercept which has
+	 * no valid exit_int_info set.
+	 */
+	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
+		struct vmcb_control_area *nc = &nested_vmcb->control;
+
+		nc->exit_int_info     = vmcb->control.event_inj;
+		nc->exit_int_info_err = vmcb->control.event_inj_err;
+	}
+
 	nested_vmcb->control.tlb_ctl           = 0;
 	nested_vmcb->control.event_inj         = 0;
 	nested_vmcb->control.event_inj_err     = 0;
@@ -1628,10 +1654,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	/* Restore the original control entries */
 	copy_vmcb_control_area(vmcb, hsave);
 
-	/* Kill any pending exceptions */
-	if (svm->vcpu.arch.exception.pending == true)
-		nsvm_printk("WARNING: Pending Exception\n");
-
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);
 
@@ -1702,6 +1724,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	/* nested_vmcb is our indicator if nested SVM is activated */
 	svm->nested.vmcb = svm->vmcb->save.rax;
 
+	trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+			       nested_vmcb->save.rip,
+			       nested_vmcb->control.int_ctl,
+			       nested_vmcb->control.event_inj,
+			       nested_vmcb->control.nested_ctl);
+
 	/* Clear internal status */
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);
@@ -1789,28 +1817,15 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	svm->nested.intercept            = nested_vmcb->control.intercept;
 
 	force_new_asid(&svm->vcpu);
-	svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
-	svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
 	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
-	if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
-		nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
-				nested_vmcb->control.int_ctl);
-	}
 	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
 		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
 	else
 		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
 
-	nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
-			nested_vmcb->control.exit_int_info,
-			nested_vmcb->control.int_state);
-
 	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
 	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
 	svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
-	if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
-		nsvm_printk("Injecting Event: 0x%x\n",
-				nested_vmcb->control.event_inj);
 	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
 	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
 
@@ -1837,7 +1852,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
 	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
 }
 
-static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmload_interception(struct vcpu_svm *svm)
 {
 	struct vmcb *nested_vmcb;
 
@@ -1857,7 +1872,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmsave_interception(struct vcpu_svm *svm)
 {
 	struct vmcb *nested_vmcb;
 
@@ -1877,10 +1892,8 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmrun_interception(struct vcpu_svm *svm)
 {
-	nsvm_printk("VMrun\n");
-
 	if (nested_svm_check_permissions(svm))
 		return 1;
 
@@ -1907,7 +1920,7 @@ failed:
 	return 1;
 }
 
-static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int stgi_interception(struct vcpu_svm *svm)
 {
 	if (nested_svm_check_permissions(svm))
 		return 1;
@@ -1920,7 +1933,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int clgi_interception(struct vcpu_svm *svm)
 {
 	if (nested_svm_check_permissions(svm))
 		return 1;
@@ -1937,10 +1950,12 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int invlpga_interception(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
-	nsvm_printk("INVLPGA\n");
+
+	trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
+			  vcpu->arch.regs[VCPU_REGS_RAX]);
 
 	/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
 	kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
@@ -1950,15 +1965,21 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int invalid_op_interception(struct vcpu_svm *svm,
-				   struct kvm_run *kvm_run)
+static int skinit_interception(struct vcpu_svm *svm)
 {
+	trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+
 	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 	return 1;
 }
 
-static int task_switch_interception(struct vcpu_svm *svm,
-				    struct kvm_run *kvm_run)
+static int invalid_op_interception(struct vcpu_svm *svm)
+{
+	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+	return 1;
+}
+
+static int task_switch_interception(struct vcpu_svm *svm)
 {
 	u16 tss_selector;
 	int reason;
@@ -2008,14 +2029,14 @@ static int task_switch_interception(struct vcpu_svm *svm,
 	return kvm_task_switch(&svm->vcpu, tss_selector, reason);
 }
 
-static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cpuid_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
 	kvm_emulate_cpuid(&svm->vcpu);
 	return 1;
 }
 
-static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int iret_interception(struct vcpu_svm *svm)
 {
 	++svm->vcpu.stat.nmi_window_exits;
 	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
@@ -2023,26 +2044,27 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int invlpg_interception(struct vcpu_svm *svm)
 {
-	if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
+	if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
 		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
 	return 1;
 }
 
-static int emulate_on_interception(struct vcpu_svm *svm,
-				   struct kvm_run *kvm_run)
+static int emulate_on_interception(struct vcpu_svm *svm)
 {
-	if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
+	if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
 		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
 	return 1;
 }
 
-static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cr8_write_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
 	/* instruction emulation calls kvm_set_cr8() */
-	emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
+	emulate_instruction(&svm->vcpu, 0, 0, 0);
 	if (irqchip_in_kernel(svm->vcpu.kvm)) {
 		svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
 		return 1;
@@ -2128,7 +2150,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 	return 0;
 }
 
-static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int rdmsr_interception(struct vcpu_svm *svm)
 {
 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data;
@@ -2221,7 +2243,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 	return 0;
 }
 
-static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int wrmsr_interception(struct vcpu_svm *svm)
 {
 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
@@ -2237,17 +2259,18 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int msr_interception(struct vcpu_svm *svm)
 {
 	if (svm->vmcb->control.exit_info_1)
-		return wrmsr_interception(svm, kvm_run);
+		return wrmsr_interception(svm);
 	else
-		return rdmsr_interception(svm, kvm_run);
+		return rdmsr_interception(svm);
 }
 
-static int interrupt_window_interception(struct vcpu_svm *svm,
-				   struct kvm_run *kvm_run)
+static int interrupt_window_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	svm_clear_vintr(svm);
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 	/*
@@ -2265,8 +2288,13 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
 	return 1;
 }
 
-static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
-				      struct kvm_run *kvm_run) = {
+static int pause_interception(struct vcpu_svm *svm)
+{
+	kvm_vcpu_on_spin(&(svm->vcpu));
+	return 1;
+}
+
+static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_READ_CR0]           		= emulate_on_interception,
 	[SVM_EXIT_READ_CR3]           		= emulate_on_interception,
 	[SVM_EXIT_READ_CR4]           		= emulate_on_interception,
@@ -2301,6 +2329,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_CPUID]			= cpuid_interception,
 	[SVM_EXIT_IRET]                         = iret_interception,
 	[SVM_EXIT_INVD]                         = emulate_on_interception,
+	[SVM_EXIT_PAUSE]			= pause_interception,
 	[SVM_EXIT_HLT]				= halt_interception,
 	[SVM_EXIT_INVLPG]			= invlpg_interception,
 	[SVM_EXIT_INVLPGA]			= invlpga_interception,
@@ -2314,26 +2343,36 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_VMSAVE]			= vmsave_interception,
 	[SVM_EXIT_STGI]				= stgi_interception,
 	[SVM_EXIT_CLGI]				= clgi_interception,
-	[SVM_EXIT_SKINIT]			= invalid_op_interception,
+	[SVM_EXIT_SKINIT]			= skinit_interception,
 	[SVM_EXIT_WBINVD]                       = emulate_on_interception,
 	[SVM_EXIT_MONITOR]			= invalid_op_interception,
 	[SVM_EXIT_MWAIT]			= invalid_op_interception,
 	[SVM_EXIT_NPF]				= pf_interception,
 };
 
-static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int handle_exit(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	struct kvm_run *kvm_run = vcpu->run;
 	u32 exit_code = svm->vmcb->control.exit_code;
 
 	trace_kvm_exit(exit_code, svm->vmcb->save.rip);
 
+	if (unlikely(svm->nested.exit_required)) {
+		nested_svm_vmexit(svm);
+		svm->nested.exit_required = false;
+
+		return 1;
+	}
+
 	if (is_nested(svm)) {
 		int vmexit;
 
-		nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
-			    exit_code, svm->vmcb->control.exit_info_1,
-			    svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
+		trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
+					svm->vmcb->control.exit_info_1,
+					svm->vmcb->control.exit_info_2,
+					svm->vmcb->control.exit_int_info,
+					svm->vmcb->control.exit_int_info_err);
 
 		vmexit = nested_svm_exit_special(svm);
 
@@ -2383,7 +2422,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		return 0;
 	}
 
-	return svm_exit_handlers[exit_code](svm, kvm_run);
+	return svm_exit_handlers[exit_code](svm);
 }
 
 static void reload_tss(struct kvm_vcpu *vcpu)
@@ -2460,20 +2499,47 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 		!(svm->vcpu.arch.hflags & HF_NMI_MASK);
 }
 
+static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
+}
+
+static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (masked) {
+		svm->vcpu.arch.hflags |= HF_NMI_MASK;
+		svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+	} else {
+		svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+		svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+	}
+}
+
 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb *vmcb = svm->vmcb;
-	return (vmcb->save.rflags & X86_EFLAGS_IF) &&
-		!(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-		gif_set(svm) &&
-		!(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK));
+	int ret;
+
+	if (!gif_set(svm) ||
+	     (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
+		return 0;
+
+	ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
+
+	if (is_nested(svm))
+		return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
+
+	return ret;
 }
 
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	nsvm_printk("Trying to open IRQ window\n");
 
 	nested_svm_intr(svm);
 
@@ -2498,7 +2564,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	/* Something prevents NMI from being injected. Single step over
 	   possible problem (IRET or exception injection or interrupt
 	   shadow) */
-	vcpu->arch.singlestep = true;
+	svm->nmi_singlestep = true;
 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
 	update_db_intercept(vcpu);
 }
@@ -2588,13 +2654,20 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 #define R "e"
 #endif
 
-static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u16 fs_selector;
 	u16 gs_selector;
 	u16 ldt_selector;
 
+	/*
+	 * A vmexit emulation is required before the vcpu can be executed
+	 * again.
+	 */
+	if (unlikely(svm->nested.exit_required))
+		return;
+
 	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
 	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
 	svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
@@ -2893,6 +2966,8 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.queue_exception = svm_queue_exception,
 	.interrupt_allowed = svm_interrupt_allowed,
 	.nmi_allowed = svm_nmi_allowed,
+	.get_nmi_mask = svm_get_nmi_mask,
+	.set_nmi_mask = svm_set_nmi_mask,
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,

+ 165 - 0
arch/x86/kvm/trace.h

@@ -349,6 +349,171 @@ TRACE_EVENT(kvm_apic_accept_irq,
 		  __entry->coalesced ? " (coalesced)" : "")
 );
 
+/*
+ * Tracepoint for nested VMRUN
+ */
+TRACE_EVENT(kvm_nested_vmrun,
+	    TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl,
+		     __u32 event_inj, bool npt),
+	    TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt),
+
+	TP_STRUCT__entry(
+		__field(	__u64,		rip		)
+		__field(	__u64,		vmcb		)
+		__field(	__u64,		nested_rip	)
+		__field(	__u32,		int_ctl		)
+		__field(	__u32,		event_inj	)
+		__field(	bool,		npt		)
+	),
+
+	TP_fast_assign(
+		__entry->rip		= rip;
+		__entry->vmcb		= vmcb;
+		__entry->nested_rip	= nested_rip;
+		__entry->int_ctl	= int_ctl;
+		__entry->event_inj	= event_inj;
+		__entry->npt		= npt;
+	),
+
+	TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x "
+		  "event_inj: 0x%08x npt: %s\n",
+		__entry->rip, __entry->vmcb, __entry->nested_rip,
+		__entry->int_ctl, __entry->event_inj,
+		__entry->npt ? "on" : "off")
+);
+
+/*
+ * Tracepoint for #VMEXIT while nested
+ */
+TRACE_EVENT(kvm_nested_vmexit,
+	    TP_PROTO(__u64 rip, __u32 exit_code,
+		     __u64 exit_info1, __u64 exit_info2,
+		     __u32 exit_int_info, __u32 exit_int_info_err),
+	    TP_ARGS(rip, exit_code, exit_info1, exit_info2,
+		    exit_int_info, exit_int_info_err),
+
+	TP_STRUCT__entry(
+		__field(	__u64,		rip			)
+		__field(	__u32,		exit_code		)
+		__field(	__u64,		exit_info1		)
+		__field(	__u64,		exit_info2		)
+		__field(	__u32,		exit_int_info		)
+		__field(	__u32,		exit_int_info_err	)
+	),
+
+	TP_fast_assign(
+		__entry->rip			= rip;
+		__entry->exit_code		= exit_code;
+		__entry->exit_info1		= exit_info1;
+		__entry->exit_info2		= exit_info2;
+		__entry->exit_int_info		= exit_int_info;
+		__entry->exit_int_info_err	= exit_int_info_err;
+	),
+	TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx "
+		  "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n",
+		  __entry->rip,
+		  ftrace_print_symbols_seq(p, __entry->exit_code,
+					   kvm_x86_ops->exit_reasons_str),
+		  __entry->exit_info1, __entry->exit_info2,
+		  __entry->exit_int_info, __entry->exit_int_info_err)
+);
+
+/*
+ * Tracepoint for #VMEXIT reinjected to the guest
+ */
+TRACE_EVENT(kvm_nested_vmexit_inject,
+	    TP_PROTO(__u32 exit_code,
+		     __u64 exit_info1, __u64 exit_info2,
+		     __u32 exit_int_info, __u32 exit_int_info_err),
+	    TP_ARGS(exit_code, exit_info1, exit_info2,
+		    exit_int_info, exit_int_info_err),
+
+	TP_STRUCT__entry(
+		__field(	__u32,		exit_code		)
+		__field(	__u64,		exit_info1		)
+		__field(	__u64,		exit_info2		)
+		__field(	__u32,		exit_int_info		)
+		__field(	__u32,		exit_int_info_err	)
+	),
+
+	TP_fast_assign(
+		__entry->exit_code		= exit_code;
+		__entry->exit_info1		= exit_info1;
+		__entry->exit_info2		= exit_info2;
+		__entry->exit_int_info		= exit_int_info;
+		__entry->exit_int_info_err	= exit_int_info_err;
+	),
+
+	TP_printk("reason: %s ext_inf1: 0x%016llx "
+		  "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n",
+		  ftrace_print_symbols_seq(p, __entry->exit_code,
+					   kvm_x86_ops->exit_reasons_str),
+		__entry->exit_info1, __entry->exit_info2,
+		__entry->exit_int_info, __entry->exit_int_info_err)
+);
+
+/*
+ * Tracepoint for nested #vmexit because of interrupt pending
+ */
+TRACE_EVENT(kvm_nested_intr_vmexit,
+	    TP_PROTO(__u64 rip),
+	    TP_ARGS(rip),
+
+	TP_STRUCT__entry(
+		__field(	__u64,	rip	)
+	),
+
+	TP_fast_assign(
+		__entry->rip	=	rip
+	),
+
+	TP_printk("rip: 0x%016llx\n", __entry->rip)
+);
+
+/*
+ * Tracepoint for the INVLPGA instruction intercept
+ */
+TRACE_EVENT(kvm_invlpga,
+	    TP_PROTO(__u64 rip, int asid, u64 address),
+	    TP_ARGS(rip, asid, address),
+
+	TP_STRUCT__entry(
+		__field(	__u64,	rip	)
+		__field(	int,	asid	)
+		__field(	__u64,	address	)
+	),
+
+	TP_fast_assign(
+		__entry->rip		=	rip;
+		__entry->asid		=	asid;
+		__entry->address	=	address;
+	),
+
+	TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n",
+		  __entry->rip, __entry->asid, __entry->address)
+);
+
+/*
+ * Tracepoint for the SKINIT instruction intercept
+ */
+TRACE_EVENT(kvm_skinit,
+	    TP_PROTO(__u64 rip, __u32 slb),
+	    TP_ARGS(rip, slb),
+
+	TP_STRUCT__entry(
+		__field(	__u64,	rip	)
+		__field(	__u32,	slb	)
+	),
+
+	TP_fast_assign(
+		__entry->rip		=	rip;
+		__entry->slb		=	slb;
+	),
+
+	TP_printk("rip: 0x%016llx slb: 0x%08x\n",
+		  __entry->rip, __entry->slb)
+);
+
 #endif /* _TRACE_KVM_H */
 
 /* This part must be outside protection */
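Once compiled in, the events defined above appear under the kvm
subsystem of the tracing debugfs tree. A userspace sketch for switching
one of them on (assumes debugfs is mounted at its conventional path;
the helper name is made up for illustration):

	#include <fcntl.h>
	#include <unistd.h>

	static int enable_nested_vmexit_tracing(void)
	{
		const char *path = "/sys/kernel/debug/tracing/"
				   "events/kvm/kvm_nested_vmexit/enable";
		int fd = open(path, O_WRONLY);

		if (fd < 0)
			return -1;
		if (write(fd, "1", 1) != 1) {
			close(fd);
			return -1;
		}
		return close(fd);
	}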

File changes are not shown because they are too large
+ 236 - 181
arch/x86/kvm/vmx.c


File changes are not shown because they are too large
+ 413 - 109
arch/x86/kvm/x86.c


+ 156 - 116
include/linux/kvm.h

@@ -14,12 +14,76 @@
 
 #define KVM_API_VERSION 12
 
-/* for KVM_TRACE_ENABLE, deprecated */
+/* *** Deprecated interfaces *** */
+
+#define KVM_TRC_SHIFT           16
+
+#define KVM_TRC_ENTRYEXIT       (1 << KVM_TRC_SHIFT)
+#define KVM_TRC_HANDLER         (1 << (KVM_TRC_SHIFT + 1))
+
+#define KVM_TRC_VMENTRY         (KVM_TRC_ENTRYEXIT + 0x01)
+#define KVM_TRC_VMEXIT          (KVM_TRC_ENTRYEXIT + 0x02)
+#define KVM_TRC_PAGE_FAULT      (KVM_TRC_HANDLER + 0x01)
+
+#define KVM_TRC_HEAD_SIZE       12
+#define KVM_TRC_CYCLE_SIZE      8
+#define KVM_TRC_EXTRA_MAX       7
+
+#define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
+#define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
+#define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
+#define KVM_TRC_IO_READ          (KVM_TRC_HANDLER + 0x05)
+#define KVM_TRC_IO_WRITE         (KVM_TRC_HANDLER + 0x06)
+#define KVM_TRC_CR_READ          (KVM_TRC_HANDLER + 0x07)
+#define KVM_TRC_CR_WRITE         (KVM_TRC_HANDLER + 0x08)
+#define KVM_TRC_DR_READ          (KVM_TRC_HANDLER + 0x09)
+#define KVM_TRC_DR_WRITE         (KVM_TRC_HANDLER + 0x0A)
+#define KVM_TRC_MSR_READ         (KVM_TRC_HANDLER + 0x0B)
+#define KVM_TRC_MSR_WRITE        (KVM_TRC_HANDLER + 0x0C)
+#define KVM_TRC_CPUID            (KVM_TRC_HANDLER + 0x0D)
+#define KVM_TRC_INTR             (KVM_TRC_HANDLER + 0x0E)
+#define KVM_TRC_NMI              (KVM_TRC_HANDLER + 0x0F)
+#define KVM_TRC_VMMCALL          (KVM_TRC_HANDLER + 0x10)
+#define KVM_TRC_HLT              (KVM_TRC_HANDLER + 0x11)
+#define KVM_TRC_CLTS             (KVM_TRC_HANDLER + 0x12)
+#define KVM_TRC_LMSW             (KVM_TRC_HANDLER + 0x13)
+#define KVM_TRC_APIC_ACCESS      (KVM_TRC_HANDLER + 0x14)
+#define KVM_TRC_TDP_FAULT        (KVM_TRC_HANDLER + 0x15)
+#define KVM_TRC_GTLB_WRITE       (KVM_TRC_HANDLER + 0x16)
+#define KVM_TRC_STLB_WRITE       (KVM_TRC_HANDLER + 0x17)
+#define KVM_TRC_STLB_INVAL       (KVM_TRC_HANDLER + 0x18)
+#define KVM_TRC_PPC_INSTR        (KVM_TRC_HANDLER + 0x19)
+
 struct kvm_user_trace_setup {
-	__u32 buf_size; /* sub_buffer size of each per-cpu */
-	__u32 buf_nr; /* the number of sub_buffers of each per-cpu */
+	__u32 buf_size;
+	__u32 buf_nr;
+};
+
+#define __KVM_DEPRECATED_MAIN_W_0x06 \
+	_IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
+#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07)
+#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08)
+
+#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq)
+
+struct kvm_breakpoint {
+	__u32 enabled;
+	__u32 padding;
+	__u64 address;
+};
+
+struct kvm_debug_guest {
+	__u32 enabled;
+	__u32 pad;
+	struct kvm_breakpoint breakpoints[4];
+	__u32 singlestep;
 };
 
+#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest)
+
+/* *** End of deprecated interfaces *** */
+
+
 /* for KVM_CREATE_MEMORY_REGION */
 struct kvm_memory_region {
 	__u32 slot;
@@ -99,6 +163,7 @@ struct kvm_pit_config {
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
+#define KVM_INTERNAL_ERROR_SIMUL_EX 2
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
@@ -116,6 +181,11 @@ struct kvm_run {
 	__u64 cr8;
 	__u64 apic_base;
 
+#ifdef __KVM_S390
+	/* the processor status word for s390 */
+	__u64 psw_mask; /* psw upper half */
+	__u64 psw_addr; /* psw lower half */
+#endif
 	union {
 		/* KVM_EXIT_UNKNOWN */
 		struct {
@@ -167,8 +237,6 @@ struct kvm_run {
 		/* KVM_EXIT_S390_SIEIC */
 		struct {
 			__u8 icptcode;
-			__u64 mask; /* psw upper half */
-			__u64 addr; /* psw lower half */
 			__u16 ipa;
 			__u32 ipb;
 		} s390_sieic;
@@ -187,6 +255,9 @@ struct kvm_run {
 		} dcr;
 		struct {
 			__u32 suberror;
+			/* Available with KVM_CAP_INTERNAL_ERROR_DATA: */
+			__u32 ndata;
+			__u64 data[16];
 		} internal;
 		/* Fix the size of the union. */
 		char padding[256];
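With KVM_CAP_INTERNAL_ERROR_DATA, the ndata/data[] fields above carry
extra diagnostic words after a KVM_EXIT_INTERNAL_ERROR. A sketch of the
userspace consumer (run is assumed to point at the mmap'ed kvm_run
region of a vcpu fd; the helper name is illustrative):

	#include <stdio.h>
	#include <linux/kvm.h>

	static void report_internal_error(const struct kvm_run *run)
	{
		__u32 i;

		printf("KVM internal error, suberror %u\n",
		       run->internal.suberror);
		for (i = 0; i < run->internal.ndata && i < 16; i++)
			printf("  data[%u] = 0x%llx\n", i,
			       (unsigned long long)run->internal.data[i]);
	}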
@@ -329,24 +400,6 @@ struct kvm_ioeventfd {
 	__u8  pad[36];
 };
 
-#define KVM_TRC_SHIFT           16
-/*
- * kvm trace categories
- */
-#define KVM_TRC_ENTRYEXIT       (1 << KVM_TRC_SHIFT)
-#define KVM_TRC_HANDLER         (1 << (KVM_TRC_SHIFT + 1)) /* only 12 bits */
-
-/*
- * kvm trace action
- */
-#define KVM_TRC_VMENTRY         (KVM_TRC_ENTRYEXIT + 0x01)
-#define KVM_TRC_VMEXIT          (KVM_TRC_ENTRYEXIT + 0x02)
-#define KVM_TRC_PAGE_FAULT      (KVM_TRC_HANDLER + 0x01)
-
-#define KVM_TRC_HEAD_SIZE       12
-#define KVM_TRC_CYCLE_SIZE      8
-#define KVM_TRC_EXTRA_MAX       7
-
 #define KVMIO 0xAE
 
 /*
@@ -367,12 +420,10 @@ struct kvm_ioeventfd {
  */
 #define KVM_GET_VCPU_MMAP_SIZE    _IO(KVMIO,   0x04) /* in bytes */
 #define KVM_GET_SUPPORTED_CPUID   _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
-/*
- * ioctls for kvm trace
- */
-#define KVM_TRACE_ENABLE          _IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
-#define KVM_TRACE_PAUSE           _IO(KVMIO,  0x07)
-#define KVM_TRACE_DISABLE         _IO(KVMIO,  0x08)
+#define KVM_TRACE_ENABLE          __KVM_DEPRECATED_MAIN_W_0x06
+#define KVM_TRACE_PAUSE           __KVM_DEPRECATED_MAIN_0x07
+#define KVM_TRACE_DISABLE         __KVM_DEPRECATED_MAIN_0x08
+
 /*
  * Extension capability list.
  */
@@ -436,6 +487,15 @@ struct kvm_ioeventfd {
 #endif
 #define KVM_CAP_IOEVENTFD 36
 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
+#ifdef __KVM_HAVE_XEN_HVM
+#define KVM_CAP_XEN_HVM 38
+#endif
+#define KVM_CAP_ADJUST_CLOCK 39
+#define KVM_CAP_INTERNAL_ERROR_DATA 40
+#ifdef __KVM_HAVE_VCPU_EVENTS
+#define KVM_CAP_VCPU_EVENTS 41
+#endif
+#define KVM_CAP_S390_PSW 42
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -488,6 +548,18 @@ struct kvm_x86_mce {
 };
 #endif
 
+#ifdef KVM_CAP_XEN_HVM
+struct kvm_xen_hvm_config {
+	__u32 flags;
+	__u32 msr;
+	__u64 blob_addr_32;
+	__u64 blob_addr_64;
+	__u8 blob_size_32;
+	__u8 blob_size_64;
+	__u8 pad2[30];
+};
+#endif
+
 #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
 
 struct kvm_irqfd {
@@ -497,55 +569,66 @@ struct kvm_irqfd {
 	__u8  pad[20];
 };
 
+struct kvm_clock_data {
+	__u64 clock;
+	__u32 flags;
+	__u32 pad[9];
+};
+
 /*
  * ioctls for VM fds
  */
-#define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 0x40, struct kvm_memory_region)
+#define KVM_SET_MEMORY_REGION     _IOW(KVMIO,  0x40, struct kvm_memory_region)
 /*
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
  * a vcpu fd.
  */
-#define KVM_CREATE_VCPU           _IO(KVMIO,  0x41)
-#define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
-#define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
-#define KVM_SET_NR_MMU_PAGES      _IO(KVMIO, 0x44)
-#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO, 0x45)
-#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
+#define KVM_CREATE_VCPU           _IO(KVMIO,   0x41)
+#define KVM_GET_DIRTY_LOG         _IOW(KVMIO,  0x42, struct kvm_dirty_log)
+#define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO,  0x43, struct kvm_memory_alias)
+#define KVM_SET_NR_MMU_PAGES      _IO(KVMIO,   0x44)
+#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO,   0x45)
+#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
 					struct kvm_userspace_memory_region)
-#define KVM_SET_TSS_ADDR          _IO(KVMIO, 0x47)
-#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
+#define KVM_SET_TSS_ADDR          _IO(KVMIO,   0x47)
+#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO,  0x48, __u64)
 /* Device model IOC */
-#define KVM_CREATE_IRQCHIP	  _IO(KVMIO,  0x60)
-#define KVM_IRQ_LINE		  _IOW(KVMIO, 0x61, struct kvm_irq_level)
-#define KVM_GET_IRQCHIP		  _IOWR(KVMIO, 0x62, struct kvm_irqchip)
-#define KVM_SET_IRQCHIP		  _IOR(KVMIO,  0x63, struct kvm_irqchip)
-#define KVM_CREATE_PIT		  _IO(KVMIO,  0x64)
-#define KVM_GET_PIT		  _IOWR(KVMIO, 0x65, struct kvm_pit_state)
-#define KVM_SET_PIT		  _IOR(KVMIO,  0x66, struct kvm_pit_state)
-#define KVM_IRQ_LINE_STATUS	  _IOWR(KVMIO, 0x67, struct kvm_irq_level)
+#define KVM_CREATE_IRQCHIP        _IO(KVMIO,   0x60)
+#define KVM_IRQ_LINE              _IOW(KVMIO,  0x61, struct kvm_irq_level)
+#define KVM_GET_IRQCHIP           _IOWR(KVMIO, 0x62, struct kvm_irqchip)
+#define KVM_SET_IRQCHIP           _IOR(KVMIO,  0x63, struct kvm_irqchip)
+#define KVM_CREATE_PIT            _IO(KVMIO,   0x64)
+#define KVM_GET_PIT               _IOWR(KVMIO, 0x65, struct kvm_pit_state)
+#define KVM_SET_PIT               _IOR(KVMIO,  0x66, struct kvm_pit_state)
+#define KVM_IRQ_LINE_STATUS       _IOWR(KVMIO, 0x67, struct kvm_irq_level)
 #define KVM_REGISTER_COALESCED_MMIO \
 			_IOW(KVMIO,  0x67, struct kvm_coalesced_mmio_zone)
 #define KVM_UNREGISTER_COALESCED_MMIO \
 			_IOW(KVMIO,  0x68, struct kvm_coalesced_mmio_zone)
-#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
-				   struct kvm_assigned_pci_dev)
-#define KVM_SET_GSI_ROUTING       _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
+#define KVM_ASSIGN_PCI_DEVICE     _IOR(KVMIO,  0x69, \
+				       struct kvm_assigned_pci_dev)
+#define KVM_SET_GSI_ROUTING       _IOW(KVMIO,  0x6a, struct kvm_irq_routing)
 /* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
-#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
-			    struct kvm_assigned_irq)
-#define KVM_ASSIGN_DEV_IRQ        _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
-#define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
-#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
-				     struct kvm_assigned_pci_dev)
-#define KVM_ASSIGN_SET_MSIX_NR \
-			_IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr)
-#define KVM_ASSIGN_SET_MSIX_ENTRY \
-			_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
-#define KVM_DEASSIGN_DEV_IRQ       _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
-#define KVM_IRQFD                  _IOW(KVMIO, 0x76, struct kvm_irqfd)
-#define KVM_CREATE_PIT2		   _IOW(KVMIO, 0x77, struct kvm_pit_config)
-#define KVM_SET_BOOT_CPU_ID        _IO(KVMIO, 0x78)
-#define KVM_IOEVENTFD             _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
+#define KVM_ASSIGN_IRQ            __KVM_DEPRECATED_VM_R_0x70
+#define KVM_ASSIGN_DEV_IRQ        _IOW(KVMIO,  0x70, struct kvm_assigned_irq)
+#define KVM_REINJECT_CONTROL      _IO(KVMIO,   0x71)
+#define KVM_DEASSIGN_PCI_DEVICE   _IOW(KVMIO,  0x72, \
+				       struct kvm_assigned_pci_dev)
+#define KVM_ASSIGN_SET_MSIX_NR    _IOW(KVMIO,  0x73, \
+				       struct kvm_assigned_msix_nr)
+#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO,  0x74, \
+				       struct kvm_assigned_msix_entry)
+#define KVM_DEASSIGN_DEV_IRQ      _IOW(KVMIO,  0x75, struct kvm_assigned_irq)
+#define KVM_IRQFD                 _IOW(KVMIO,  0x76, struct kvm_irqfd)
+#define KVM_CREATE_PIT2		  _IOW(KVMIO,  0x77, struct kvm_pit_config)
+#define KVM_SET_BOOT_CPU_ID       _IO(KVMIO,   0x78)
+#define KVM_IOEVENTFD             _IOW(KVMIO,  0x79, struct kvm_ioeventfd)
+#define KVM_XEN_HVM_CONFIG        _IOW(KVMIO,  0x7a, struct kvm_xen_hvm_config)
+#define KVM_SET_CLOCK             _IOW(KVMIO,  0x7b, struct kvm_clock_data)
+#define KVM_GET_CLOCK             _IOR(KVMIO,  0x7c, struct kvm_clock_data)
+/* Available with KVM_CAP_PIT_STATE2 */
+#define KVM_GET_PIT2              _IOR(KVMIO,  0x9f, struct kvm_pit_state2)
+#define KVM_SET_PIT2              _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
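KVM_GET_CLOCK and KVM_SET_CLOCK pair with struct kvm_clock_data defined
earlier in this hunk. A sketch of saving and restoring the in-kernel
clock across a guest pause (assumes vm_fd is an open VM descriptor and
that KVM_CAP_ADJUST_CLOCK was probed first):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int save_restore_kvmclock(int vm_fd)
	{
		struct kvm_clock_data data;

		memset(&data, 0, sizeof(data));
		if (ioctl(vm_fd, KVM_GET_CLOCK, &data) < 0)
			return -1;
		/* ... guest paused here, e.g. across migration ... */
		return ioctl(vm_fd, KVM_SET_CLOCK, &data);
	}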
 
 /*
  * ioctls for vcpu fds
@@ -558,7 +641,7 @@ struct kvm_irqfd {
 #define KVM_TRANSLATE             _IOWR(KVMIO, 0x85, struct kvm_translation)
 #define KVM_INTERRUPT             _IOW(KVMIO,  0x86, struct kvm_interrupt)
 /* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
-#define KVM_DEBUG_GUEST           __KVM_DEPRECATED_DEBUG_GUEST
+#define KVM_DEBUG_GUEST           __KVM_DEPRECATED_VCPU_W_0x87
 #define KVM_GET_MSRS              _IOWR(KVMIO, 0x88, struct kvm_msrs)
 #define KVM_SET_MSRS              _IOW(KVMIO,  0x89, struct kvm_msrs)
 #define KVM_SET_CPUID             _IOW(KVMIO,  0x8a, struct kvm_cpuid)
@@ -570,7 +653,7 @@ struct kvm_irqfd {
 #define KVM_SET_CPUID2            _IOW(KVMIO,  0x90, struct kvm_cpuid2)
 #define KVM_GET_CPUID2            _IOWR(KVMIO, 0x91, struct kvm_cpuid2)
 /* Available with KVM_CAP_VAPIC */
-#define KVM_TPR_ACCESS_REPORTING  _IOWR(KVMIO,  0x92, struct kvm_tpr_access_ctl)
+#define KVM_TPR_ACCESS_REPORTING  _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl)
 /* Available with KVM_CAP_VAPIC */
 #define KVM_SET_VAPIC_ADDR        _IOW(KVMIO,  0x93, struct kvm_vapic_addr)
 /* valid for virtual machine (for floating interrupt)_and_ vcpu */
@@ -582,66 +665,23 @@ struct kvm_irqfd {
 /* initial ipl psw for s390 */
 #define KVM_S390_SET_INITIAL_PSW  _IOW(KVMIO,  0x96, struct kvm_s390_psw)
 /* initial reset for s390 */
-#define KVM_S390_INITIAL_RESET    _IO(KVMIO,  0x97)
+#define KVM_S390_INITIAL_RESET    _IO(KVMIO,   0x97)
 #define KVM_GET_MP_STATE          _IOR(KVMIO,  0x98, struct kvm_mp_state)
 #define KVM_SET_MP_STATE          _IOW(KVMIO,  0x99, struct kvm_mp_state)
 /* Available with KVM_CAP_NMI */
-#define KVM_NMI                   _IO(KVMIO,  0x9a)
+#define KVM_NMI                   _IO(KVMIO,   0x9a)
 /* Available with KVM_CAP_SET_GUEST_DEBUG */
 #define KVM_SET_GUEST_DEBUG       _IOW(KVMIO,  0x9b, struct kvm_guest_debug)
 /* MCE for x86 */
 #define KVM_X86_SETUP_MCE         _IOW(KVMIO,  0x9c, __u64)
 #define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO,  0x9d, __u64)
 #define KVM_X86_SET_MCE           _IOW(KVMIO,  0x9e, struct kvm_x86_mce)
-
-/*
- * Deprecated interfaces
- */
-struct kvm_breakpoint {
-	__u32 enabled;
-	__u32 padding;
-	__u64 address;
-};
-
-struct kvm_debug_guest {
-	__u32 enabled;
-	__u32 pad;
-	struct kvm_breakpoint breakpoints[4];
-	__u32 singlestep;
-};
-
-#define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO,  0x87, struct kvm_debug_guest)
-
+/* IA64 stack access */
 #define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
 #define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
-
-#define KVM_GET_PIT2   _IOR(KVMIO,   0x9f, struct kvm_pit_state2)
-#define KVM_SET_PIT2   _IOW(KVMIO,   0xa0, struct kvm_pit_state2)
-
-#define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
-#define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
-#define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
-#define KVM_TRC_IO_READ          (KVM_TRC_HANDLER + 0x05)
-#define KVM_TRC_IO_WRITE         (KVM_TRC_HANDLER + 0x06)
-#define KVM_TRC_CR_READ          (KVM_TRC_HANDLER + 0x07)
-#define KVM_TRC_CR_WRITE         (KVM_TRC_HANDLER + 0x08)
-#define KVM_TRC_DR_READ          (KVM_TRC_HANDLER + 0x09)
-#define KVM_TRC_DR_WRITE         (KVM_TRC_HANDLER + 0x0A)
-#define KVM_TRC_MSR_READ         (KVM_TRC_HANDLER + 0x0B)
-#define KVM_TRC_MSR_WRITE        (KVM_TRC_HANDLER + 0x0C)
-#define KVM_TRC_CPUID            (KVM_TRC_HANDLER + 0x0D)
-#define KVM_TRC_INTR             (KVM_TRC_HANDLER + 0x0E)
-#define KVM_TRC_NMI              (KVM_TRC_HANDLER + 0x0F)
-#define KVM_TRC_VMMCALL          (KVM_TRC_HANDLER + 0x10)
-#define KVM_TRC_HLT              (KVM_TRC_HANDLER + 0x11)
-#define KVM_TRC_CLTS             (KVM_TRC_HANDLER + 0x12)
-#define KVM_TRC_LMSW             (KVM_TRC_HANDLER + 0x13)
-#define KVM_TRC_APIC_ACCESS      (KVM_TRC_HANDLER + 0x14)
-#define KVM_TRC_TDP_FAULT        (KVM_TRC_HANDLER + 0x15)
-#define KVM_TRC_GTLB_WRITE       (KVM_TRC_HANDLER + 0x16)
-#define KVM_TRC_STLB_WRITE       (KVM_TRC_HANDLER + 0x17)
-#define KVM_TRC_STLB_INVAL       (KVM_TRC_HANDLER + 0x18)
-#define KVM_TRC_PPC_INSTR        (KVM_TRC_HANDLER + 0x19)
+/* Available with KVM_CAP_VCPU_EVENTS */
+#define KVM_GET_VCPU_EVENTS       _IOR(KVMIO,  0x9f, struct kvm_vcpu_events)
+#define KVM_SET_VCPU_EVENTS       _IOW(KVMIO,  0xa0, struct kvm_vcpu_events)
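KVM_GET_VCPU_EVENTS and KVM_SET_VCPU_EVENTS round-trip pending
exception, interrupt and NMI state. A sketch of clearing a pending NMI
(struct kvm_vcpu_events and its nmi.pending field come from the
arch/x86/include/asm/kvm.h part of this merge, which is not shown in
this excerpt):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int clear_pending_nmi(int vcpu_fd)
	{
		struct kvm_vcpu_events events;

		if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
			return -1;
		events.nmi.pending = 0;
		return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
	}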
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
@@ -696,4 +736,4 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
-#endif
+#endif /* __LINUX_KVM_H */

+ 48 - 5
include/linux/kvm_host.h

@@ -120,7 +120,7 @@ struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
 	u32 type;
 	int (*set)(struct kvm_kernel_irq_routing_entry *e,
-		    struct kvm *kvm, int level);
+		   struct kvm *kvm, int irq_source_id, int level);
 	union {
 		struct {
 			unsigned irqchip;
@@ -128,9 +128,28 @@ struct kvm_kernel_irq_routing_entry {
 		} irqchip;
 		struct msi_msg msi;
 	};
-	struct list_head link;
+	struct hlist_node link;
+};
+
+#ifdef __KVM_HAVE_IOAPIC
+
+struct kvm_irq_routing_table {
+	int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
+	struct kvm_kernel_irq_routing_entry *rt_entries;
+	u32 nr_rt_entries;
+	/*
+	 * Array indexed by gsi. Each entry contains the list of irq chips
+	 * the gsi is connected to.
+	 */
+	struct hlist_head map[0];
 };
 
+#else
+
+struct kvm_irq_routing_table {};
+
+#endif
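The hlist-based table above replaces the flat routing list: map[] is
indexed directly by gsi, so injection no longer scans every entry. A
sketch of how an injection path walks it (the helper name is
illustrative; the real walker lives in virt/kvm/irq_comm.c and uses the
four-argument hlist_for_each_entry() of this kernel era):

	static int walk_gsi_routes(struct kvm *kvm,
				   struct kvm_irq_routing_table *rt,
				   u32 gsi, int irq_source_id, int level)
	{
		struct kvm_kernel_irq_routing_entry *e;
		struct hlist_node *n;
		int ret = -1;

		if (gsi < rt->nr_rt_entries)
			hlist_for_each_entry(e, n, &rt->map[gsi], link)
				ret = e->set(e, kvm, irq_source_id, level);
		return ret;
	}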
+
 struct kvm {
 	spinlock_t mmu_lock;
 	spinlock_t requests_lock;
@@ -166,8 +185,9 @@ struct kvm {
 
 	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
-	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
+	struct kvm_irq_routing_table *irq_routing;
 	struct hlist_head mask_notifier_list;
+	struct hlist_head irq_ack_notifier_list;
 #endif
 
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
@@ -266,6 +286,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
 void kvm_resched(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
@@ -325,7 +346,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
-void kvm_arch_hardware_enable(void *garbage);
+int kvm_arch_hardware_enable(void *garbage);
 void kvm_arch_hardware_disable(void *garbage);
 int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
@@ -390,7 +411,12 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn);
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
+#ifdef __KVM_HAVE_IOAPIC
+void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+				   union kvm_ioapic_redirect_entry *entry,
+				   unsigned long *deliver_bitmask);
+#endif
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
@@ -552,4 +578,21 @@ static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
 	return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
 }
 #endif
+
+#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
+
+long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+				  unsigned long arg);
+
+#else
+
+static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+						unsigned long arg)
+{
+	return -ENOTTY;
+}
+
 #endif
+
+#endif
+

+ 49 - 0
include/linux/user-return-notifier.h

@@ -0,0 +1,49 @@
+#ifndef _LINUX_USER_RETURN_NOTIFIER_H
+#define _LINUX_USER_RETURN_NOTIFIER_H
+
+#ifdef CONFIG_USER_RETURN_NOTIFIER
+
+#include <linux/list.h>
+#include <linux/sched.h>
+
+struct user_return_notifier {
+	void (*on_user_return)(struct user_return_notifier *urn);
+	struct hlist_node link;
+};
+
+
+void user_return_notifier_register(struct user_return_notifier *urn);
+void user_return_notifier_unregister(struct user_return_notifier *urn);
+
+static inline void propagate_user_return_notify(struct task_struct *prev,
+						struct task_struct *next)
+{
+	if (test_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY)) {
+		clear_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY);
+		set_tsk_thread_flag(next, TIF_USER_RETURN_NOTIFY);
+	}
+}
+
+void fire_user_return_notifiers(void);
+
+static inline void clear_user_return_notifier(struct task_struct *p)
+{
+	clear_tsk_thread_flag(p, TIF_USER_RETURN_NOTIFY);
+}
+
+#else
+
+struct user_return_notifier {};
+
+static inline void propagate_user_return_notify(struct task_struct *prev,
+						struct task_struct *next)
+{
+}
+
+static inline void fire_user_return_notifiers(void) {}
+
+static inline void clear_user_return_notifier(struct task_struct *p) {}
+
+#endif
+
+#endif
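A hypothetical in-kernel user of this API (the names below are
illustration only and not part of this merge; the real first user is
KVM's shared-MSR handling in arch/x86/kvm/x86.c):

	#include <linux/user-return-notifier.h>

	static void my_on_user_return(struct user_return_notifier *urn)
	{
		/* Runs on this cpu just before the return to userspace. */
		user_return_notifier_unregister(urn);	/* one-shot */
	}

	static struct user_return_notifier my_urn = {
		.on_user_return = my_on_user_return,
	};

	static void arm_user_return_hook(void)
	{
		/* Registration must happen in atomic context. */
		user_return_notifier_register(&my_urn);
	}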

+ 1 - 0
kernel/Makefile

@@ -99,6 +99,7 @@ obj-$(CONFIG_SLOW_WORK) += slow-work.o
 obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is

+ 2 - 0
kernel/fork.c

@@ -64,6 +64,7 @@
 #include <linux/magic.h>
 #include <linux/perf_event.h>
 #include <linux/posix-timers.h>
+#include <linux/user-return-notifier.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -249,6 +250,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		goto out;
 
 	setup_thread_stack(tsk, orig);
+	clear_user_return_notifier(tsk);
 	stackend = end_of_stack(tsk);
 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
 

+ 46 - 0
kernel/user-return-notifier.c

@@ -0,0 +1,46 @@
+
+#include <linux/user-return-notifier.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+
+static DEFINE_PER_CPU(struct hlist_head, return_notifier_list);
+
+#define URN_LIST_HEAD per_cpu(return_notifier_list, raw_smp_processor_id())
+
+/*
+ * Request a notification when the current cpu returns to userspace.  Must be
+ * called in atomic context.  The notifier will also be called in atomic
+ * context.
+ */
+void user_return_notifier_register(struct user_return_notifier *urn)
+{
+	set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
+	hlist_add_head(&urn->link, &URN_LIST_HEAD);
+}
+EXPORT_SYMBOL_GPL(user_return_notifier_register);
+
+/*
+ * Removes a registered user return notifier.  Must be called from atomic
+ * context, and on the same cpu the registration occurred on.
+ */
+void user_return_notifier_unregister(struct user_return_notifier *urn)
+{
+	hlist_del(&urn->link);
+	if (hlist_empty(&URN_LIST_HEAD))
+		clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
+}
+EXPORT_SYMBOL_GPL(user_return_notifier_unregister);
+
+/* Calls registered user return notifiers */
+void fire_user_return_notifiers(void)
+{
+	struct user_return_notifier *urn;
+	struct hlist_node *tmp1, *tmp2;
+	struct hlist_head *head;
+
+	head = &get_cpu_var(return_notifier_list);
+	hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link)
+		urn->on_user_return(urn);
+	put_cpu_var(return_notifier_list);
+}

+ 818 - 0
virt/kvm/assigned-dev.c

@@ -0,0 +1,818 @@
+/*
+ * Kernel-based Virtual Machine - device assignment support
+ *
+ * Copyright (C) 2006-9 Red Hat, Inc
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include "irq.h"
+
+static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
+						      int assigned_dev_id)
+{
+	struct list_head *ptr;
+	struct kvm_assigned_dev_kernel *match;
+
+	list_for_each(ptr, head) {
+		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
+		if (match->assigned_dev_id == assigned_dev_id)
+			return match;
+	}
+	return NULL;
+}
+
+static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
+				    *assigned_dev, int irq)
+{
+	int i, index;
+	struct msix_entry *host_msix_entries;
+
+	host_msix_entries = assigned_dev->host_msix_entries;
+
+	index = -1;
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (irq == host_msix_entries[i].vector) {
+			index = i;
+			break;
+		}
+	if (index < 0) {
+		printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n");
+		return 0;
+	}
+
+	return index;
+}
+
+static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev;
+	struct kvm *kvm;
+	int i;
+
+	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
+				    interrupt_work);
+	kvm = assigned_dev->kvm;
+
+	spin_lock_irq(&assigned_dev->assigned_dev_lock);
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+		struct kvm_guest_msix_entry *guest_entries =
+			assigned_dev->guest_msix_entries;
+		for (i = 0; i < assigned_dev->entries_nr; i++) {
+			if (!(guest_entries[i].flags &
+					KVM_ASSIGNED_MSIX_PENDING))
+				continue;
+			guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
+			kvm_set_irq(assigned_dev->kvm,
+				    assigned_dev->irq_source_id,
+				    guest_entries[i].vector, 1);
+		}
+	} else
+		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+			    assigned_dev->guest_irq, 1);
+
+	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
+}
+
+static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
+{
+	unsigned long flags;
+	struct kvm_assigned_dev_kernel *assigned_dev =
+		(struct kvm_assigned_dev_kernel *) dev_id;
+
+	spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+		int index = find_index_from_host_irq(assigned_dev, irq);
+		if (index < 0)
+			goto out;
+		assigned_dev->guest_msix_entries[index].flags |=
+			KVM_ASSIGNED_MSIX_PENDING;
+	}
+
+	schedule_work(&assigned_dev->interrupt_work);
+
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
+		disable_irq_nosync(irq);
+		assigned_dev->host_irq_disabled = true;
+	}
+
+out:
+	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
+	return IRQ_HANDLED;
+}
+
+/* Ack the irq line for an assigned device */
+static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
+{
+	struct kvm_assigned_dev_kernel *dev;
+	unsigned long flags;
+
+	if (kian->gsi == -1)
+		return;
+
+	dev = container_of(kian, struct kvm_assigned_dev_kernel,
+			   ack_notifier);
+
+	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
+
+	/* The guest irq may be shared so this ack may be
+	 * from another device.
+	 */
+	spin_lock_irqsave(&dev->assigned_dev_lock, flags);
+	if (dev->host_irq_disabled) {
+		enable_irq(dev->host_irq);
+		dev->host_irq_disabled = false;
+	}
+	spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
+}
+
+static void deassign_guest_irq(struct kvm *kvm,
+			       struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
+	assigned_dev->ack_notifier.gsi = -1;
+
+	if (assigned_dev->irq_source_id != -1)
+		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
+	assigned_dev->irq_source_id = -1;
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
+}
+
+/* The function implicitly holds the kvm->lock mutex due to cancel_work_sync() */
+static void deassign_host_irq(struct kvm *kvm,
+			      struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	/*
+	 * In kvm_free_device_irq, cancel_work_sync() returns true if either:
+	 * 1. the work was scheduled and has been cancelled, or
+	 * 2. the work callback has been executed.
+	 *
+	 * The first case guarantees that the irq is disabled and no more
+	 * events will arrive. In the second case the irq may have been
+	 * re-enabled (e.g. for MSI), so we disable it here to prevent
+	 * further events.
+	 *
+	 * Note that this may result in a nested disable if the interrupt
+	 * type is INTx, but that is fine since we are about to free it.
+	 *
+	 * If this function is called as part of VM destruction, make sure
+	 * the kvm state is still valid at this point, since we may also
+	 * have to wait for interrupt_work to complete.
+	 */
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+		int i;
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			disable_irq_nosync(assigned_dev->
+					   host_msix_entries[i].vector);
+
+		cancel_work_sync(&assigned_dev->interrupt_work);
+
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			free_irq(assigned_dev->host_msix_entries[i].vector,
+				 (void *)assigned_dev);
+
+		assigned_dev->entries_nr = 0;
+		kfree(assigned_dev->host_msix_entries);
+		kfree(assigned_dev->guest_msix_entries);
+		pci_disable_msix(assigned_dev->dev);
+	} else {
+		/* Deal with MSI and INTx */
+		disable_irq_nosync(assigned_dev->host_irq);
+		cancel_work_sync(&assigned_dev->interrupt_work);
+
+		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+
+		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
+			pci_disable_msi(assigned_dev->dev);
+	}
+
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
+}
+
+static int kvm_deassign_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *assigned_dev,
+			    unsigned long irq_requested_type)
+{
+	unsigned long guest_irq_type, host_irq_type;
+
+	if (!irqchip_in_kernel(kvm))
+		return -EINVAL;
+	/* no irq assignment to deassign */
+	if (!assigned_dev->irq_requested_type)
+		return -ENXIO;
+
+	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
+	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
+
+	if (host_irq_type)
+		deassign_host_irq(kvm, assigned_dev);
+	if (guest_irq_type)
+		deassign_guest_irq(kvm, assigned_dev);
+
+	return 0;
+}
+
+static void kvm_free_assigned_irq(struct kvm *kvm,
+				  struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
+}
+
+static void kvm_free_assigned_device(struct kvm *kvm,
+				     struct kvm_assigned_dev_kernel
+				     *assigned_dev)
+{
+	kvm_free_assigned_irq(kvm, assigned_dev);
+
+	pci_reset_function(assigned_dev->dev);
+
+	pci_release_regions(assigned_dev->dev);
+	pci_disable_device(assigned_dev->dev);
+	pci_dev_put(assigned_dev->dev);
+
+	list_del(&assigned_dev->list);
+	kfree(assigned_dev);
+}
+
+void kvm_free_all_assigned_devices(struct kvm *kvm)
+{
+	struct list_head *ptr, *ptr2;
+	struct kvm_assigned_dev_kernel *assigned_dev;
+
+	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
+		assigned_dev = list_entry(ptr,
+					  struct kvm_assigned_dev_kernel,
+					  list);
+
+		kvm_free_assigned_device(kvm, assigned_dev);
+	}
+}
+
+static int assigned_device_enable_host_intx(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
+{
+	dev->host_irq = dev->dev->irq;
+	/* Even though this is PCI, we don't want to use shared
+	 * interrupts. Sharing host devices with guest-assigned devices
+	 * on the same interrupt line is not a happy situation: there
+	 * are going to be long delays in accepting, acking, etc.
+	 */
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
+			0, "kvm_assigned_intx_device", (void *)dev))
+		return -EIO;
+	return 0;
+}
+
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_host_msi(struct kvm *kvm,
+					   struct kvm_assigned_dev_kernel *dev)
+{
+	int r;
+
+	if (!dev->dev->msi_enabled) {
+		r = pci_enable_msi(dev->dev);
+		if (r)
+			return r;
+	}
+
+	dev->host_irq = dev->dev->irq;
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
+			"kvm_assigned_msi_device", (void *)dev)) {
+		pci_disable_msi(dev->dev);
+		return -EIO;
+	}
+
+	return 0;
+}
+#endif
+
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_host_msix(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
+{
+	int i, r = -EINVAL;
+
+	/* host_msix_entries and guest_msix_entries should have been
+	 * initialized */
+	if (dev->entries_nr == 0)
+		return r;
+
+	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
+	if (r)
+		return r;
+
+	for (i = 0; i < dev->entries_nr; i++) {
+		r = request_irq(dev->host_msix_entries[i].vector,
+				kvm_assigned_dev_intr, 0,
+				"kvm_assigned_msix_device",
+				(void *)dev);
+		/* FIXME: free requested_irq's on failure */
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+#endif
+
+static int assigned_device_enable_guest_intx(struct kvm *kvm,
+				struct kvm_assigned_dev_kernel *dev,
+				struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = irq->guest_irq;
+	return 0;
+}
+
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_guest_msi(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
+	dev->host_irq_disabled = false;
+	return 0;
+}
+#endif
+
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_guest_msix(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
+	dev->host_irq_disabled = false;
+	return 0;
+}
+#endif
+
+static int assign_host_irq(struct kvm *kvm,
+			   struct kvm_assigned_dev_kernel *dev,
+			   __u32 host_irq_type)
+{
+	int r = -EEXIST;
+
+	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
+		return r;
+
+	switch (host_irq_type) {
+	case KVM_DEV_IRQ_HOST_INTX:
+		r = assigned_device_enable_host_intx(kvm, dev);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_HOST_MSI:
+		r = assigned_device_enable_host_msi(kvm, dev);
+		break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+	case KVM_DEV_IRQ_HOST_MSIX:
+		r = assigned_device_enable_host_msix(kvm, dev);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+	}
+
+	if (!r)
+		dev->irq_requested_type |= host_irq_type;
+
+	return r;
+}
+
+static int assign_guest_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *dev,
+			    struct kvm_assigned_irq *irq,
+			    unsigned long guest_irq_type)
+{
+	int id;
+	int r = -EEXIST;
+
+	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
+		return r;
+
+	id = kvm_request_irq_source_id(kvm);
+	if (id < 0)
+		return id;
+
+	dev->irq_source_id = id;
+
+	switch (guest_irq_type) {
+	case KVM_DEV_IRQ_GUEST_INTX:
+		r = assigned_device_enable_guest_intx(kvm, dev, irq);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_GUEST_MSI:
+		r = assigned_device_enable_guest_msi(kvm, dev, irq);
+		break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+	case KVM_DEV_IRQ_GUEST_MSIX:
+		r = assigned_device_enable_guest_msix(kvm, dev, irq);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+	}
+
+	if (!r) {
+		dev->irq_requested_type |= guest_irq_type;
+		kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
+	} else
+		kvm_free_irq_source_id(kvm, dev->irq_source_id);
+
+	return r;
+}
+
+/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
+static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
+				   struct kvm_assigned_irq *assigned_irq)
+{
+	int r = -EINVAL;
+	struct kvm_assigned_dev_kernel *match;
+	unsigned long host_irq_type, guest_irq_type;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	if (!irqchip_in_kernel(kvm))
+		return r;
+
+	mutex_lock(&kvm->lock);
+	r = -ENODEV;
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match)
+		goto out;
+
+	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
+	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
+
+	r = -EINVAL;
+	/* can only assign one type at a time */
+	if (hweight_long(host_irq_type) > 1)
+		goto out;
+	if (hweight_long(guest_irq_type) > 1)
+		goto out;
+	if (host_irq_type == 0 && guest_irq_type == 0)
+		goto out;
+
+	r = 0;
+	if (host_irq_type)
+		r = assign_host_irq(kvm, match, host_irq_type);
+	if (r)
+		goto out;
+
+	if (guest_irq_type)
+		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
+					 struct kvm_assigned_irq
+					 *assigned_irq)
+{
+	int r = -ENODEV;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match)
+		goto out;
+
+	r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
+				      struct kvm_assigned_pci_dev *assigned_dev)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+	struct pci_dev *dev;
+
+	down_read(&kvm->slots_lock);
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev->assigned_dev_id);
+	if (match) {
+		/* device already assigned */
+		r = -EEXIST;
+		goto out;
+	}
+
+	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
+	if (match == NULL) {
+		printk(KERN_INFO "%s: Couldn't allocate memory\n",
+		       __func__);
+		r = -ENOMEM;
+		goto out;
+	}
+	dev = pci_get_bus_and_slot(assigned_dev->busnr,
+				   assigned_dev->devfn);
+	if (!dev) {
+		printk(KERN_INFO "%s: host device not found\n", __func__);
+		r = -EINVAL;
+		goto out_free;
+	}
+	if (pci_enable_device(dev)) {
+		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
+		r = -EBUSY;
+		goto out_put;
+	}
+	r = pci_request_regions(dev, "kvm_assigned_device");
+	if (r) {
+		printk(KERN_INFO "%s: Could not get access to device regions\n",
+		       __func__);
+		goto out_disable;
+	}
+
+	pci_reset_function(dev);
+
+	match->assigned_dev_id = assigned_dev->assigned_dev_id;
+	match->host_busnr = assigned_dev->busnr;
+	match->host_devfn = assigned_dev->devfn;
+	match->flags = assigned_dev->flags;
+	match->dev = dev;
+	spin_lock_init(&match->assigned_dev_lock);
+	match->irq_source_id = -1;
+	match->kvm = kvm;
+	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+	INIT_WORK(&match->interrupt_work,
+		  kvm_assigned_dev_interrupt_work_handler);
+
+	list_add(&match->list, &kvm->arch.assigned_dev_head);
+
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		if (!kvm->arch.iommu_domain) {
+			r = kvm_iommu_map_guest(kvm);
+			if (r)
+				goto out_list_del;
+		}
+		r = kvm_assign_device(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
+out:
+	mutex_unlock(&kvm->lock);
+	up_read(&kvm->slots_lock);
+	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
+out_disable:
+	pci_disable_device(dev);
+out_put:
+	pci_dev_put(dev);
+out_free:
+	kfree(match);
+	mutex_unlock(&kvm->lock);
+	up_read(&kvm->slots_lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
+		struct kvm_assigned_pci_dev *assigned_dev)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev->assigned_dev_id);
+	if (!match) {
+		printk(KERN_INFO "%s: device hasn't been assigned before, "
+		  "so cannot be deassigned\n", __func__);
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
+		kvm_deassign_device(kvm, match);
+
+	kvm_free_assigned_device(kvm, match);
+
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+
+#ifdef __KVM_HAVE_MSIX
+static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
+				    struct kvm_assigned_msix_nr *entry_nr)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry_nr->assigned_dev_id);
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_nr_out;
+	}
+
+	if (adev->entries_nr == 0) {
+		adev->entries_nr = entry_nr->entry_nr;
+		if (adev->entries_nr == 0 ||
+		    adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
+			r = -EINVAL;
+			goto msix_nr_out;
+		}
+
+		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
+						entry_nr->entry_nr,
+						GFP_KERNEL);
+		if (!adev->host_msix_entries) {
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+		adev->guest_msix_entries = kzalloc(
+				sizeof(struct kvm_guest_msix_entry) *
+				entry_nr->entry_nr, GFP_KERNEL);
+		if (!adev->guest_msix_entries) {
+			kfree(adev->host_msix_entries);
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+	} else /* Not allowed to set the MSI-X number twice */
+		r = -EINVAL;
+msix_nr_out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
+				       struct kvm_assigned_msix_entry *entry)
+{
+	int r = 0, i;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry->assigned_dev_id);
+
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_entry_out;
+	}
+
+	for (i = 0; i < adev->entries_nr; i++)
+		if (adev->guest_msix_entries[i].vector == 0 ||
+		    adev->guest_msix_entries[i].entry == entry->entry) {
+			adev->guest_msix_entries[i].entry = entry->entry;
+			adev->guest_msix_entries[i].vector = entry->gsi;
+			adev->host_msix_entries[i].entry = entry->entry;
+			break;
+		}
+	if (i == adev->entries_nr) {
+		r = -ENOSPC;
+		goto msix_entry_out;
+	}
+
+msix_entry_out:
+	mutex_unlock(&kvm->lock);
+
+	return r;
+}
+#endif
+
+long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+				  unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	int r = -ENOTTY;
+
+	switch (ioctl) {
+	case KVM_ASSIGN_PCI_DEVICE: {
+		struct kvm_assigned_pci_dev assigned_dev;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+			goto out;
+		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_ASSIGN_IRQ: {
+		r = -EOPNOTSUPP;
+		break;
+	}
+#ifdef KVM_CAP_ASSIGN_DEV_IRQ
+	case KVM_ASSIGN_DEV_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_DEASSIGN_DEV_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+	case KVM_DEASSIGN_PCI_DEVICE: {
+		struct kvm_assigned_pci_dev assigned_dev;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+			goto out;
+		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
+#ifdef KVM_CAP_IRQ_ROUTING
+	case KVM_SET_GSI_ROUTING: {
+		struct kvm_irq_routing routing;
+		struct kvm_irq_routing __user *urouting;
+		struct kvm_irq_routing_entry *entries;
+
+		r = -EFAULT;
+		if (copy_from_user(&routing, argp, sizeof(routing)))
+			goto out;
+		r = -EINVAL;
+		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
+			goto out;
+		if (routing.flags)
+			goto out;
+		r = -ENOMEM;
+		entries = vmalloc(routing.nr * sizeof(*entries));
+		if (!entries)
+			goto out;
+		r = -EFAULT;
+		urouting = argp;
+		if (copy_from_user(entries, urouting->entries,
+				   routing.nr * sizeof(*entries)))
+			goto out_free_irq_routing;
+		r = kvm_set_irq_routing(kvm, entries, routing.nr,
+					routing.flags);
+	out_free_irq_routing:
+		vfree(entries);
+		break;
+	}
+#endif /* KVM_CAP_IRQ_ROUTING */
+#ifdef __KVM_HAVE_MSIX
+	case KVM_ASSIGN_SET_MSIX_NR: {
+		struct kvm_assigned_msix_nr entry_nr;
+		r = -EFAULT;
+		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_ASSIGN_SET_MSIX_ENTRY: {
+		struct kvm_assigned_msix_entry entry;
+		r = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof entry))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
+	}
+out:
+	return r;
+}
+
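The KVM_SET_GSI_ROUTING case expects a struct kvm_irq_routing header followed immediately by routing.nr entries, which is why the handler reads the header first and then copies urouting->entries separately. A minimal userspace sketch that routes one GSI to an IOAPIC pin, assuming vm_fd came from KVM_CREATE_VM and the KVM_IRQCHIP_IOAPIC constant from linux/kvm.h; error handling is elided:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: route GSI 5 to IOAPIC pin 5 via KVM_SET_GSI_ROUTING. */
static int route_gsi5(int vm_fd)
{
	struct kvm_irq_routing *r;
	int ret;

	/* Header plus one entry, laid out contiguously as the ioctl expects. */
	r = calloc(1, sizeof(*r) + sizeof(r->entries[0]));
	if (!r)
		return -1;

	r->nr = 1;
	r->flags = 0;		/* must be zero, see the check above */
	r->entries[0].gsi = 5;
	r->entries[0].type = KVM_IRQ_ROUTING_IRQCHIP;
	r->entries[0].u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC;
	r->entries[0].u.irqchip.pin = 5;

	ret = ioctl(vm_fd, KVM_SET_GSI_ROUTING, r);
	free(r);
	return ret;
}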

+ 0 - 2
virt/kvm/eventfd.c

@@ -61,10 +61,8 @@ irqfd_inject(struct work_struct *work)
 	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
 	struct kvm *kvm = irqfd->kvm;
 
-	mutex_lock(&kvm->irq_lock);
 	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
 	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
-	mutex_unlock(&kvm->irq_lock);
 }
 
 /*

+ 61 - 19
virt/kvm/ioapic.c

@@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 	union kvm_ioapic_redirect_entry entry;
 	int ret = 1;
 
+	mutex_lock(&ioapic->lock);
 	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
 		entry = ioapic->redirtbl[irq];
 		level ^= entry.fields.polarity;
@@ -198,34 +199,51 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 		}
 		trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	}
+	mutex_unlock(&ioapic->lock);
+
 	return ret;
 }
 
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
-				    int trigger_mode)
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
+				     int trigger_mode)
 {
-	union kvm_ioapic_redirect_entry *ent;
+	int i;
+
+	for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+		union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
 
-	ent = &ioapic->redirtbl[pin];
+		if (ent->fields.vector != vector)
+			continue;
 
-	kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
+		/*
+		 * We drop the lock while calling the ack notifiers because
+		 * ack notifier callbacks for assigned devices call back into
+		 * the IOAPIC recursively. Since remote_irr is cleared only
+		 * after the notifiers run, if the same vector is delivered
+		 * while the lock is dropped it is latched into irr and
+		 * delivered again once the ack notifier returns.
+		 */
+		mutex_unlock(&ioapic->lock);
+		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
+		mutex_lock(&ioapic->lock);
+
+		if (trigger_mode != IOAPIC_LEVEL_TRIG)
+			continue;
 
-	if (trigger_mode == IOAPIC_LEVEL_TRIG) {
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 		ent->fields.remote_irr = 0;
-		if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
-			ioapic_service(ioapic, pin);
+		if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+			ioapic_service(ioapic, i);
 	}
 }
 
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-	int i;
 
-	for (i = 0; i < IOAPIC_NUM_PINS; i++)
-		if (ioapic->redirtbl[i].fields.vector == vector)
-			__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
+	mutex_lock(&ioapic->lock);
+	__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
+	mutex_unlock(&ioapic->lock);
 }
 
 static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
@@ -250,8 +268,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
-	mutex_lock(&ioapic->kvm->irq_lock);
 	addr &= 0xff;
+	mutex_lock(&ioapic->lock);
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
 		result = ioapic->ioregsel;
@@ -265,6 +283,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 		result = 0;
 		break;
 	}
+	mutex_unlock(&ioapic->lock);
+
 	switch (len) {
 	case 8:
 		*(u64 *) val = result;
@@ -277,7 +297,6 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	default:
 		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
 	}
-	mutex_unlock(&ioapic->kvm->irq_lock);
 	return 0;
 }
 
@@ -293,15 +312,15 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 		     (void*)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
-	mutex_lock(&ioapic->kvm->irq_lock);
 	if (len == 4 || len == 8)
 		data = *(u32 *) val;
 	else {
 		printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
-		goto unlock;
+		return 0;
 	}
 
 	addr &= 0xff;
+	mutex_lock(&ioapic->lock);
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
 		ioapic->ioregsel = data;
@@ -312,15 +331,14 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 		break;
 #ifdef	CONFIG_IA64
 	case IOAPIC_REG_EOI:
-		kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG);
+		__kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
 		break;
 #endif
 
 	default:
 		break;
 	}
-unlock:
-	mutex_unlock(&ioapic->kvm->irq_lock);
+	mutex_unlock(&ioapic->lock);
 	return 0;
 }
 
@@ -349,6 +367,7 @@ int kvm_ioapic_init(struct kvm *kvm)
 	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
 	if (!ioapic)
 		return -ENOMEM;
+	mutex_init(&ioapic->lock);
 	kvm->arch.vioapic = ioapic;
 	kvm_ioapic_reset(ioapic);
 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
@@ -360,3 +379,26 @@ int kvm_ioapic_init(struct kvm *kvm)
 	return ret;
 }
 
+int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+{
+	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+	if (!ioapic)
+		return -EINVAL;
+
+	mutex_lock(&ioapic->lock);
+	memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
+	mutex_unlock(&ioapic->lock);
+	return 0;
+}
+
+int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+{
+	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+	if (!ioapic)
+		return -EINVAL;
+
+	mutex_lock(&ioapic->lock);
+	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+	mutex_unlock(&ioapic->lock);
+	return 0;
+}

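The pattern introduced in __kvm_ioapic_update_eoi is worth spelling out: a lock that a callback may itself need has to be dropped around the callback, and any state the callback could have changed must be re-checked after relocking. A standalone sketch of that discipline, with a pthread mutex standing in for the kernel mutex and a hypothetical notify callback:

#include <pthread.h>

/* Sketch of the unlock-around-callback discipline from
 * __kvm_ioapic_update_eoi(): the notifier may re-enter this object,
 * so the lock is dropped across the call; "pending" is re-checked
 * afterwards because the callback may have changed it. */
struct dev {
	pthread_mutex_t lock;
	int pending;			/* may be set by notify() re-entering */
	void (*notify)(struct dev *);
};

static void dev_service(struct dev *d)	/* caller holds d->lock */
{
	d->pending = 0;
}

static void dev_ack(struct dev *d)
{
	pthread_mutex_lock(&d->lock);

	/* Callback may take d->lock itself; holding it here would deadlock. */
	pthread_mutex_unlock(&d->lock);
	d->notify(d);
	pthread_mutex_lock(&d->lock);

	if (d->pending)		/* re-check state changed while unlocked */
		dev_service(d);

	pthread_mutex_unlock(&d->lock);
}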
+ 5 - 0
virt/kvm/ioapic.h

@@ -41,9 +41,11 @@ struct kvm_ioapic {
 	u32 irr;
 	u32 pad;
 	union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
+	unsigned long irq_states[IOAPIC_NUM_PINS];
 	struct kvm_io_device dev;
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
+	struct mutex lock;
 };
 
 #ifdef DEBUG
@@ -73,4 +75,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq);
+int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+
 #endif

+ 136 - 95
virt/kvm/irq_comm.c

@@ -31,20 +31,39 @@
 
 #include "ioapic.h"
 
+static inline int kvm_irq_line_state(unsigned long *irq_state,
+				     int irq_source_id, int level)
+{
+	/* Logical OR for level trig interrupt */
+	if (level)
+		set_bit(irq_source_id, irq_state);
+	else
+		clear_bit(irq_source_id, irq_state);
+
+	return !!(*irq_state);
+}
+
 static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
-			   struct kvm *kvm, int level)
+			   struct kvm *kvm, int irq_source_id, int level)
 {
 #ifdef CONFIG_X86
-	return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level);
+	struct kvm_pic *pic = pic_irqchip(kvm);
+	level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin],
+				   irq_source_id, level);
+	return kvm_pic_set_irq(pic, e->irqchip.pin, level);
 #else
 	return -1;
 #endif
 }
 
 static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
-			      struct kvm *kvm, int level)
+			      struct kvm *kvm, int irq_source_id, int level)
 {
-	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+	level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin],
+				   irq_source_id, level);
+
+	return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level);
 }
 
 inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
@@ -63,8 +82,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	int i, r = -1;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
-	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
-
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 			kvm_is_dm_lowest_prio(irq))
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
@@ -96,10 +113,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 }
 
 static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-		       struct kvm *kvm, int level)
+		       struct kvm *kvm, int irq_source_id, int level)
 {
 	struct kvm_lapic_irq irq;
 
+	if (!level)
+		return -1;
+
 	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
 
 	irq.dest_id = (e->msi.address_lo &
@@ -116,78 +136,67 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
-/* This should be called with the kvm->irq_lock mutex held
+/*
  * Return value:
  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
  *  = 0   Interrupt was coalesced (previous irq is still pending)
  *  > 0   Number of CPUs interrupt was delivered to
  */
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 {
-	struct kvm_kernel_irq_routing_entry *e;
-	unsigned long *irq_state, sig_level;
-	int ret = -1;
+	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
+	int ret = -1, i = 0;
+	struct kvm_irq_routing_table *irq_rt;
+	struct hlist_node *n;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
-	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
-
-	if (irq < KVM_IOAPIC_NUM_PINS) {
-		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
-
-		/* Logical OR for level trig interrupt */
-		if (level)
-			set_bit(irq_source_id, irq_state);
-		else
-			clear_bit(irq_source_id, irq_state);
-		sig_level = !!(*irq_state);
-	} else if (!level)
-		return ret;
-	else /* Deal with MSI/MSI-X */
-		sig_level = 1;
-
 	/* Not possible to detect if the guest uses the PIC or the
 	 * IOAPIC.  So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
-	list_for_each_entry(e, &kvm->irq_routing, link)
-		if (e->gsi == irq) {
-			int r = e->set(e, kvm, sig_level);
-			if (r < 0)
-				continue;
+	rcu_read_lock();
+	irq_rt = rcu_dereference(kvm->irq_routing);
+	if (irq < irq_rt->nr_rt_entries)
+		hlist_for_each_entry(e, n, &irq_rt->map[irq], link)
+			irq_set[i++] = *e;
+	rcu_read_unlock();
+
+	while (i--) {
+		int r;
+		r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level);
+		if (r < 0)
+			continue;
+
+		ret = r + ((ret < 0) ? 0 : ret);
+	}
 
-			ret = r + ((ret < 0) ? 0 : ret);
-		}
 	return ret;
 }
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
-	struct kvm_kernel_irq_routing_entry *e;
 	struct kvm_irq_ack_notifier *kian;
 	struct hlist_node *n;
-	unsigned gsi = pin;
+	int gsi;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
-	list_for_each_entry(e, &kvm->irq_routing, link)
-		if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
-		    e->irqchip.irqchip == irqchip &&
-		    e->irqchip.pin == pin) {
-			gsi = e->gsi;
-			break;
-		}
-
-	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
-		if (kian->gsi == gsi)
-			kian->irq_acked(kian);
+	rcu_read_lock();
+	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi)
+				kian->irq_acked(kian);
+	rcu_read_unlock();
 }
 
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
 	mutex_lock(&kvm->irq_lock);
-	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
 }
 
@@ -195,8 +204,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 				    struct kvm_irq_ack_notifier *kian)
 {
 	mutex_lock(&kvm->irq_lock);
-	hlist_del_init(&kian->link);
+	hlist_del_init_rcu(&kian->link);
 	mutex_unlock(&kvm->irq_lock);
+	synchronize_rcu();
 }
 
 int kvm_request_irq_source_id(struct kvm *kvm)
@@ -205,16 +215,17 @@ int kvm_request_irq_source_id(struct kvm *kvm)
 	int irq_source_id;
 
 	mutex_lock(&kvm->irq_lock);
-	irq_source_id = find_first_zero_bit(bitmap,
-				sizeof(kvm->arch.irq_sources_bitmap));
+	irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG);
 
-	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
+	if (irq_source_id >= BITS_PER_LONG) {
 		printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
-		return -EFAULT;
+		irq_source_id = -EFAULT;
+		goto unlock;
 	}
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 	set_bit(irq_source_id, bitmap);
+unlock:
 	mutex_unlock(&kvm->irq_lock);
 
 	return irq_source_id;
@@ -228,13 +239,23 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 
 	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
-	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
+	    irq_source_id >= BITS_PER_LONG) {
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
-		return;
+		goto unlock;
 	}
-	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
-		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+	if (!irqchip_in_kernel(kvm))
+		goto unlock;
+
+	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) {
+		clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]);
+		if (i >= 16)
+			continue;
+#ifdef CONFIG_X86
+		clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]);
+#endif
+	}
+unlock:
 	mutex_unlock(&kvm->irq_lock);
 }
 
@@ -243,7 +264,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 {
 	mutex_lock(&kvm->irq_lock);
 	kimn->irq = irq;
-	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
 }
 
@@ -251,8 +272,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn)
 {
 	mutex_lock(&kvm->irq_lock);
-	hlist_del(&kimn->link);
+	hlist_del_rcu(&kimn->link);
 	mutex_unlock(&kvm->irq_lock);
+	synchronize_rcu();
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -260,33 +282,37 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
 	struct kvm_irq_mask_notifier *kimn;
 	struct hlist_node *n;
 
-	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
-
-	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
 		if (kimn->irq == irq)
 			kimn->func(kimn, mask);
-}
-
-static void __kvm_free_irq_routing(struct list_head *irq_routing)
-{
-	struct kvm_kernel_irq_routing_entry *e, *n;
-
-	list_for_each_entry_safe(e, n, irq_routing, link)
-		kfree(e);
+	rcu_read_unlock();
 }
 
 void kvm_free_irq_routing(struct kvm *kvm)
 {
-	mutex_lock(&kvm->irq_lock);
-	__kvm_free_irq_routing(&kvm->irq_routing);
-	mutex_unlock(&kvm->irq_lock);
+	/* Called only during vm destruction. Nobody can use the pointer
+	   at this stage */
+	kfree(kvm->irq_routing);
 }
 
-static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+static int setup_routing_entry(struct kvm_irq_routing_table *rt,
+			       struct kvm_kernel_irq_routing_entry *e,
 			       const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
 	int delta;
+	struct kvm_kernel_irq_routing_entry *ei;
+	struct hlist_node *n;
+
+	/*
+	 * Do not allow GSI to be mapped to the same irqchip more than once.
+	 * Allow only a one-to-one mapping between GSI and MSI.
+	 */
+	hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link)
+		if (ei->type == KVM_IRQ_ROUTING_MSI ||
+		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
+			return r;
 
 	e->gsi = ue->gsi;
 	e->type = ue->type;
@@ -309,6 +335,9 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 		}
 		e->irqchip.irqchip = ue->u.irqchip.irqchip;
 		e->irqchip.pin = ue->u.irqchip.pin + delta;
+		if (e->irqchip.pin >= KVM_IOAPIC_NUM_PINS)
+			goto out;
+		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
 		break;
 	case KVM_IRQ_ROUTING_MSI:
 		e->set = kvm_set_msi;
@@ -319,6 +348,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 	default:
 		goto out;
 	}
+
+	hlist_add_head(&e->link, &rt->map[e->gsi]);
 	r = 0;
 out:
 	return r;
@@ -330,43 +361,53 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			unsigned nr,
 			unsigned flags)
 {
-	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
-	struct list_head tmp = LIST_HEAD_INIT(tmp);
-	struct kvm_kernel_irq_routing_entry *e = NULL;
-	unsigned i;
+	struct kvm_irq_routing_table *new, *old;
+	u32 i, j, nr_rt_entries = 0;
 	int r;
 
+	for (i = 0; i < nr; ++i) {
+		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
+			return -EINVAL;
+		nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
+	}
+
+	nr_rt_entries += 1;
+
+	new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
+		      + (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
+		      GFP_KERNEL);
+
+	if (!new)
+		return -ENOMEM;
+
+	new->rt_entries = (void *)&new->map[nr_rt_entries];
+
+	new->nr_rt_entries = nr_rt_entries;
+	for (i = 0; i < 3; i++)	/* PIC master, PIC slave, IOAPIC */
+		for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
+			new->chip[i][j] = -1;
+
 	for (i = 0; i < nr; ++i) {
 		r = -EINVAL;
-		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
-			goto out;
 		if (ue->flags)
 			goto out;
-		r = -ENOMEM;
-		e = kzalloc(sizeof(*e), GFP_KERNEL);
-		if (!e)
-			goto out;
-		r = setup_routing_entry(e, ue);
+		r = setup_routing_entry(new, &new->rt_entries[i], ue);
 		if (r)
 			goto out;
 		++ue;
-		list_add(&e->link, &irq_list);
-		e = NULL;
 	}
 
 	mutex_lock(&kvm->irq_lock);
-	list_splice(&kvm->irq_routing, &tmp);
-	INIT_LIST_HEAD(&kvm->irq_routing);
-	list_splice(&irq_list, &kvm->irq_routing);
-	INIT_LIST_HEAD(&irq_list);
-	list_splice(&tmp, &irq_list);
+	old = kvm->irq_routing;
+	rcu_assign_pointer(kvm->irq_routing, new);
 	mutex_unlock(&kvm->irq_lock);
+	synchronize_rcu();
 
+	new = old;	/* free the retired table via the common exit */
 	r = 0;
 
 out:
-	kfree(e);
-	__kvm_free_irq_routing(&irq_list);
+	kfree(new);
 	return r;
 }
 

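The routing rewrite above is the textbook RCU publish/retire sequence: readers walk the table under rcu_read_lock()/rcu_dereference(), the updater publishes with rcu_assign_pointer(), waits out pre-existing readers with synchronize_rcu(), and only then frees the retired table; the "new = old" assignment before the shared kfree() lets the success path (free the old table) and the error path (free the half-built one) share a single exit. The same sequence protects the notifier lists, which is why unregistering now ends with synchronize_rcu(). A minimal userspace sketch using liburcu, whose primitives share the kernel names; reader-thread registration is omitted for brevity:

#include <urcu.h>	/* userspace RCU; kernel primitives share these names */
#include <stdlib.h>

struct table { int nr; /* ... */ };

static struct table *current_table;	/* RCU-protected pointer */

/* Reader: no lock taken; the table seen here stays valid until we
 * leave the read-side critical section. */
static int table_nr(void)
{
	struct table *t;
	int nr;

	rcu_read_lock();
	t = rcu_dereference(current_table);
	nr = t ? t->nr : 0;
	rcu_read_unlock();
	return nr;
}

/* Updater: publish the new table, wait for pre-existing readers to
 * finish, then free the retired one -- the rcu_assign_pointer() /
 * synchronize_rcu() / kfree() sequence in kvm_set_irq_routing(). */
static void table_replace(struct table *new)
{
	struct table *old = current_table;	/* updaters serialized (irq_lock) */

	rcu_assign_pointer(current_table, new);
	synchronize_rcu();
	free(old);
}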
File changes will not be shown because the diff is too large
+ 87 - 808
virt/kvm/kvm_main.c


Some files were not shown because the diff is too large