
Merge commit 'origin/next' into kvm-ppc-next

Alexander Graf 12 years ago
parent
commit
dd92d6f274

+ 6 - 19
Documentation/virtual/kvm/api.txt

@@ -219,19 +219,6 @@ allocation of vcpu ids.  For example, if userspace wants
 single-threaded guest vcpus, it should make all vcpu ids be a multiple
 of the number of vcpus per vcore.
 
-On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
-threads in one or more virtual CPU cores.  (This is because the
-hardware requires all the hardware threads in a CPU core to be in the
-same partition.)  The KVM_CAP_PPC_SMT capability indicates the number
-of vcpus per virtual core (vcore).  The vcore id is obtained by
-dividing the vcpu id by the number of vcpus per vcore.  The vcpus in a
-given vcore will always be in the same physical core as each other
-(though that might be a different physical core from time to time).
-Userspace can control the threading (SMT) mode of the guest by its
-allocation of vcpu ids.  For example, if userspace wants
-single-threaded guest vcpus, it should make all vcpu ids be a multiple
-of the number of vcpus per vcore.
-
 For virtual cpus that have been created with S390 user controlled virtual
 machines, the resulting vcpu fd can be memory mapped at page offset
 KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual
@@ -874,12 +861,12 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
 be identical.  This allows large pages in the guest to be backed by large
 pages in the host.
 
-The flags field supports two flag, KVM_MEM_LOG_DIRTY_PAGES, which instructs
-kvm to keep track of writes to memory within the slot.  See KVM_GET_DIRTY_LOG
-ioctl.  The KVM_CAP_READONLY_MEM capability indicates the availability of the
-KVM_MEM_READONLY flag.  When this flag is set for a memory region, KVM only
-allows read accesses.  Writes will be posted to userspace as KVM_EXIT_MMIO
-exits.
+The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and
+KVM_MEM_READONLY.  The former can be set to instruct KVM to keep track of
+writes to memory within the slot.  See KVM_GET_DIRTY_LOG ioctl to know how to
+use it.  The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,
+to make a new slot read-only.  In this case, writes to this memory will be
+posted to userspace as KVM_EXIT_MMIO exits.
 
 When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of
 the memory region are automatically reflected into the guest.  For example, an

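To make the updated text concrete: a minimal userspace sketch of registering a read-only slot, assuming a /dev/kvm fd (sysfd) and a VM fd (vmfd) already exist; the helper name, slot number and sizes are arbitrary illustration values, not part of this patch.

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

/* Sketch only: back 'size' bytes of guest physical memory at 'gpa' with
 * anonymous host memory and mark the slot read-only, so that guest writes
 * come back to userspace as KVM_EXIT_MMIO exits. */
static int set_readonly_slot(int sysfd, int vmfd, __u64 gpa, size_t size)
{
	struct kvm_userspace_memory_region region;
	void *mem;

	if (ioctl(sysfd, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM) <= 0)
		return -1;			/* KVM_MEM_READONLY not available */

	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED)
		return -1;

	memset(&region, 0, sizeof(region));
	region.slot = 1;			/* arbitrary slot id */
	region.flags = KVM_MEM_READONLY;
	region.guest_phys_addr = gpa;
	region.memory_size = size;
	region.userspace_addr = (__u64)(unsigned long)mem;

	return ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &region);
}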
+ 6 - 0
arch/ia64/kvm/lapic.h

@@ -27,4 +27,10 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
 #define kvm_apic_present(x) (true)
 #define kvm_lapic_enabled(x) (true)
 
+static inline bool kvm_apic_vid_enabled(void)
+{
+	/* IA64 has no apicv support, do nothing here */
+	return false;
+}
+
 #endif

+ 8 - 0
arch/s390/kvm/kvm-s390.c

@@ -770,6 +770,14 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	} else
 		prefix = 0;
 
+	/*
+	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
+	 * copying in vcpu load/put. Let's update our copies before we save
+	 * them into the save area.
+	 */
+	save_fp_regs(&vcpu->arch.guest_fpregs);
+	save_access_regs(vcpu->run->s.regs.acrs);
+
 	if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
 			vcpu->arch.guest_fpregs.fprs, 128, prefix))
 		return -EFAULT;

+ 14 - 11
arch/s390/kvm/kvm-s390.h

@@ -67,8 +67,8 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
 
 static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
 {
-	int base2 = vcpu->arch.sie_block->ipb >> 28;
-	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
 
 	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
 }
@@ -76,10 +76,10 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
 static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
 					      u64 *address1, u64 *address2)
 {
-	int base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
-	int disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
-	int base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12;
-	int disp2 = vcpu->arch.sie_block->ipb & 0x0fff;
+	u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
+	u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
+	u32 base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12;
+	u32 disp2 = vcpu->arch.sie_block->ipb & 0x0fff;
 
 	*address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
 	*address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
@@ -87,17 +87,20 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
 
 static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
 {
-	int base2 = vcpu->arch.sie_block->ipb >> 28;
-	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
 			((vcpu->arch.sie_block->ipb & 0xff00) << 4);
+	/* The displacement is a 20-bit _SIGNED_ value */
+	if (disp2 & 0x80000)
+		disp2 += 0xfff00000;
 
-	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
 }
 
 static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu)
 {
-	int base2 = vcpu->arch.sie_block->ipb >> 28;
-	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
 
 	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
 }

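Read on its own, the sign-extension added above amounts to the following stand-alone helper (an illustrative sketch, not code from this patch; the helper name is made up):

/* Illustrative helper: sign-extend a 20-bit RSY displacement exactly as
 * disp2 is handled in kvm_s390_get_base_disp_rsy() above. */
static inline long disp20_to_long(u32 disp2)
{
	if (disp2 & 0x80000)		/* bit 19 is the sign bit */
		disp2 += 0xfff00000;	/* propagate it into bits 20..31 */
	return (long)(int)disp2;	/* widen with the sign preserved */
}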
+ 6 - 0
arch/x86/include/asm/kvm_host.h

@@ -699,6 +699,11 @@ struct kvm_x86_ops {
 	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
 	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
+	int (*vm_has_apicv)(struct kvm *kvm);
+	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
+	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
+	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
+	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
@@ -993,6 +998,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
+int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);

+ 20 - 1
arch/x86/include/asm/vmx.h

@@ -62,10 +62,12 @@
 #define EXIT_REASON_MCE_DURING_VMENTRY  41
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
 #define EXIT_REASON_APIC_ACCESS         44
+#define EXIT_REASON_EOI_INDUCED         45
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_WBINVD              54
 #define EXIT_REASON_XSETBV              55
+#define EXIT_REASON_APIC_WRITE          56
 #define EXIT_REASON_INVPCID             58
 
 #define VMX_EXIT_REASONS \
@@ -103,7 +105,12 @@
 	{ EXIT_REASON_APIC_ACCESS,           "APIC_ACCESS" }, \
 	{ EXIT_REASON_EPT_VIOLATION,         "EPT_VIOLATION" }, \
 	{ EXIT_REASON_EPT_MISCONFIG,         "EPT_MISCONFIG" }, \
-	{ EXIT_REASON_WBINVD,                "WBINVD" }
+	{ EXIT_REASON_WBINVD,                "WBINVD" }, \
+	{ EXIT_REASON_APIC_WRITE,            "APIC_WRITE" }, \
+	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
+	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
+	{ EXIT_REASON_INVD,                  "INVD" }, \
+	{ EXIT_REASON_INVPCID,               "INVPCID" }
 
 #ifdef __KERNEL__
 
@@ -138,9 +145,12 @@
 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
 #define SECONDARY_EXEC_ENABLE_EPT               0x00000002
 #define SECONDARY_EXEC_RDTSCP			0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE   0x00000010
 #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
 #define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT       0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY    0x00000200
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
 #define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
 
@@ -178,6 +188,7 @@ enum vmcs_field {
 	GUEST_GS_SELECTOR               = 0x0000080a,
 	GUEST_LDTR_SELECTOR             = 0x0000080c,
 	GUEST_TR_SELECTOR               = 0x0000080e,
+	GUEST_INTR_STATUS               = 0x00000810,
 	HOST_ES_SELECTOR                = 0x00000c00,
 	HOST_CS_SELECTOR                = 0x00000c02,
 	HOST_SS_SELECTOR                = 0x00000c04,
@@ -205,6 +216,14 @@ enum vmcs_field {
 	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
 	EPT_POINTER                     = 0x0000201a,
 	EPT_POINTER_HIGH                = 0x0000201b,
+	EOI_EXIT_BITMAP0                = 0x0000201c,
+	EOI_EXIT_BITMAP0_HIGH           = 0x0000201d,
+	EOI_EXIT_BITMAP1                = 0x0000201e,
+	EOI_EXIT_BITMAP1_HIGH           = 0x0000201f,
+	EOI_EXIT_BITMAP2                = 0x00002020,
+	EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
+	EOI_EXIT_BITMAP3                = 0x00002022,
+	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
 	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
 	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
 	VMCS_LINK_POINTER               = 0x00002800,

+ 1 - 1
arch/x86/kvm/emulate.c

@@ -1013,7 +1013,7 @@ static u8 test_cc(unsigned int condition, unsigned long flags)
 	void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
 
 	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
-	asm("pushq %[flags]; popf; call *%[fastop]"
+	asm("push %[flags]; popf; call *%[fastop]"
 	    : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
 	return rc;
 }

+ 51 - 5
arch/x86/kvm/irq.c

@@ -37,6 +37,38 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
+/*
+ * check if there is a pending interrupt from
+ * a non-APIC source, without intack.
+ */
+static int kvm_cpu_has_extint(struct kvm_vcpu *v)
+{
+	if (kvm_apic_accept_pic_intr(v))
+		return pic_irqchip(v->kvm)->output;	/* PIC */
+	else
+		return 0;
+}
+
+/*
+ * check if there is an injectable interrupt:
+ * when virtual interrupt delivery is enabled,
+ * interrupts from the apic are handled by hardware,
+ * so we don't need to check them here.
+ */
+int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
+{
+	if (!irqchip_in_kernel(v->kvm))
+		return v->arch.interrupt.pending;
+
+	if (kvm_cpu_has_extint(v))
+		return 1;
+
+	if (kvm_apic_vid_enabled(v->kvm))
+		return 0;
+
+	return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
+}
+
 /*
  * check if there is pending interrupt without
  * intack.
@@ -46,27 +78,41 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 	if (!irqchip_in_kernel(v->kvm))
 		return v->arch.interrupt.pending;
 
-	if (kvm_apic_accept_pic_intr(v) && pic_irqchip(v->kvm)->output)
-		return pic_irqchip(v->kvm)->output;	/* PIC */
+	if (kvm_cpu_has_extint(v))
+		return 1;
 
 	return kvm_apic_has_interrupt(v) != -1;	/* LAPIC */
 }
 EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
 
+/*
+ * Read pending interrupt(from non-APIC source)
+ * vector and intack.
+ */
+static int kvm_cpu_get_extint(struct kvm_vcpu *v)
+{
+	if (kvm_cpu_has_extint(v))
+		return kvm_pic_read_irq(v->kvm); /* PIC */
+	return -1;
+}
+
 /*
  * Read pending interrupt vector and intack.
  */
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 {
+	int vector;
+
 	if (!irqchip_in_kernel(v->kvm))
 		return v->arch.interrupt.nr;
 
-	if (kvm_apic_accept_pic_intr(v) && pic_irqchip(v->kvm)->output)
-		return kvm_pic_read_irq(v->kvm);	/* PIC */
+	vector = kvm_cpu_get_extint(v);
+
+	if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
+		return vector;			/* PIC */
 
 	return kvm_get_apic_interrupt(v);	/* APIC */
 }
-EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
 
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
 {

+ 108 - 32
arch/x86/kvm/lapic.c

@@ -140,31 +140,56 @@ static inline int apic_enabled(struct kvm_lapic *apic)
 	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 
-static inline int apic_x2apic_mode(struct kvm_lapic *apic)
-{
-	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
-}
-
 static inline int kvm_apic_id(struct kvm_lapic *apic)
 {
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
-static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
+void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
+				struct kvm_lapic_irq *irq,
+				u64 *eoi_exit_bitmap)
 {
-	u16 cid;
-	ldr >>= 32 - map->ldr_bits;
-	cid = (ldr >> map->cid_shift) & map->cid_mask;
+	struct kvm_lapic **dst;
+	struct kvm_apic_map *map;
+	unsigned long bitmap = 1;
+	int i;
 
-	BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
+	rcu_read_lock();
+	map = rcu_dereference(vcpu->kvm->arch.apic_map);
 
-	return cid;
-}
+	if (unlikely(!map)) {
+		__set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
+		goto out;
+	}
 
-static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
-{
-	ldr >>= (32 - map->ldr_bits);
-	return ldr & map->lid_mask;
+	if (irq->dest_mode == 0) { /* physical mode */
+		if (irq->delivery_mode == APIC_DM_LOWEST ||
+				irq->dest_id == 0xff) {
+			__set_bit(irq->vector,
+				  (unsigned long *)eoi_exit_bitmap);
+			goto out;
+		}
+		dst = &map->phys_map[irq->dest_id & 0xff];
+	} else {
+		u32 mda = irq->dest_id << (32 - map->ldr_bits);
+
+		dst = map->logical_map[apic_cluster_id(map, mda)];
+
+		bitmap = apic_logical_id(map, mda);
+	}
+
+	for_each_set_bit(i, &bitmap, 16) {
+		if (!dst[i])
+			continue;
+		if (dst[i]->vcpu == vcpu) {
+			__set_bit(irq->vector,
+				  (unsigned long *)eoi_exit_bitmap);
+			break;
+		}
+	}
+
+out:
+	rcu_read_unlock();
 }
 
 static void recalculate_apic_map(struct kvm *kvm)
@@ -230,6 +255,8 @@ out:
 
 	if (old)
 		kfree_rcu(old, rcu);
+
+	kvm_ioapic_make_eoibitmap_request(kvm);
 }
 
 static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
@@ -345,6 +372,10 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 {
 	int result;
 
+	/*
+	 * Note that irr_pending is just a hint. It will always be
+	 * true with virtual interrupt delivery enabled.
+	 */
 	if (!apic->irr_pending)
 		return -1;
 
@@ -461,6 +492,8 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 {
 	int result;
+
+	/* Note that isr_count is always 1 with vid enabled */
 	if (!apic->isr_count)
 		return -1;
 	if (likely(apic->highest_isr_cache != -1))
@@ -740,6 +773,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
 }
 
+static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
+{
+	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
+	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
+		int trigger_mode;
+		if (apic_test_vector(vector, apic->regs + APIC_TMR))
+			trigger_mode = IOAPIC_LEVEL_TRIG;
+		else
+			trigger_mode = IOAPIC_EDGE_TRIG;
+		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+	}
+}
+
 static int apic_set_eoi(struct kvm_lapic *apic)
 {
 	int vector = apic_find_highest_isr(apic);
@@ -756,19 +802,26 @@ static int apic_set_eoi(struct kvm_lapic *apic)
 	apic_clear_isr(vector, apic);
 	apic_update_ppr(apic);
 
-	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
-	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
-		int trigger_mode;
-		if (apic_test_vector(vector, apic->regs + APIC_TMR))
-			trigger_mode = IOAPIC_LEVEL_TRIG;
-		else
-			trigger_mode = IOAPIC_EDGE_TRIG;
-		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
-	}
+	kvm_ioapic_send_eoi(apic, vector);
 	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 	return vector;
 }
 
+/*
+ * this interface assumes a trap-like exit, which has already finished
+ * desired side effect including vISR and vPPR update.
+ */
+void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	trace_kvm_eoi(apic, vector);
+
+	kvm_ioapic_send_eoi(apic, vector);
+	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
+
 static void apic_send_ipi(struct kvm_lapic *apic)
 {
 	u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
@@ -1212,6 +1265,21 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
 
+/* emulate APIC access in a trap manner */
+void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
+{
+	u32 val = 0;
+
+	/* hw has done the conditional check and inst decode */
+	offset &= 0xff0;
+
+	apic_reg_read(vcpu->arch.apic, offset, 4, &val);
+
+	/* TODO: optimize to just emulate side effect w/o one more write */
+	apic_reg_write(vcpu->arch.apic, offset, val);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
+
 void kvm_free_lapic(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
@@ -1288,6 +1356,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 {
+	u64 old_value = vcpu->arch.apic_base;
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	if (!apic) {
@@ -1309,11 +1378,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 		value &= ~MSR_IA32_APICBASE_BSP;
 
 	vcpu->arch.apic_base = value;
-	if (apic_x2apic_mode(apic)) {
-		u32 id = kvm_apic_id(apic);
-		u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
-		kvm_apic_set_ldr(apic, ldr);
+	if ((old_value ^ value) & X2APIC_ENABLE) {
+		if (value & X2APIC_ENABLE) {
+			u32 id = kvm_apic_id(apic);
+			u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
+			kvm_apic_set_ldr(apic, ldr);
+			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
+		} else
+			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
 	}
+
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
 
@@ -1359,8 +1433,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
 		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 	}
-	apic->irr_pending = false;
-	apic->isr_count = 0;
+	apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
+	apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm);
 	apic->highest_isr_cache = -1;
 	update_divide_count(apic);
 	atomic_set(&apic->lapic_timer.pending, 0);
@@ -1575,8 +1649,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	update_divide_count(apic);
 	start_apic_timer(apic);
 	apic->irr_pending = true;
-	apic->isr_count = count_vectors(apic->regs + APIC_ISR);
+	apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
+				1 : count_vectors(apic->regs + APIC_ISR);
 	apic->highest_isr_cache = -1;
+	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 

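The eoi_exit_bitmap filled in by kvm_calculate_eoi_exitmap() is a 256-bit map, one bit per vector, that is later split across the four 64-bit VMCS fields EOI_EXIT_BITMAP0..3. A small sketch of that mapping (illustrative helpers only, not part of this patch):

/* Illustrative helpers: which u64 word and which bit of eoi_exit_bitmap[4]
 * correspond to a given interrupt vector. */
static inline int eoi_exit_word(u8 vector)
{
	return vector / 64;		/* selects EOI_EXIT_BITMAP0..3 */
}

static inline u64 eoi_exit_mask(u8 vector)
{
	return 1ULL << (vector % 64);	/* bit within that word */
}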
+ 34 - 0
arch/x86/kvm/lapic.h

@@ -64,6 +64,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
 
+void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
+void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
+
 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
@@ -124,4 +127,35 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
 	return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic);
 }
 
+static inline int apic_x2apic_mode(struct kvm_lapic *apic)
+{
+	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
+}
+
+static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
+{
+	return kvm_x86_ops->vm_has_apicv(kvm);
+}
+
+static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
+{
+	u16 cid;
+	ldr >>= 32 - map->ldr_bits;
+	cid = (ldr >> map->cid_shift) & map->cid_mask;
+
+	BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
+
+	return cid;
+}
+
+static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
+{
+	ldr >>= (32 - map->ldr_bits);
+	return ldr & map->lid_mask;
+}
+
+void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
+				struct kvm_lapic_irq *irq,
+				u64 *eoi_bitmap);
+
 #endif

+ 9 - 23
arch/x86/kvm/mmu.c

@@ -448,7 +448,8 @@ static bool __check_direct_spte_mmio_pf(u64 spte)
 
 static bool spte_is_locklessly_modifiable(u64 spte)
 {
-	return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE));
+	return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
+		(SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
 }
 
 static bool spte_has_volatile_bits(u64 spte)
@@ -1460,28 +1461,14 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
 	percpu_counter_add(&kvm_total_used_mmu_pages, nr);
 }
 
-/*
- * Remove the sp from shadow page cache, after call it,
- * we can not find this sp from the cache, and the shadow
- * page table is still valid.
- * It should be under the protection of mmu lock.
- */
-static void kvm_mmu_isolate_page(struct kvm_mmu_page *sp)
+static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
 {
 	ASSERT(is_empty_shadow_page(sp->spt));
 	hlist_del(&sp->hash_link);
-	if (!sp->role.direct)
-		free_page((unsigned long)sp->gfns);
-}
-
-/*
- * Free the shadow page table and the sp, we can do it
- * out of the protection of mmu lock.
- */
-static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
-{
 	list_del(&sp->link);
 	free_page((unsigned long)sp->spt);
+	if (!sp->role.direct)
+		free_page((unsigned long)sp->gfns);
 	kmem_cache_free(mmu_page_header_cache, sp);
 }
 
@@ -2125,7 +2112,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 	do {
 		sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
 		WARN_ON(!sp->role.invalid || sp->root_count);
-		kvm_mmu_isolate_page(sp);
 		kvm_mmu_free_page(sp);
 	} while (!list_empty(invalid_list));
 }
@@ -2327,9 +2313,8 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 		if (s->role.level != PT_PAGE_TABLE_LEVEL)
 			return 1;
 
-		if (!need_unsync && !s->unsync) {
+		if (!s->unsync)
 			need_unsync = true;
-		}
 	}
 	if (need_unsync)
 		kvm_unsync_pages(vcpu, gfn);
@@ -3687,6 +3672,7 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 	else
 		r = paging32_init_context(vcpu, context);
 
+	vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
 	vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
 	vcpu->arch.mmu.base_role.cr0_wp  = is_write_protection(vcpu);
 	vcpu->arch.mmu.base_role.smep_andnot_wp
@@ -3853,7 +3839,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
 		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
 		*gpa &= ~(gpa_t)7;
 		*bytes = 8;
-		r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8));
+		r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8);
 		if (r)
 			gentry = 0;
 		new = (const u8 *)&gentry;
@@ -4007,7 +3993,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 			      !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
 			      & mask.word) && rmap_can_add(vcpu))
 				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
-			if (!remote_flush && need_remote_flush(entry, *spte))
+			if (need_remote_flush(entry, *spte))
 				remote_flush = true;
 			++spte;
 		}

+ 0 - 3
arch/x86/kvm/paging_tmpl.h

@@ -409,9 +409,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	unsigned direct_access, access = gw->pt_access;
 	int top_level, emulate = 0;
 
-	if (!is_present_gpte(gw->ptes[gw->level - 1]))
-		return 0;
-
 	direct_access = gw->pte_access;
 
 	top_level = vcpu->arch.mmu.root_level;

+ 24 - 0
arch/x86/kvm/svm.c

@@ -3571,6 +3571,26 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
 }
 
+static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+{
+	return;
+}
+
+static int svm_vm_has_apicv(struct kvm *kvm)
+{
+	return 0;
+}
+
+static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+	return;
+}
+
+static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
+{
+	return;
+}
+
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -4290,6 +4310,10 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
+	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
+	.vm_has_apicv = svm_vm_has_apicv,
+	.load_eoi_exitmap = svm_load_eoi_exitmap,
+	.hwapic_isr_update = svm_hwapic_isr_update,
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,

+ 316 - 20
arch/x86/kvm/vmx.c

@@ -84,6 +84,9 @@ module_param(vmm_exclusive, bool, S_IRUGO);
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
+static bool __read_mostly enable_apicv_reg_vid = 1;
+module_param(enable_apicv_reg_vid, bool, S_IRUGO);
+
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
  * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -640,6 +643,8 @@ static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
+static unsigned long *vmx_msr_bitmap_legacy_x2apic;
+static unsigned long *vmx_msr_bitmap_longmode_x2apic;
 
 static bool cpu_has_load_ia32_efer;
 static bool cpu_has_load_perf_global_ctrl;
@@ -764,6 +769,24 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 }
 
+static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+}
+
+static inline bool cpu_has_vmx_apic_register_virt(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_APIC_REGISTER_VIRT;
+}
+
+static inline bool cpu_has_vmx_virtual_intr_delivery(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
+}
+
 static inline bool cpu_has_vmx_flexpriority(void)
 {
 	return cpu_has_vmx_tpr_shadow() &&
@@ -1821,6 +1844,25 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
 	vmx->guest_msrs[from] = tmp;
 }
 
+static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
+{
+	unsigned long *msr_bitmap;
+
+	if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
+		if (is_long_mode(vcpu))
+			msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
+		else
+			msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
+	} else {
+		if (is_long_mode(vcpu))
+			msr_bitmap = vmx_msr_bitmap_longmode;
+		else
+			msr_bitmap = vmx_msr_bitmap_legacy;
+	}
+
+	vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
+}
+
 /*
  * Set up the vmcs to automatically save and restore system
  * msrs.  Don't touch the 64-bit msrs if the guest is in legacy
@@ -1829,7 +1871,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
 static void setup_msrs(struct vcpu_vmx *vmx)
 {
 	int save_nmsrs, index;
-	unsigned long *msr_bitmap;
 
 	save_nmsrs = 0;
 #ifdef CONFIG_X86_64
@@ -1861,14 +1902,8 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 
 	vmx->save_nmsrs = save_nmsrs;
 
-	if (cpu_has_vmx_msr_bitmap()) {
-		if (is_long_mode(&vmx->vcpu))
-			msr_bitmap = vmx_msr_bitmap_longmode;
-		else
-			msr_bitmap = vmx_msr_bitmap_legacy;
-
-		vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
-	}
+	if (cpu_has_vmx_msr_bitmap())
+		vmx_set_msr_bitmap(&vmx->vcpu);
 }
 
 /*
@@ -2534,13 +2569,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
 		min2 = 0;
 		opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+			SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
 			SECONDARY_EXEC_WBINVD_EXITING |
 			SECONDARY_EXEC_ENABLE_VPID |
 			SECONDARY_EXEC_ENABLE_EPT |
 			SECONDARY_EXEC_UNRESTRICTED_GUEST |
 			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
 			SECONDARY_EXEC_RDTSCP |
-			SECONDARY_EXEC_ENABLE_INVPCID;
+			SECONDARY_EXEC_ENABLE_INVPCID |
+			SECONDARY_EXEC_APIC_REGISTER_VIRT |
+			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -2551,6 +2589,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
 		_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
 #endif
+
+	if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
+		_cpu_based_2nd_exec_control &= ~(
+				SECONDARY_EXEC_APIC_REGISTER_VIRT |
+				SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+				SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+
 	if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
 		/* CR3 accesses and invlpg don't need to cause VM Exits when EPT
 		   enabled */
@@ -2748,6 +2793,15 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_ple())
 		ple_gap = 0;
 
+	if (!cpu_has_vmx_apic_register_virt() ||
+				!cpu_has_vmx_virtual_intr_delivery())
+		enable_apicv_reg_vid = 0;
+
+	if (enable_apicv_reg_vid)
+		kvm_x86_ops->update_cr8_intercept = NULL;
+	else
+		kvm_x86_ops->hwapic_irr_update = NULL;
+
 	if (nested)
 		nested_vmx_setup_ctls_msrs();
 
@@ -3173,6 +3227,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		if (!is_paging(vcpu)) {
 			hw_cr4 &= ~X86_CR4_PAE;
 			hw_cr4 |= X86_CR4_PSE;
+			/*
+			 * SMEP is disabled if CPU is in non-paging mode in
+			 * hardware. However KVM always uses paging mode to
+			 * emulate guest non-paging mode with TDP.
+			 * To emulate this behavior, SMEP needs to be manually
+			 * disabled when guest switches to non-paging mode.
+			 */
+			hw_cr4 &= ~X86_CR4_SMEP;
 		} else if (!(cr4 & X86_CR4_PAE)) {
 			hw_cr4 &= ~X86_CR4_PAE;
 		}
@@ -3707,7 +3769,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
 	spin_unlock(&vmx_vpid_lock);
 }
 
-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
+#define MSR_TYPE_R	1
+#define MSR_TYPE_W	2
+static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+						u32 msr, int type)
 {
 	int f = sizeof(unsigned long);
 
@@ -3720,20 +3785,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
 	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
 	 */
 	if (msr <= 0x1fff) {
-		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
-		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
+		if (type & MSR_TYPE_R)
+			/* read-low */
+			__clear_bit(msr, msr_bitmap + 0x000 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-low */
+			__clear_bit(msr, msr_bitmap + 0x800 / f);
+
 	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
 		msr &= 0x1fff;
-		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
-		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
+		if (type & MSR_TYPE_R)
+			/* read-high */
+			__clear_bit(msr, msr_bitmap + 0x400 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-high */
+			__clear_bit(msr, msr_bitmap + 0xc00 / f);
+
+	}
+}
+
+static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+						u32 msr, int type)
+{
+	int f = sizeof(unsigned long);
+
+	if (!cpu_has_vmx_msr_bitmap())
+		return;
+
+	/*
+	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+	 * have the write-low and read-high bitmap offsets the wrong way round.
+	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+	 */
+	if (msr <= 0x1fff) {
+		if (type & MSR_TYPE_R)
+			/* read-low */
+			__set_bit(msr, msr_bitmap + 0x000 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-low */
+			__set_bit(msr, msr_bitmap + 0x800 / f);
+
+	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+		msr &= 0x1fff;
+		if (type & MSR_TYPE_R)
+			/* read-high */
+			__set_bit(msr, msr_bitmap + 0x400 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-high */
+			__set_bit(msr, msr_bitmap + 0xc00 / f);
+
 	}
 }
 
 static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
 {
 	if (!longmode_only)
-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
-	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
+		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
+						msr, MSR_TYPE_R | MSR_TYPE_W);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
+						msr, MSR_TYPE_R | MSR_TYPE_W);
+}
+
+static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
+{
+	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+			msr, MSR_TYPE_R);
+	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+			msr, MSR_TYPE_R);
+}
+
+static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
+{
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+			msr, MSR_TYPE_R);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+			msr, MSR_TYPE_R);
+}
+
+static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
+{
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+			msr, MSR_TYPE_W);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+			msr, MSR_TYPE_W);
 }
 
 /*
@@ -3812,6 +3950,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 	return exec_control;
 }
 
+static int vmx_vm_has_apicv(struct kvm *kvm)
+{
+	return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
+}
+
 static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 {
 	u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
@@ -3829,6 +3972,10 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
 	if (!ple_gap)
 		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+	if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
+		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
+				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
 	return exec_control;
 }
 
@@ -3873,6 +4020,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 				vmx_secondary_exec_control(vmx));
 	}
 
+	if (enable_apicv_reg_vid) {
+		vmcs_write64(EOI_EXIT_BITMAP0, 0);
+		vmcs_write64(EOI_EXIT_BITMAP1, 0);
+		vmcs_write64(EOI_EXIT_BITMAP2, 0);
+		vmcs_write64(EOI_EXIT_BITMAP3, 0);
+
+		vmcs_write16(GUEST_INTR_STATUS, 0);
+	}
+
 	if (ple_gap) {
 		vmcs_write32(PLE_GAP, ple_gap);
 		vmcs_write32(PLE_WINDOW, ple_window);
@@ -4787,6 +4943,26 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
 	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
+static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
+{
+	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	int vector = exit_qualification & 0xff;
+
+	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
+	kvm_apic_set_eoi_accelerated(vcpu, vector);
+	return 1;
+}
+
+static int handle_apic_write(struct kvm_vcpu *vcpu)
+{
+	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	u32 offset = exit_qualification & 0xfff;
+
+	/* APIC-write VM exit is trap-like and thus no need to adjust IP */
+	kvm_apic_write_nodecode(vcpu, offset);
+	return 1;
+}
+
 static int handle_task_switch(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5721,6 +5897,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_VMON]                    = handle_vmon,
 	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
+	[EXIT_REASON_APIC_WRITE]              = handle_apic_write,
+	[EXIT_REASON_EOI_INDUCED]             = handle_apic_eoi_induced,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
 	[EXIT_REASON_XSETBV]                  = handle_xsetbv,
 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
@@ -6070,6 +6248,85 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 	vmcs_write32(TPR_THRESHOLD, irr);
 }
 
+static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+{
+	u32 sec_exec_control;
+
+	/*
+	 * There is no point in enabling virtualized x2apic mode without
+	 * enabling apicv
+	 */
+	if (!cpu_has_vmx_virtualize_x2apic_mode() ||
+				!vmx_vm_has_apicv(vcpu->kvm))
+		return;
+
+	if (!vm_need_tpr_shadow(vcpu->kvm))
+		return;
+
+	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+
+	if (set) {
+		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+	} else {
+		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+	}
+	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
+
+	vmx_set_msr_bitmap(vcpu);
+}
+
+static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
+{
+	u16 status;
+	u8 old;
+
+	if (!vmx_vm_has_apicv(kvm))
+		return;
+
+	if (isr == -1)
+		isr = 0;
+
+	status = vmcs_read16(GUEST_INTR_STATUS);
+	old = status >> 8;
+	if (isr != old) {
+		status &= 0xff;
+		status |= isr << 8;
+		vmcs_write16(GUEST_INTR_STATUS, status);
+	}
+}
+
+static void vmx_set_rvi(int vector)
+{
+	u16 status;
+	u8 old;
+
+	status = vmcs_read16(GUEST_INTR_STATUS);
+	old = (u8)status & 0xff;
+	if ((u8)vector != old) {
+		status &= ~0xff;
+		status |= (u8)vector;
+		vmcs_write16(GUEST_INTR_STATUS, status);
+	}
+}
+
+static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
+{
+	if (max_irr == -1)
+		return;
+
+	vmx_set_rvi(max_irr);
+}
+
+static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+	vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
+	vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
+	vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
+	vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
+}
+
 static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -7333,6 +7590,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
+	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
+	.vm_has_apicv = vmx_vm_has_apicv,
+	.load_eoi_exitmap = vmx_load_eoi_exitmap,
+	.hwapic_irr_update = vmx_hwapic_irr_update,
+	.hwapic_isr_update = vmx_hwapic_isr_update,
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
@@ -7365,7 +7627,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 static int __init vmx_init(void)
 {
-	int r, i;
+	int r, i, msr;
 
 	rdmsrl_safe(MSR_EFER, &host_efer);
 
@@ -7386,11 +7648,19 @@ static int __init vmx_init(void)
 	if (!vmx_msr_bitmap_legacy)
 		goto out1;
 
+	vmx_msr_bitmap_legacy_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy_x2apic)
+		goto out2;
 
 	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_msr_bitmap_longmode)
-		goto out2;
+		goto out3;
 
+	vmx_msr_bitmap_longmode_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode_x2apic)
+		goto out4;
 
 	/*
 	 * Allow direct access to the PC debug port (it is often used for I/O
@@ -7422,6 +7692,28 @@ static int __init vmx_init(void)
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+	memcpy(vmx_msr_bitmap_legacy_x2apic,
+			vmx_msr_bitmap_legacy, PAGE_SIZE);
+	memcpy(vmx_msr_bitmap_longmode_x2apic,
+			vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+	if (enable_apicv_reg_vid) {
+		for (msr = 0x800; msr <= 0x8ff; msr++)
+			vmx_disable_intercept_msr_read_x2apic(msr);
+
+		/* According to the SDM, in x2apic mode the whole id reg is
+		 * used.  But KVM only uses the highest eight bits, so the
+		 * read needs to be intercepted. */
+		vmx_enable_intercept_msr_read_x2apic(0x802);
+		/* TMCCT */
+		vmx_enable_intercept_msr_read_x2apic(0x839);
+		/* TPR */
+		vmx_disable_intercept_msr_write_x2apic(0x808);
+		/* EOI */
+		vmx_disable_intercept_msr_write_x2apic(0x80b);
+		/* SELF-IPI */
+		vmx_disable_intercept_msr_write_x2apic(0x83f);
+	}
 
 	if (enable_ept) {
 		kvm_mmu_set_mask_ptes(0ull,
@@ -7435,8 +7727,10 @@ static int __init vmx_init(void)
 
 	return 0;
 
-out3:
+out4:
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
+out3:
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
 out2:
 	free_page((unsigned long)vmx_msr_bitmap_legacy);
 out1:
@@ -7448,6 +7742,8 @@ out:
 
 static void __exit vmx_exit(void)
 {
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 	free_page((unsigned long)vmx_msr_bitmap_legacy);
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
 	free_page((unsigned long)vmx_io_bitmap_b);

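The MSR numbers used in vmx_init() above follow the architectural x2APIC convention MSR = 0x800 + (xAPIC register offset >> 4). A hypothetical helper spelling that out (not part of this patch):

/* Hypothetical helper: map an xAPIC MMIO register offset to its x2APIC MSR. */
static inline u32 x2apic_msr(u32 reg_offset)
{
	return 0x800 + (reg_offset >> 4);
}

/*
 * x2apic_msr(0x020) == 0x802   APIC ID   (read kept intercepted above)
 * x2apic_msr(0x390) == 0x839   TMCCT     (read kept intercepted above)
 * x2apic_msr(0x080) == 0x808   TPR       (write made pass-through)
 * x2apic_msr(0x0B0) == 0x80b   EOI       (write made pass-through)
 * x2apic_msr(0x3F0) == 0x83f   SELF IPI  (write made pass-through)
 */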
+ 21 - 4
arch/x86/kvm/x86.c

@@ -870,8 +870,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
 	kvm_x86_ops->set_efer(vcpu, efer);
 
-	vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
-
 	/* Update reserved bits */
 	if ((efer ^ old_efer) & EFER_NX)
 		kvm_mmu_reset_context(vcpu);
@@ -5565,7 +5563,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
 			vcpu->arch.nmi_injected = true;
 			kvm_x86_ops->set_nmi(vcpu);
 		}
-	} else if (kvm_cpu_has_interrupt(vcpu)) {
+	} else if (kvm_cpu_has_injectable_intr(vcpu)) {
 		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
 					    false);
@@ -5633,6 +5631,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }
 
+static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
+{
+	u64 eoi_exit_bitmap[4];
+
+	memset(eoi_exit_bitmap, 0, 32);
+
+	kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
+	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -5686,6 +5694,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_handle_pmu_event(vcpu);
 		if (kvm_check_request(KVM_REQ_PMI, vcpu))
 			kvm_deliver_pmi(vcpu);
+		if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
+			update_eoi_exitmap(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -5694,10 +5704,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		/* enable NMI/IRQ window open exits if needed */
 		if (vcpu->arch.nmi_pending)
 			kvm_x86_ops->enable_nmi_window(vcpu);
-		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
 			kvm_x86_ops->enable_irq_window(vcpu);
 
 		if (kvm_lapic_enabled(vcpu)) {
+			/*
+			 * Update architecture specific hints for APIC
+			 * virtual interrupt delivery.
+			 */
+			if (kvm_x86_ops->hwapic_irr_update)
+				kvm_x86_ops->hwapic_irr_update(vcpu,
+					kvm_lapic_find_highest_irr(vcpu));
 			update_cr8_intercept(vcpu);
 			kvm_lapic_sync_to_vapic(vcpu);
 		}

+ 11 - 9
drivers/s390/kvm/virtio_ccw.c

@@ -244,9 +244,9 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
 {
 	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
 	int err;
-	struct virtqueue *vq;
+	struct virtqueue *vq = NULL;
 	struct virtio_ccw_vq_info *info;
-	unsigned long size;
+	unsigned long size = 0; /* silence the compiler */
 	unsigned long flags;
 
 	/* Allocate queue. */
@@ -279,11 +279,8 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
 		/* For now, we fail if we can't get the requested size. */
 		dev_warn(&vcdev->cdev->dev, "no vq\n");
 		err = -ENOMEM;
-		free_pages_exact(info->queue, size);
 		goto out_err;
 	}
-	info->vq = vq;
-	vq->priv = info;
 
 	/* Register it with the host. */
 	info->info_block->queue = (__u64)info->queue;
@@ -297,12 +294,12 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
 	err = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_VQ | i);
 	if (err) {
 		dev_warn(&vcdev->cdev->dev, "SET_VQ failed\n");
-		free_pages_exact(info->queue, size);
-		info->vq = NULL;
-		vq->priv = NULL;
 		goto out_err;
 	}
 
+	info->vq = vq;
+	vq->priv = info;
+
 	/* Save it to our list. */
 	spin_lock_irqsave(&vcdev->lock, flags);
 	list_add(&info->node, &vcdev->virtqueues);
@@ -311,8 +308,13 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
 	return vq;
 
 out_err:
-	if (info)
+	if (vq)
+		vring_del_virtqueue(vq);
+	if (info) {
+		if (info->queue)
+			free_pages_exact(info->queue, size);
 		kfree(info->info_block);
+	}
 	kfree(info);
 	return ERR_PTR(err);
 }

+ 3 - 0
include/linux/kvm_host.h

@@ -123,6 +123,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_MASTERCLOCK_UPDATE 19
 #define KVM_REQ_MCLOCK_INPROGRESS 20
 #define KVM_REQ_EPR_EXIT          21
+#define KVM_REQ_EOIBITMAP         22
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
@@ -538,6 +539,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
+void kvm_make_update_eoibitmap_request(struct kvm *kvm);
 
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg);
@@ -691,6 +693,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
 int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
 		int irq_source_id, int level);
+bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);

+ 19 - 6
kernel/sched/core.c

@@ -4316,7 +4316,10 @@ EXPORT_SYMBOL(yield);
  * It's the caller's job to ensure that the target task struct
  * can't go away on us before we can do any checks.
  *
- * Returns true if we indeed boosted the target task.
+ * Returns:
+ *	true (>0) if we indeed boosted the target task.
+ *	false (0) if we failed to boost the target.
+ *	-ESRCH if there's no task to yield to.
  */
 bool __sched yield_to(struct task_struct *p, bool preempt)
 {
@@ -4330,6 +4333,15 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
 
 again:
 	p_rq = task_rq(p);
+	/*
+	 * If we're the only runnable task on the rq and target rq also
+	 * has only one task, there's absolutely no point in yielding.
+	 */
+	if (rq->nr_running == 1 && p_rq->nr_running == 1) {
+		yielded = -ESRCH;
+		goto out_irq;
+	}
+
 	double_rq_lock(rq, p_rq);
 	while (task_rq(p) != p_rq) {
 		double_rq_unlock(rq, p_rq);
@@ -4337,13 +4349,13 @@ again:
 	}
 
 	if (!curr->sched_class->yield_to_task)
-		goto out;
+		goto out_unlock;
 
 	if (curr->sched_class != p->sched_class)
-		goto out;
+		goto out_unlock;
 
 	if (task_running(p_rq, p) || p->state)
-		goto out;
+		goto out_unlock;
 
 	yielded = curr->sched_class->yield_to_task(rq, p, preempt);
 	if (yielded) {
@@ -4356,11 +4368,12 @@ again:
 			resched_task(p_rq->curr);
 	}
 
-out:
+out_unlock:
 	double_rq_unlock(rq, p_rq);
+out_irq:
 	local_irq_restore(flags);
 
-	if (yielded)
+	if (yielded > 0)
 		schedule();
 
 	return yielded;

+ 39 - 0
virt/kvm/ioapic.c

@@ -35,6 +35,7 @@
 #include <linux/hrtimer.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/export.h>
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/current.h>
@@ -115,6 +116,42 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic)
 	smp_wmb();
 }
 
+void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
+					u64 *eoi_exit_bitmap)
+{
+	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+	union kvm_ioapic_redirect_entry *e;
+	struct kvm_lapic_irq irqe;
+	int index;
+
+	spin_lock(&ioapic->lock);
+	/* traverse ioapic entries to set the eoi exit bitmap */
+	for (index = 0; index < IOAPIC_NUM_PINS; index++) {
+		e = &ioapic->redirtbl[index];
+		if (!e->fields.mask &&
+			(e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
+			 kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC,
+				 index))) {
+			irqe.dest_id = e->fields.dest_id;
+			irqe.vector = e->fields.vector;
+			irqe.dest_mode = e->fields.dest_mode;
+			irqe.delivery_mode = e->fields.delivery_mode << 8;
+			kvm_calculate_eoi_exitmap(vcpu, &irqe, eoi_exit_bitmap);
+		}
+	}
+	spin_unlock(&ioapic->lock);
+}
+EXPORT_SYMBOL_GPL(kvm_ioapic_calculate_eoi_exitmap);
+
+void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+	if (!kvm_apic_vid_enabled(kvm) || !ioapic)
+		return;
+	kvm_make_update_eoibitmap_request(kvm);
+}
+
 static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
 	unsigned index;
@@ -156,6 +193,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
 		    && ioapic->irr & (1 << index))
 			ioapic_service(ioapic, index);
+		kvm_ioapic_make_eoibitmap_request(ioapic->kvm);
 		break;
 	}
 }
@@ -455,6 +493,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 	spin_lock(&ioapic->lock);
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
 	update_handled_vectors(ioapic);
+	kvm_ioapic_make_eoibitmap_request(kvm);
 	spin_unlock(&ioapic->lock);
 	return 0;
 }

+ 4 - 0
virt/kvm/ioapic.h

@@ -82,5 +82,9 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq);
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm);
+void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
+					u64 *eoi_exit_bitmap);
+
 
 #endif

+ 3 - 1
virt/kvm/iommu.c

@@ -76,7 +76,9 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 	gfn     = slot->base_gfn;
 	end_gfn = gfn + slot->npages;
 
-	flags = IOMMU_READ | IOMMU_WRITE;
+	flags = IOMMU_READ;
+	if (!(slot->flags & KVM_MEM_READONLY))
+		flags |= IOMMU_WRITE;
 	if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
 		flags |= IOMMU_CACHE;
 

+ 25 - 0
virt/kvm/irq_comm.c

@@ -22,6 +22,7 @@
 
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
+#include <linux/export.h>
 #include <trace/events/kvm.h>
 
 #include <asm/msidef.h>
@@ -237,6 +238,28 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	return ret;
 }
 
+bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	struct hlist_node *n;
+	int gsi;
+
+	rcu_read_lock();
+	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi) {
+				rcu_read_unlock();
+				return true;
+			}
+
+	rcu_read_unlock();
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
@@ -261,6 +284,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
 	mutex_lock(&kvm->irq_lock);
 	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
+	kvm_ioapic_make_eoibitmap_request(kvm);
 }
 
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@@ -270,6 +294,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 	hlist_del_init_rcu(&kian->link);
 	mutex_unlock(&kvm->irq_lock);
 	synchronize_rcu();
+	kvm_ioapic_make_eoibitmap_request(kvm);
 }
 
 int kvm_request_irq_source_id(struct kvm *kvm)

+ 75 - 31
virt/kvm/kvm_main.c

@@ -217,6 +217,11 @@ void kvm_make_mclock_inprogress_request(struct kvm *kvm)
 	make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
 }
 
+void kvm_make_update_eoibitmap_request(struct kvm *kvm)
+{
+	make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP);
+}
+
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 {
 	struct page *page;
@@ -713,6 +718,24 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	return old_memslots; 
 }
 
+/*
+ * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
+ * - create a new memory slot
+ * - delete an existing memory slot
+ * - modify an existing memory slot
+ *   -- move it in the guest physical memory space
+ *   -- just change its flags
+ *
+ * Since flags can be changed by some of these operations, the following
+ * differentiation is the best we can do for __kvm_set_memory_region():
+ */
+enum kvm_mr_change {
+	KVM_MR_CREATE,
+	KVM_MR_DELETE,
+	KVM_MR_MOVE,
+	KVM_MR_FLAGS_ONLY,
+};
+
 /*
  * Allocate some memory and give it an address in the guest physical address
  * space.
@@ -731,6 +754,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	struct kvm_memory_slot *slot;
 	struct kvm_memory_slot old, new;
 	struct kvm_memslots *slots = NULL, *old_memslots;
+	enum kvm_mr_change change;
 
 	r = check_memory_region_flags(mem);
 	if (r)
@@ -772,17 +796,31 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	new.npages = npages;
 	new.flags = mem->flags;
 
-	/*
-	 * Disallow changing a memory slot's size or changing anything about
-	 * zero sized slots that doesn't involve making them non-zero.
-	 */
 	r = -EINVAL;
-	if (npages && old.npages && npages != old.npages)
-		goto out;
-	if (!npages && !old.npages)
+	if (npages) {
+		if (!old.npages)
+			change = KVM_MR_CREATE;
+		else { /* Modify an existing slot. */
+			if ((mem->userspace_addr != old.userspace_addr) ||
+			    (npages != old.npages) ||
+			    ((new.flags ^ old.flags) & KVM_MEM_READONLY))
+				goto out;
+
+			if (base_gfn != old.base_gfn)
+				change = KVM_MR_MOVE;
+			else if (new.flags != old.flags)
+				change = KVM_MR_FLAGS_ONLY;
+			else { /* Nothing to change. */
+				r = 0;
+				goto out;
+			}
+		}
+	} else if (old.npages) {
+		change = KVM_MR_DELETE;
+	} else /* Modify a non-existent slot: disallowed. */
 		goto out;
 
-	if ((npages && !old.npages) || (base_gfn != old.base_gfn)) {
+	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
 		/* Check for overlaps */
 		r = -EEXIST;
 		kvm_for_each_memslot(slot, kvm->memslots) {
@@ -800,20 +838,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		new.dirty_bitmap = NULL;
 
 	r = -ENOMEM;
-
-	/*
-	 * Allocate if a slot is being created.  If modifying a slot,
-	 * the userspace_addr cannot change.
-	 */
-	if (!old.npages) {
+	if (change == KVM_MR_CREATE) {
 		new.user_alloc = user_alloc;
 		new.userspace_addr = mem->userspace_addr;
 
 		if (kvm_arch_create_memslot(&new, npages))
 			goto out_free;
-	} else if (npages && mem->userspace_addr != old.userspace_addr) {
-		r = -EINVAL;
-		goto out_free;
 	}
 
 	/* Allocate page dirty bitmap if needed */
@@ -822,7 +852,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			goto out_free;
 	}
 
-	if (!npages || base_gfn != old.base_gfn) {
+	if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
 		r = -ENOMEM;
 		slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
 				GFP_KERNEL);
@@ -863,15 +893,23 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			goto out_free;
 	}
 
-	/* map new memory slot into the iommu */
-	if (npages) {
+	/*
+	 * IOMMU mapping:  New slots need to be mapped.  Old slots need to be
+	 * un-mapped and re-mapped if their base changes.  Since base change
+	 * unmapping is handled above with slot deletion, mapping alone is
+	 * needed here.  Anything else the iommu might care about for existing
+	 * slots (size changes, userspace addr changes and read-only flag
+	 * changes) is disallowed above, so any other attribute changes getting
+	 * here can be skipped.
+	 */
+	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
 		r = kvm_iommu_map_pages(kvm, &new);
 		if (r)
 			goto out_slots;
 	}
 
 	/* actual memory is freed via old in kvm_free_physmem_slot below */
-	if (!npages) {
+	if (change == KVM_MR_DELETE) {
 		new.dirty_bitmap = NULL;
 		memset(&new.arch, 0, sizeof(new.arch));
 	}
@@ -1669,6 +1707,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 {
 	struct pid *pid;
 	struct task_struct *task = NULL;
+	bool ret = false;
 
 	rcu_read_lock();
 	pid = rcu_dereference(target->pid);
@@ -1676,17 +1715,15 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 		task = get_pid_task(target->pid, PIDTYPE_PID);
 	rcu_read_unlock();
 	if (!task)
-		return false;
+		return ret;
 	if (task->flags & PF_VCPU) {
 		put_task_struct(task);
-		return false;
-	}
-	if (yield_to(task, 1)) {
-		put_task_struct(task);
-		return true;
+		return ret;
 	}
+	ret = yield_to(task, 1);
 	put_task_struct(task);
-	return false;
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
 
@@ -1727,12 +1764,14 @@ bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 	return eligible;
 }
 #endif
+
 void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 {
 	struct kvm *kvm = me->kvm;
 	struct kvm_vcpu *vcpu;
 	int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
 	int yielded = 0;
+	int try = 3;
 	int pass;
 	int i;
 
@@ -1744,7 +1783,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 	 * VCPU is holding the lock that we need and will release it.
 	 * We approximate round-robin by starting at the last boosted VCPU.
 	 */
-	for (pass = 0; pass < 2 && !yielded; pass++) {
+	for (pass = 0; pass < 2 && !yielded && try; pass++) {
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			if (!pass && i <= last_boosted_vcpu) {
 				i = last_boosted_vcpu;
@@ -1757,10 +1796,15 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 				continue;
 			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 				continue;
-			if (kvm_vcpu_yield_to(vcpu)) {
+
+			yielded = kvm_vcpu_yield_to(vcpu);
+			if (yielded > 0) {
 				kvm->last_boosted_vcpu = i;
-				yielded = 1;
 				break;
+			} else if (yielded < 0) {
+				try--;
+				if (!try)
+					break;
 			}
 		}
 	}
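For reference alongside the new kvm_mr_change classification, every case is driven from userspace through the same KVM_SET_USER_MEMORY_REGION ioctl. The sketch below is illustrative only (the function name and offsets are made up); it assumes 'region' describes a slot that was already registered, as in the api.txt example near the top of this commit.

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Illustration only: the remaining operations classified by the new
 * kvm_mr_change enum, as issued from userspace.  'region' describes a slot
 * that was already registered with a non-zero size (KVM_MR_CREATE). */
static void memslot_lifecycle(int vmfd, struct kvm_userspace_memory_region region)
{
	/* KVM_MR_MOVE: same slot, size and userspace_addr, new base gfn. */
	region.guest_phys_addr += 0x200000;
	ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &region);

	/* KVM_MR_FLAGS_ONLY: only the flags change (note that toggling
	 * KVM_MEM_READONLY on an existing slot is rejected above). */
	region.flags |= KVM_MEM_LOG_DIRTY_PAGES;
	ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &region);

	/* KVM_MR_DELETE: same slot with memory_size set to zero. */
	region.memory_size = 0;
	ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &region);
}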