Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (249 commits)
  KVM: Move apic timer migration away from critical section
  KVM: Put kvm_para.h include outside __KERNEL__
  KVM: Fix unbounded preemption latency
  KVM: Initialize the mmu caches only after verifying cpu support
  KVM: MMU: Fix dirty page setting for pages removed from rmap
  KVM: Portability: Move kvm_fpu to asm-x86/kvm.h
  KVM: x86 emulator: Only allow VMCALL/VMMCALL trapped by #UD
  KVM: MMU: Merge shadow level check in FNAME(fetch)
  KVM: MMU: Move kvm_free_some_pages() into critical section
  KVM: MMU: Switch to mmu spinlock
  KVM: MMU: Avoid calling gfn_to_page() in mmu_set_spte()
  KVM: Add kvm_read_guest_atomic()
  KVM: MMU: Concurrent guest walkers
  KVM: Disable vapic support on Intel machines with FlexPriority
  KVM: Accelerated apic support
  KVM: local APIC TPR access reporting facility
  KVM: Print data for unimplemented wrmsr
  KVM: MMU: Add cache miss statistic
  KVM: MMU: Coalesce remote tlb flushes
  KVM: Expose ioapic to ia64 save/restore APIs
  ...
Linus Torvalds, 17 years ago · commit 2c57ee6f92

+ 3 - 0
arch/x86/Kconfig

@@ -107,6 +107,7 @@ config ARCH_SUPPORTS_OPROFILE
 	bool
 	default y
 
+select HAVE_KVM
 
 config ZONE_DMA32
 	bool
@@ -1598,4 +1599,6 @@ source "security/Kconfig"
 
 source "crypto/Kconfig"
 
+source "arch/x86/kvm/Kconfig"
+
 source "lib/Kconfig"

+ 2 - 0
arch/x86/Makefile

@@ -7,6 +7,8 @@ else
         KBUILD_DEFCONFIG := $(ARCH)_defconfig
 endif
 
+core-$(CONFIG_KVM) += arch/x86/kvm/
+
 # BITS is used as extension for files which are available in a 32 bit
 # and a 64 bit version to simplify shared Makefiles.
 # e.g.: obj-y += foo_$(BITS).o

+ 5 - 2
drivers/kvm/Kconfig → arch/x86/kvm/Kconfig

@@ -1,9 +1,12 @@
 #
 # KVM configuration
 #
+config HAVE_KVM
+       bool
+
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
-	depends on X86
+	depends on HAVE_KVM || X86
 	default y
 	---help---
 	  Say Y here to get to see options for using your Linux host to run other
@@ -16,7 +19,7 @@ if VIRTUALIZATION
 
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
-	depends on X86 && EXPERIMENTAL
+	depends on HAVE_KVM && EXPERIMENTAL
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	---help---

+ 5 - 1
drivers/kvm/Makefile → arch/x86/kvm/Makefile

@@ -2,7 +2,11 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
+
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o

+ 4 - 4
drivers/kvm/i8259.c → arch/x86/kvm/i8259.c

@@ -28,6 +28,8 @@
 #include <linux/mm.h>
 #include "irq.h"
 
+#include <linux/kvm_host.h>
+
 /*
  * set irq level. If an edge is detected, then the IRR is set to 1
  */
@@ -181,10 +183,8 @@ int kvm_pic_read_irq(struct kvm_pic *s)
 	return intno;
 }
 
-static void pic_reset(void *opaque)
+void kvm_pic_reset(struct kvm_kpic_state *s)
 {
-	struct kvm_kpic_state *s = opaque;
-
 	s->last_irr = 0;
 	s->irr = 0;
 	s->imr = 0;
@@ -209,7 +209,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
 	addr &= 1;
 	if (addr == 0) {
 		if (val & 0x10) {
-			pic_reset(s);	/* init */
+			kvm_pic_reset(s);	/* init */
 			/*
 			 * deassert a pending interrupt
 			 */
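For context, the reset path renamed above fires when the guest begins the standard 8259A initialization handshake: ICW1 (a command-port write with bit 4 set, the val & 0x10 test above) resets the controller, then three data-port writes configure it. An illustrative guest-side sequence (hypothetical code, not part of this patch):

	outb(0x11, 0x20);	/* ICW1: bit 4 set -> triggers kvm_pic_reset() */
	outb(0x20, 0x21);	/* ICW2: vector base 0x20 */
	outb(0x04, 0x21);	/* ICW3: slave PIC cascaded on IRQ2 */
	outb(0x01, 0x21);	/* ICW4: 8086 mode */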

+ 1 - 21
drivers/kvm/irq.c → arch/x86/kvm/irq.c

@@ -20,8 +20,8 @@
  */
 
 #include <linux/module.h>
+#include <linux/kvm_host.h>
 
-#include "kvm.h"
 #include "irq.h"
 
 /*
@@ -63,26 +63,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 }
 EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
 
-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-	printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
-{
-	int ipi_pcpu = vcpu->cpu;
-
-	if (waitqueue_active(&vcpu->wq)) {
-		wake_up_interruptible(&vcpu->wq);
-		++vcpu->stat.halt_wakeup;
-	}
-	if (vcpu->guest_mode)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
-}
-
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
 {
 	kvm_inject_apic_timer_irqs(vcpu);
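The kvm_vcpu_kick() body removed here is not lost; this series moves it into x86-specific code. The trick is worth spelling out: a halted vcpu is woken through its waitqueue, while a vcpu currently running guest code is sent an IPI whose handler does essentially nothing, because merely receiving a host interrupt forces a VM exit. A condensed sketch of the pattern, using the names from the removed code:

	static void vcpu_kick_intr(void *info)
	{
		/* Intentionally a no-op: delivery of the IPI alone makes
		 * the target cpu exit guest mode. */
	}

	/* Sketch only -- the real function now lives in arch code. */
	void kick(struct kvm_vcpu *vcpu)
	{
		if (waitqueue_active(&vcpu->wq))
			wake_up_interruptible(&vcpu->wq);	/* halted vcpu */
		if (vcpu->guest_mode)				/* running vcpu */
			smp_call_function_single(vcpu->cpu, vcpu_kick_intr,
						 vcpu, 0, 0);
	}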

+ 88 - 0
arch/x86/kvm/irq.h

@@ -0,0 +1,88 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <linux/mm_types.h>
+#include <linux/hrtimer.h>
+#include <linux/kvm_host.h>
+
+#include "iodev.h"
+#include "ioapic.h"
+#include "lapic.h"
+
+struct kvm;
+struct kvm_vcpu;
+
+typedef void irq_request_func(void *opaque, int level);
+
+struct kvm_kpic_state {
+	u8 last_irr;	/* edge detection */
+	u8 irr;		/* interrupt request register */
+	u8 imr;		/* interrupt mask register */
+	u8 isr;		/* interrupt service register */
+	u8 priority_add;	/* highest irq priority */
+	u8 irq_base;
+	u8 read_reg_select;
+	u8 poll;
+	u8 special_mask;
+	u8 init_state;
+	u8 auto_eoi;
+	u8 rotate_on_auto_eoi;
+	u8 special_fully_nested_mode;
+	u8 init4;		/* true if 4 byte init */
+	u8 elcr;		/* PIIX edge/trigger selection */
+	u8 elcr_mask;
+	struct kvm_pic *pics_state;
+};
+
+struct kvm_pic {
+	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
+	irq_request_func *irq_request;
+	void *irq_request_opaque;
+	int output;		/* intr from master PIC */
+	struct kvm_io_device dev;
+};
+
+struct kvm_pic *kvm_create_pic(struct kvm *kvm);
+void kvm_pic_set_irq(void *opaque, int irq, int level);
+int kvm_pic_read_irq(struct kvm_pic *s);
+void kvm_pic_update_irq(struct kvm_pic *s);
+
+static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
+{
+	return kvm->arch.vpic;
+}
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	return pic_irqchip(kvm) != NULL;
+}
+
+void kvm_pic_reset(struct kvm_kpic_state *s);
+
+void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
+void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
+
+#endif
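A typical consumer of the inline helpers above checks irqchip_in_kernel() before touching the emulated PIC, since with an out-of-kernel irqchip the PIC lives in userspace and kvm->arch.vpic is NULL. An illustrative caller (hypothetical helper, not part of this header):

	/* Hypothetical: raise an irq line only when the kernel owns the PIC. */
	static int example_raise_irq(struct kvm *kvm, int irq, int level)
	{
		if (!irqchip_in_kernel(kvm))
			return -ENXIO;	/* userspace emulates the irqchip */
		kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
		return 0;
	}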

+ 1 - 1
drivers/kvm/kvm_svm.h → arch/x86/kvm/kvm_svm.h

@@ -4,10 +4,10 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/kvm_host.h>
 #include <asm/msr.h>
 
 #include "svm.h"
-#include "kvm.h"
 
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64

+ 145 - 71
drivers/kvm/lapic.c → arch/x86/kvm/lapic.c

@@ -17,7 +17,7 @@
  * the COPYING file in the top-level directory.
  */
 
-#include "kvm.h"
+#include <linux/kvm_host.h>
 #include <linux/kvm.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
@@ -56,6 +56,7 @@
 
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
+
 static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
 {
 	return *((u32 *) (apic->regs + reg_off));
@@ -88,7 +89,7 @@ static inline void apic_clear_vector(int vec, void *bitmap)
 
 static inline int apic_hw_enabled(struct kvm_lapic *apic)
 {
-	return (apic)->vcpu->apic_base & MSR_IA32_APICBASE_ENABLE;
+	return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
 }
 
 static inline int  apic_sw_enabled(struct kvm_lapic *apic)
@@ -172,7 +173,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 	int highest_irr;
 
 	if (!apic)
@@ -183,8 +184,10 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 
-int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
 {
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
 	if (!apic_test_and_set_irr(vec, apic)) {
 		/* a new pending irq is set in IRR */
 		if (trig)
@@ -268,7 +271,7 @@ static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 			   int short_hand, int dest, int dest_mode)
 {
 	int result = 0;
-	struct kvm_lapic *target = vcpu->apic;
+	struct kvm_lapic *target = vcpu->arch.apic;
 
 	apic_debug("target %p, source %p, dest 0x%x, "
 		   "dest_mode 0x%x, short_hand 0x%x",
@@ -335,10 +338,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		} else
 			apic_clear_vector(vector, apic->regs + APIC_TMR);
 
-		if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE)
+		if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE)
 			kvm_vcpu_kick(vcpu);
-		else if (vcpu->mp_state == VCPU_MP_STATE_HALTED) {
-			vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+		else if (vcpu->arch.mp_state == VCPU_MP_STATE_HALTED) {
+			vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
 			if (waitqueue_active(&vcpu->wq))
 				wake_up_interruptible(&vcpu->wq);
 		}
@@ -359,11 +362,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 
 	case APIC_DM_INIT:
 		if (level) {
-			if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE)
+			if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE)
 				printk(KERN_DEBUG
 				       "INIT on a runnable vcpu %d\n",
 				       vcpu->vcpu_id);
-			vcpu->mp_state = VCPU_MP_STATE_INIT_RECEIVED;
+			vcpu->arch.mp_state = VCPU_MP_STATE_INIT_RECEIVED;
 			kvm_vcpu_kick(vcpu);
 		} else {
 			printk(KERN_DEBUG
@@ -376,9 +379,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	case APIC_DM_STARTUP:
 		printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n",
 		       vcpu->vcpu_id, vector);
-		if (vcpu->mp_state == VCPU_MP_STATE_INIT_RECEIVED) {
-			vcpu->sipi_vector = vector;
-			vcpu->mp_state = VCPU_MP_STATE_SIPI_RECEIVED;
+		if (vcpu->arch.mp_state == VCPU_MP_STATE_INIT_RECEIVED) {
+			vcpu->arch.sipi_vector = vector;
+			vcpu->arch.mp_state = VCPU_MP_STATE_SIPI_RECEIVED;
 			if (waitqueue_active(&vcpu->wq))
 				wake_up_interruptible(&vcpu->wq);
 		}
@@ -392,15 +395,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	return result;
 }
 
-struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
+static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
 				       unsigned long bitmap)
 {
-	int vcpu_id;
 	int last;
 	int next;
-	struct kvm_lapic *apic;
+	struct kvm_lapic *apic = NULL;
 
-	last = kvm->round_robin_prev_vcpu;
+	last = kvm->arch.round_robin_prev_vcpu;
 	next = last;
 
 	do {
@@ -408,25 +410,30 @@ struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
 			next = 0;
 		if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap))
 			continue;
-		apic = kvm->vcpus[next]->apic;
+		apic = kvm->vcpus[next]->arch.apic;
 		if (apic && apic_enabled(apic))
 			break;
 		apic = NULL;
 	} while (next != last);
-	kvm->round_robin_prev_vcpu = next;
+	kvm->arch.round_robin_prev_vcpu = next;
 
-	if (!apic) {
-		vcpu_id = ffs(bitmap) - 1;
-		if (vcpu_id < 0) {
-			vcpu_id = 0;
-			printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
-		}
-		apic = kvm->vcpus[vcpu_id]->apic;
-	}
+	if (!apic)
+		printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
 
 	return apic;
 }
 
+struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
+		unsigned long bitmap)
+{
+	struct kvm_lapic *apic;
+
+	apic = kvm_apic_round_robin(kvm, vector, bitmap);
+	if (apic)
+		return apic->vcpu;
+	return NULL;
+}
+
 static void apic_set_eoi(struct kvm_lapic *apic)
 {
 	int vector = apic_find_highest_isr(apic);
@@ -458,7 +465,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 	unsigned int delivery_mode = icr_low & APIC_MODE_MASK;
 	unsigned int vector = icr_low & APIC_VECTOR_MASK;
 
-	struct kvm_lapic *target;
+	struct kvm_vcpu *target;
 	struct kvm_vcpu *vcpu;
 	unsigned long lpr_map = 0;
 	int i;
@@ -474,20 +481,20 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 		if (!vcpu)
 			continue;
 
-		if (vcpu->apic &&
+		if (vcpu->arch.apic &&
 		    apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) {
 			if (delivery_mode == APIC_DM_LOWEST)
 				set_bit(vcpu->vcpu_id, &lpr_map);
 			else
-				__apic_accept_irq(vcpu->apic, delivery_mode,
+				__apic_accept_irq(vcpu->arch.apic, delivery_mode,
 						  vector, level, trig_mode);
 		}
 	}
 
 	if (delivery_mode == APIC_DM_LOWEST) {
-		target = kvm_apic_round_robin(vcpu->kvm, vector, lpr_map);
+		target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map);
 		if (target != NULL)
-			__apic_accept_irq(target, delivery_mode,
+			__apic_accept_irq(target->arch.apic, delivery_mode,
 					  vector, level, trig_mode);
 	}
 }
@@ -544,6 +551,23 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
 	return tmcct;
 }
 
+static void __report_tpr_access(struct kvm_lapic *apic, bool write)
+{
+	struct kvm_vcpu *vcpu = apic->vcpu;
+	struct kvm_run *run = vcpu->run;
+
+	set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests);
+	kvm_x86_ops->cache_regs(vcpu);
+	run->tpr_access.rip = vcpu->arch.rip;
+	run->tpr_access.is_write = write;
+}
+
+static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
+{
+	if (apic->vcpu->arch.tpr_access_reporting)
+		__report_tpr_access(apic, write);
+}
+
 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 {
 	u32 val = 0;
@@ -561,6 +585,9 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 		val = apic_get_tmcct(apic);
 		break;
 
+	case APIC_TASKPRI:
+		report_tpr_access(apic, false);
+		/* fall thru */
 	default:
 		apic_update_ppr(apic);
 		val = apic_get_reg(apic, offset);
@@ -670,6 +697,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		break;
 
 	case APIC_TASKPRI:
+		report_tpr_access(apic, true);
 		apic_set_tpr(apic, val & 0xff);
 		break;
 
@@ -762,19 +790,17 @@ static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
 	return ret;
 }
 
-void kvm_free_apic(struct kvm_lapic *apic)
+void kvm_free_lapic(struct kvm_vcpu *vcpu)
 {
-	if (!apic)
+	if (!vcpu->arch.apic)
 		return;
 
-	hrtimer_cancel(&apic->timer.dev);
+	hrtimer_cancel(&vcpu->arch.apic->timer.dev);
 
-	if (apic->regs_page) {
-		__free_page(apic->regs_page);
-		apic->regs_page = 0;
-	}
+	if (vcpu->arch.apic->regs_page)
+		__free_page(vcpu->arch.apic->regs_page);
 
-	kfree(apic);
+	kfree(vcpu->arch.apic);
 }
 
 /*
@@ -785,16 +811,17 @@ void kvm_free_apic(struct kvm_lapic *apic)
 
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	if (!apic)
 		return;
-	apic_set_tpr(apic, ((cr8 & 0x0f) << 4));
+	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
+		     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
 }
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 	u64 tpr;
 
 	if (!apic)
@@ -807,29 +834,29 @@ EXPORT_SYMBOL_GPL(kvm_lapic_get_cr8);
 
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	if (!apic) {
 		value |= MSR_IA32_APICBASE_BSP;
-		vcpu->apic_base = value;
+		vcpu->arch.apic_base = value;
 		return;
 	}
 	if (apic->vcpu->vcpu_id)
 		value &= ~MSR_IA32_APICBASE_BSP;
 
-	vcpu->apic_base = value;
-	apic->base_address = apic->vcpu->apic_base &
+	vcpu->arch.apic_base = value;
+	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
 
 	/* with FSB delivery interrupt, we can restart APIC functionality */
 	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
-		   "0x%lx.\n", apic->apic_base, apic->base_address);
+		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
 
 }
 
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
 {
-	return vcpu->apic_base;
+	return vcpu->arch.apic_base;
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_get_base);
 
@@ -841,7 +868,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	apic_debug("%s\n", __FUNCTION__);
 
 	ASSERT(vcpu);
-	apic = vcpu->apic;
+	apic = vcpu->arch.apic;
 	ASSERT(apic != NULL);
 
 	/* Stop the timer in case it's a reset to an active apic */
@@ -872,19 +899,19 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	update_divide_count(apic);
 	atomic_set(&apic->timer.pending, 0);
 	if (vcpu->vcpu_id == 0)
-		vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
+		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
 	apic_update_ppr(apic);
 
 	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
 		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__,
 		   vcpu, kvm_apic_id(apic),
-		   vcpu->apic_base, apic->base_address);
+		   vcpu->arch.apic_base, apic->base_address);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_reset);
 
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 	int ret = 0;
 
 	if (!apic)
@@ -908,9 +935,8 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
 	wait_queue_head_t *q = &apic->vcpu->wq;
 
 	atomic_inc(&apic->timer.pending);
-	if (waitqueue_active(q))
-	{
-		apic->vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+	if (waitqueue_active(q)) {
+		apic->vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
 		wake_up_interruptible(q);
 	}
 	if (apic_lvtt_period(apic)) {
@@ -956,13 +982,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	if (!apic)
 		goto nomem;
 
-	vcpu->apic = apic;
+	vcpu->arch.apic = apic;
 
 	apic->regs_page = alloc_page(GFP_KERNEL);
 	if (apic->regs_page == NULL) {
 		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
 		       vcpu->vcpu_id);
-		goto nomem;
+		goto nomem_free_apic;
 	}
 	apic->regs = page_address(apic->regs_page);
 	memset(apic->regs, 0, PAGE_SIZE);
@@ -971,7 +997,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	apic->timer.dev.function = apic_timer_fn;
 	apic->base_address = APIC_DEFAULT_PHYS_BASE;
-	vcpu->apic_base = APIC_DEFAULT_PHYS_BASE;
+	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
 
 	kvm_lapic_reset(vcpu);
 	apic->dev.read = apic_mmio_read;
@@ -980,15 +1006,16 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	apic->dev.private = apic;
 
 	return 0;
+nomem_free_apic:
+	kfree(apic);
 nomem:
-	kvm_free_apic(apic);
 	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(kvm_create_lapic);
 
 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 	int highest_irr;
 
 	if (!apic || !apic_enabled(apic))
@@ -1004,11 +1031,11 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 
 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 {
-	u32 lvt0 = apic_get_reg(vcpu->apic, APIC_LVT0);
+	u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
 	int r = 0;
 
 	if (vcpu->vcpu_id == 0) {
-		if (!apic_hw_enabled(vcpu->apic))
+		if (!apic_hw_enabled(vcpu->arch.apic))
 			r = 1;
 		if ((lvt0 & APIC_LVT_MASKED) == 0 &&
 		    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
@@ -1019,7 +1046,7 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	if (apic && apic_lvt_enabled(apic, APIC_LVTT) &&
 		atomic_read(&apic->timer.pending) > 0) {
@@ -1030,7 +1057,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 
 void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
 {
-	struct kvm_lapic *apic = vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
 		apic->timer.last_update = ktime_add_ns(
@@ -1041,7 +1068,7 @@ void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 {
 	int vector = kvm_apic_has_interrupt(vcpu);
-	struct kvm_lapic *apic = vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	if (vector == -1)
 		return -1;
@@ -1054,9 +1081,9 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	apic->base_address = vcpu->apic_base &
+	apic->base_address = vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
 	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
 	apic_update_ppr(apic);
@@ -1065,9 +1092,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 	start_apic_timer(apic);
 }
 
-void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
+void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 	struct hrtimer *timer;
 
 	if (!apic)
@@ -1077,4 +1104,51 @@ void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 	if (hrtimer_cancel(timer))
 		hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
 }
-EXPORT_SYMBOL_GPL(kvm_migrate_apic_timer);
+
+void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
+{
+	u32 data;
+	void *vapic;
+
+	if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
+		return;
+
+	vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
+	data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
+	kunmap_atomic(vapic, KM_USER0);
+
+	apic_set_tpr(vcpu->arch.apic, data & 0xff);
+}
+
+void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
+{
+	u32 data, tpr;
+	int max_irr, max_isr;
+	struct kvm_lapic *apic;
+	void *vapic;
+
+	if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
+		return;
+
+	apic = vcpu->arch.apic;
+	tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff;
+	max_irr = apic_find_highest_irr(apic);
+	if (max_irr < 0)
+		max_irr = 0;
+	max_isr = apic_find_highest_isr(apic);
+	if (max_isr < 0)
+		max_isr = 0;
+	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
+
+	vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
+	*(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
+	kunmap_atomic(vapic, KM_USER0);
+}
+
+void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
+{
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return;
+
+	vcpu->arch.apic->vapic_addr = vapic_addr;
+}
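The word that kvm_lapic_sync_to_vapic() writes packs three APIC fields so the guest can read its task priority without a VM exit: byte 0 holds the TPR, byte 1 the high nibble of the highest in-service vector, byte 3 the highest pending (IRR) vector; byte 2 is unused here. A sketch of how a reader would unpack it (hypothetical helper, not kernel API):

	struct vapic_word { u8 tpr, isr_hi, max_irr; };

	static struct vapic_word decode_vapic(u32 data)
	{
		struct vapic_word w;

		w.tpr     = data & 0xff;	/* byte 0: task priority */
		w.isr_hi  = (data >> 8) & 0xff;	/* byte 1: max_isr & 0xf0 */
		w.max_irr = data >> 24;		/* byte 3: highest pending vector */
		return w;
	}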

+ 50 - 0
arch/x86/kvm/lapic.h

@@ -0,0 +1,50 @@
+#ifndef __KVM_X86_LAPIC_H
+#define __KVM_X86_LAPIC_H
+
+#include "iodev.h"
+
+#include <linux/kvm_host.h>
+
+struct kvm_lapic {
+	unsigned long base_address;
+	struct kvm_io_device dev;
+	struct {
+		atomic_t pending;
+		s64 period;	/* unit: ns */
+		u32 divide_count;
+		ktime_t last_update;
+		struct hrtimer dev;
+	} timer;
+	struct kvm_vcpu *vcpu;
+	struct page *regs_page;
+	void *regs;
+	gpa_t vapic_addr;
+	struct page *vapic_page;
+};
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
+void kvm_free_lapic(struct kvm_vcpu *vcpu);
+
+int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
+int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
+void kvm_lapic_reset(struct kvm_vcpu *vcpu);
+u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
+void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
+void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
+
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
+void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
+int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
+int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
+void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+
+void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
+void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
+void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
+
+#endif

+ 1885 - 0
arch/x86/kvm/mmu.c

@@ -0,0 +1,1885 @@
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * This module enables machines with Intel VT-x extensions to run virtual
+ * machines without emulation or binary translation.
+ *
+ * MMU support
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ *
+ * Authors:
+ *   Yaniv Kamay  <yaniv@qumranet.com>
+ *   Avi Kivity   <avi@qumranet.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "vmx.h"
+#include "mmu.h"
+
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+
+#include <asm/page.h>
+#include <asm/cmpxchg.h>
+#include <asm/io.h>
+
+#undef MMU_DEBUG
+
+#undef AUDIT
+
+#ifdef AUDIT
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
+#else
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
+#endif
+
+#ifdef MMU_DEBUG
+
+#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
+#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
+
+#else
+
+#define pgprintk(x...) do { } while (0)
+#define rmap_printk(x...) do { } while (0)
+
+#endif
+
+#if defined(MMU_DEBUG) || defined(AUDIT)
+static int dbg = 1;
+#endif
+
+#ifndef MMU_DEBUG
+#define ASSERT(x) do { } while (0)
+#else
+#define ASSERT(x)							\
+	if (!(x)) {							\
+		printk(KERN_WARNING "assertion failed %s:%d: %s\n",	\
+		       __FILE__, __LINE__, #x);				\
+	}
+#endif
+
+#define PT64_PT_BITS 9
+#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
+#define PT32_PT_BITS 10
+#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
+
+#define PT_WRITABLE_SHIFT 1
+
+#define PT_PRESENT_MASK (1ULL << 0)
+#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
+#define PT_USER_MASK (1ULL << 2)
+#define PT_PWT_MASK (1ULL << 3)
+#define PT_PCD_MASK (1ULL << 4)
+#define PT_ACCESSED_MASK (1ULL << 5)
+#define PT_DIRTY_MASK (1ULL << 6)
+#define PT_PAGE_SIZE_MASK (1ULL << 7)
+#define PT_PAT_MASK (1ULL << 7)
+#define PT_GLOBAL_MASK (1ULL << 8)
+#define PT64_NX_SHIFT 63
+#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
+
+#define PT_PAT_SHIFT 7
+#define PT_DIR_PAT_SHIFT 12
+#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
+
+#define PT32_DIR_PSE36_SIZE 4
+#define PT32_DIR_PSE36_SHIFT 13
+#define PT32_DIR_PSE36_MASK \
+	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
+
+
+#define PT_FIRST_AVAIL_BITS_SHIFT 9
+#define PT64_SECOND_AVAIL_BITS_SHIFT 52
+
+#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+
+#define VALID_PAGE(x) ((x) != INVALID_PAGE)
+
+#define PT64_LEVEL_BITS 9
+
+#define PT64_LEVEL_SHIFT(level) \
+		(PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)
+
+#define PT64_LEVEL_MASK(level) \
+		(((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))
+
+#define PT64_INDEX(address, level)\
+	(((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))
+
+
+#define PT32_LEVEL_BITS 10
+
+#define PT32_LEVEL_SHIFT(level) \
+		(PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS)
+
+#define PT32_LEVEL_MASK(level) \
+		(((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))
+
+#define PT32_INDEX(address, level)\
+	(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
+
+
+#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#define PT64_DIR_BASE_ADDR_MASK \
+	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))
+
+#define PT32_BASE_ADDR_MASK PAGE_MASK
+#define PT32_DIR_BASE_ADDR_MASK \
+	(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))
+
+#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
+			| PT64_NX_MASK)
+
+#define PFERR_PRESENT_MASK (1U << 0)
+#define PFERR_WRITE_MASK (1U << 1)
+#define PFERR_USER_MASK (1U << 2)
+#define PFERR_FETCH_MASK (1U << 4)
+
+#define PT64_ROOT_LEVEL 4
+#define PT32_ROOT_LEVEL 2
+#define PT32E_ROOT_LEVEL 3
+
+#define PT_DIRECTORY_LEVEL 2
+#define PT_PAGE_TABLE_LEVEL 1
+
+#define RMAP_EXT 4
+
+#define ACC_EXEC_MASK    1
+#define ACC_WRITE_MASK   PT_WRITABLE_MASK
+#define ACC_USER_MASK    PT_USER_MASK
+#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
+
+struct kvm_rmap_desc {
+	u64 *shadow_ptes[RMAP_EXT];
+	struct kvm_rmap_desc *more;
+};
+
+static struct kmem_cache *pte_chain_cache;
+static struct kmem_cache *rmap_desc_cache;
+static struct kmem_cache *mmu_page_header_cache;
+
+static u64 __read_mostly shadow_trap_nonpresent_pte;
+static u64 __read_mostly shadow_notrap_nonpresent_pte;
+
+void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
+{
+	shadow_trap_nonpresent_pte = trap_pte;
+	shadow_notrap_nonpresent_pte = notrap_pte;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
+
+static int is_write_protection(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.cr0 & X86_CR0_WP;
+}
+
+static int is_cpuid_PSE36(void)
+{
+	return 1;
+}
+
+static int is_nx(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.shadow_efer & EFER_NX;
+}
+
+static int is_present_pte(unsigned long pte)
+{
+	return pte & PT_PRESENT_MASK;
+}
+
+static int is_shadow_present_pte(u64 pte)
+{
+	pte &= ~PT_SHADOW_IO_MARK;
+	return pte != shadow_trap_nonpresent_pte
+		&& pte != shadow_notrap_nonpresent_pte;
+}
+
+static int is_writeble_pte(unsigned long pte)
+{
+	return pte & PT_WRITABLE_MASK;
+}
+
+static int is_dirty_pte(unsigned long pte)
+{
+	return pte & PT_DIRTY_MASK;
+}
+
+static int is_io_pte(unsigned long pte)
+{
+	return pte & PT_SHADOW_IO_MARK;
+}
+
+static int is_rmap_pte(u64 pte)
+{
+	return pte != shadow_trap_nonpresent_pte
+		&& pte != shadow_notrap_nonpresent_pte;
+}
+
+static gfn_t pse36_gfn_delta(u32 gpte)
+{
+	int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT;
+
+	return (gpte & PT32_DIR_PSE36_MASK) << shift;
+}
+
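+/*
+ * Note: set_64bit() below gives a single atomic 64-bit store even on
+ * 32-bit hosts; the hardware page walker on another cpu may read the
+ * spte concurrently and must never see a torn, half-written entry.
+ */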
+static void set_shadow_pte(u64 *sptep, u64 spte)
+{
+#ifdef CONFIG_X86_64
+	set_64bit((unsigned long *)sptep, spte);
+#else
+	set_64bit((unsigned long long *)sptep, spte);
+#endif
+}
+
+static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
+				  struct kmem_cache *base_cache, int min)
+{
+	void *obj;
+
+	if (cache->nobjs >= min)
+		return 0;
+	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
+		obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
+		if (!obj)
+			return -ENOMEM;
+		cache->objects[cache->nobjs++] = obj;
+	}
+	return 0;
+}
+
+static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
+{
+	while (mc->nobjs)
+		kfree(mc->objects[--mc->nobjs]);
+}
+
+static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
+				       int min)
+{
+	struct page *page;
+
+	if (cache->nobjs >= min)
+		return 0;
+	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
+		page = alloc_page(GFP_KERNEL);
+		if (!page)
+			return -ENOMEM;
+		set_page_private(page, 0);
+		cache->objects[cache->nobjs++] = page_address(page);
+	}
+	return 0;
+}
+
+static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc)
+{
+	while (mc->nobjs)
+		free_page((unsigned long)mc->objects[--mc->nobjs]);
+}
+
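+/*
+ * Note: the topup functions above allocate with GFP_KERNEL and may
+ * sleep, so callers fill the caches *before* taking mmu_lock (see
+ * kvm_mmu_load()) and later draw from them atomically through
+ * mmu_memory_cache_alloc(), which never allocates.
+ */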
+static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+{
+	int r;
+
+	r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_chain_cache,
+				   pte_chain_cache, 4);
+	if (r)
+		goto out;
+	r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
+				   rmap_desc_cache, 1);
+	if (r)
+		goto out;
+	r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
+	if (r)
+		goto out;
+	r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
+				   mmu_page_header_cache, 4);
+out:
+	return r;
+}
+
+static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
+{
+	mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache);
+	mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache);
+	mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
+	mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
+}
+
+static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
+				    size_t size)
+{
+	void *p;
+
+	BUG_ON(!mc->nobjs);
+	p = mc->objects[--mc->nobjs];
+	memset(p, 0, size);
+	return p;
+}
+
+static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
+{
+	return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_chain_cache,
+				      sizeof(struct kvm_pte_chain));
+}
+
+static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
+{
+	kfree(pc);
+}
+
+static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
+{
+	return mmu_memory_cache_alloc(&vcpu->arch.mmu_rmap_desc_cache,
+				      sizeof(struct kvm_rmap_desc));
+}
+
+static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
+{
+	kfree(rd);
+}
+
+/*
+ * Take gfn and return the reverse mapping to it.
+ * Note: gfn must be unaliased before this function gets called
+ */
+
+static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot;
+
+	slot = gfn_to_memslot(kvm, gfn);
+	return &slot->rmap[gfn - slot->base_gfn];
+}
+
+/*
+ * Reverse mapping data structures:
+ *
+ * If rmapp bit zero is zero, then rmapp points to the shadow page table entry
+ * that points to page_address(page).
+ *
+ * If rmapp bit zero is one, then (rmapp & ~1) points to a struct kvm_rmap_desc
+ * containing more mappings.
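+ *
+ * Illustratively: a single mapping is stored inline, *rmapp ==
+ * (unsigned long)spte; two or more go through a descriptor,
+ * *rmapp == (unsigned long)desc | 1, where each descriptor holds up
+ * to RMAP_EXT sptes plus a 'more' link to the next descriptor.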
+ */
+static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
+{
+	struct kvm_mmu_page *sp;
+	struct kvm_rmap_desc *desc;
+	unsigned long *rmapp;
+	int i;
+
+	if (!is_rmap_pte(*spte))
+		return;
+	gfn = unalias_gfn(vcpu->kvm, gfn);
+	sp = page_header(__pa(spte));
+	sp->gfns[spte - sp->spt] = gfn;
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn);
+	if (!*rmapp) {
+		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
+		*rmapp = (unsigned long)spte;
+	} else if (!(*rmapp & 1)) {
+		rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
+		desc = mmu_alloc_rmap_desc(vcpu);
+		desc->shadow_ptes[0] = (u64 *)*rmapp;
+		desc->shadow_ptes[1] = spte;
+		*rmapp = (unsigned long)desc | 1;
+	} else {
+		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
+		desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
+		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
+			desc = desc->more;
+		if (desc->shadow_ptes[RMAP_EXT-1]) {
+			desc->more = mmu_alloc_rmap_desc(vcpu);
+			desc = desc->more;
+		}
+		for (i = 0; desc->shadow_ptes[i]; ++i)
+			;
+		desc->shadow_ptes[i] = spte;
+	}
+}
+
+static void rmap_desc_remove_entry(unsigned long *rmapp,
+				   struct kvm_rmap_desc *desc,
+				   int i,
+				   struct kvm_rmap_desc *prev_desc)
+{
+	int j;
+
+	for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
+		;
+	desc->shadow_ptes[i] = desc->shadow_ptes[j];
+	desc->shadow_ptes[j] = NULL;
+	if (j != 0)
+		return;
+	if (!prev_desc && !desc->more)
+		*rmapp = (unsigned long)desc->shadow_ptes[0];
+	else
+		if (prev_desc)
+			prev_desc->more = desc->more;
+		else
+			*rmapp = (unsigned long)desc->more | 1;
+	mmu_free_rmap_desc(desc);
+}
+
+static void rmap_remove(struct kvm *kvm, u64 *spte)
+{
+	struct kvm_rmap_desc *desc;
+	struct kvm_rmap_desc *prev_desc;
+	struct kvm_mmu_page *sp;
+	struct page *page;
+	unsigned long *rmapp;
+	int i;
+
+	if (!is_rmap_pte(*spte))
+		return;
+	sp = page_header(__pa(spte));
+	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+	mark_page_accessed(page);
+	if (is_writeble_pte(*spte))
+		kvm_release_page_dirty(page);
+	else
+		kvm_release_page_clean(page);
+	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt]);
+	if (!*rmapp) {
+		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
+		BUG();
+	} else if (!(*rmapp & 1)) {
+		rmap_printk("rmap_remove:  %p %llx 1->0\n", spte, *spte);
+		if ((u64 *)*rmapp != spte) {
+			printk(KERN_ERR "rmap_remove:  %p %llx 1->BUG\n",
+			       spte, *spte);
+			BUG();
+		}
+		*rmapp = 0;
+	} else {
+		rmap_printk("rmap_remove:  %p %llx many->many\n", spte, *spte);
+		desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
+		prev_desc = NULL;
+		while (desc) {
+			for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
+				if (desc->shadow_ptes[i] == spte) {
+					rmap_desc_remove_entry(rmapp,
+							       desc, i,
+							       prev_desc);
+					return;
+				}
+			prev_desc = desc;
+			desc = desc->more;
+		}
+		BUG();
+	}
+}
+
+static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
+{
+	struct kvm_rmap_desc *desc;
+	struct kvm_rmap_desc *prev_desc;
+	u64 *prev_spte;
+	int i;
+
+	if (!*rmapp)
+		return NULL;
+	else if (!(*rmapp & 1)) {
+		if (!spte)
+			return (u64 *)*rmapp;
+		return NULL;
+	}
+	desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
+	prev_desc = NULL;
+	prev_spte = NULL;
+	while (desc) {
+		for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) {
+			if (prev_spte == spte)
+				return desc->shadow_ptes[i];
+			prev_spte = desc->shadow_ptes[i];
+		}
+		desc = desc->more;
+	}
+	return NULL;
+}
+
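+/*
+ * Note: this is what keeps shadow paging coherent -- when a gfn starts
+ * being shadowed as a page table (see kvm_mmu_get_page() below), every
+ * writable spte mapping it loses write access, so guest writes to it
+ * trap and can be intercepted by kvm_mmu_pte_write().
+ */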
+static void rmap_write_protect(struct kvm *kvm, u64 gfn)
+{
+	unsigned long *rmapp;
+	u64 *spte;
+	int write_protected = 0;
+
+	gfn = unalias_gfn(kvm, gfn);
+	rmapp = gfn_to_rmap(kvm, gfn);
+
+	spte = rmap_next(kvm, rmapp, NULL);
+	while (spte) {
+		BUG_ON(!spte);
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
+		if (is_writeble_pte(*spte)) {
+			set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
+			write_protected = 1;
+		}
+		spte = rmap_next(kvm, rmapp, spte);
+	}
+	if (write_protected)
+		kvm_flush_remote_tlbs(kvm);
+}
+
+#ifdef MMU_DEBUG
+static int is_empty_shadow_page(u64 *spt)
+{
+	u64 *pos;
+	u64 *end;
+
+	for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
+		if ((*pos & ~PT_SHADOW_IO_MARK) != shadow_trap_nonpresent_pte) {
+			printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
+			       pos, *pos);
+			return 0;
+		}
+	return 1;
+}
+#endif
+
+static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	ASSERT(is_empty_shadow_page(sp->spt));
+	list_del(&sp->link);
+	__free_page(virt_to_page(sp->spt));
+	__free_page(virt_to_page(sp->gfns));
+	kfree(sp);
+	++kvm->arch.n_free_mmu_pages;
+}
+
+static unsigned kvm_page_table_hashfn(gfn_t gfn)
+{
+	return gfn;
+}
+
+static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
+					       u64 *parent_pte)
+{
+	struct kvm_mmu_page *sp;
+
+	sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);
+	sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
+	sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
+	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
+	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
+	ASSERT(is_empty_shadow_page(sp->spt));
+	sp->slot_bitmap = 0;
+	sp->multimapped = 0;
+	sp->parent_pte = parent_pte;
+	--vcpu->kvm->arch.n_free_mmu_pages;
+	return sp;
+}
+
+static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
+				    struct kvm_mmu_page *sp, u64 *parent_pte)
+{
+	struct kvm_pte_chain *pte_chain;
+	struct hlist_node *node;
+	int i;
+
+	if (!parent_pte)
+		return;
+	if (!sp->multimapped) {
+		u64 *old = sp->parent_pte;
+
+		if (!old) {
+			sp->parent_pte = parent_pte;
+			return;
+		}
+		sp->multimapped = 1;
+		pte_chain = mmu_alloc_pte_chain(vcpu);
+		INIT_HLIST_HEAD(&sp->parent_ptes);
+		hlist_add_head(&pte_chain->link, &sp->parent_ptes);
+		pte_chain->parent_ptes[0] = old;
+	}
+	hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) {
+		if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1])
+			continue;
+		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i)
+			if (!pte_chain->parent_ptes[i]) {
+				pte_chain->parent_ptes[i] = parent_pte;
+				return;
+			}
+	}
+	pte_chain = mmu_alloc_pte_chain(vcpu);
+	BUG_ON(!pte_chain);
+	hlist_add_head(&pte_chain->link, &sp->parent_ptes);
+	pte_chain->parent_ptes[0] = parent_pte;
+}
+
+static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
+				       u64 *parent_pte)
+{
+	struct kvm_pte_chain *pte_chain;
+	struct hlist_node *node;
+	int i;
+
+	if (!sp->multimapped) {
+		BUG_ON(sp->parent_pte != parent_pte);
+		sp->parent_pte = NULL;
+		return;
+	}
+	hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
+		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
+			if (!pte_chain->parent_ptes[i])
+				break;
+			if (pte_chain->parent_ptes[i] != parent_pte)
+				continue;
+			while (i + 1 < NR_PTE_CHAIN_ENTRIES
+				&& pte_chain->parent_ptes[i + 1]) {
+				pte_chain->parent_ptes[i]
+					= pte_chain->parent_ptes[i + 1];
+				++i;
+			}
+			pte_chain->parent_ptes[i] = NULL;
+			if (i == 0) {
+				hlist_del(&pte_chain->link);
+				mmu_free_pte_chain(pte_chain);
+				if (hlist_empty(&sp->parent_ptes)) {
+					sp->multimapped = 0;
+					sp->parent_pte = NULL;
+				}
+			}
+			return;
+		}
+	BUG();
+}
+
+static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
+{
+	unsigned index;
+	struct hlist_head *bucket;
+	struct kvm_mmu_page *sp;
+	struct hlist_node *node;
+
+	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
+	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	bucket = &kvm->arch.mmu_page_hash[index];
+	hlist_for_each_entry(sp, node, bucket, hash_link)
+		if (sp->gfn == gfn && !sp->role.metaphysical) {
+			pgprintk("%s: found role %x\n",
+				 __FUNCTION__, sp->role.word);
+			return sp;
+		}
+	return NULL;
+}
+
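+/*
+ * Note on role.quadrant: a 32-bit guest page table holds 1024 entries
+ * but a shadow page only 512, so one guest page may need several shadow
+ * pages; the quadrant records which half (or, at the page directory
+ * level, which quarter) of the guest page this shadow page maps.
+ */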
+static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
+					     gfn_t gfn,
+					     gva_t gaddr,
+					     unsigned level,
+					     int metaphysical,
+					     unsigned access,
+					     u64 *parent_pte,
+					     bool *new_page)
+{
+	union kvm_mmu_page_role role;
+	unsigned index;
+	unsigned quadrant;
+	struct hlist_head *bucket;
+	struct kvm_mmu_page *sp;
+	struct hlist_node *node;
+
+	role.word = 0;
+	role.glevels = vcpu->arch.mmu.root_level;
+	role.level = level;
+	role.metaphysical = metaphysical;
+	role.access = access;
+	if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
+		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
+		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
+		role.quadrant = quadrant;
+	}
+	pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
+		 gfn, role.word);
+	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
+	hlist_for_each_entry(sp, node, bucket, hash_link)
+		if (sp->gfn == gfn && sp->role.word == role.word) {
+			mmu_page_add_parent_pte(vcpu, sp, parent_pte);
+			pgprintk("%s: found\n", __FUNCTION__);
+			return sp;
+		}
+	++vcpu->kvm->stat.mmu_cache_miss;
+	sp = kvm_mmu_alloc_page(vcpu, parent_pte);
+	if (!sp)
+		return sp;
+	pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
+	sp->gfn = gfn;
+	sp->role = role;
+	hlist_add_head(&sp->hash_link, bucket);
+	vcpu->arch.mmu.prefetch_page(vcpu, sp);
+	if (!metaphysical)
+		rmap_write_protect(vcpu->kvm, gfn);
+	if (new_page)
+		*new_page = 1;
+	return sp;
+}
+
+static void kvm_mmu_page_unlink_children(struct kvm *kvm,
+					 struct kvm_mmu_page *sp)
+{
+	unsigned i;
+	u64 *pt;
+	u64 ent;
+
+	pt = sp->spt;
+
+	if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
+		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+			if (is_shadow_present_pte(pt[i]))
+				rmap_remove(kvm, &pt[i]);
+			pt[i] = shadow_trap_nonpresent_pte;
+		}
+		kvm_flush_remote_tlbs(kvm);
+		return;
+	}
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+		ent = pt[i];
+
+		pt[i] = shadow_trap_nonpresent_pte;
+		if (!is_shadow_present_pte(ent))
+			continue;
+		ent &= PT64_BASE_ADDR_MASK;
+		mmu_page_remove_parent_pte(page_header(ent), &pt[i]);
+	}
+	kvm_flush_remote_tlbs(kvm);
+}
+
+static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
+{
+	mmu_page_remove_parent_pte(sp, parent_pte);
+}
+
+static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm)
+{
+	int i;
+
+	for (i = 0; i < KVM_MAX_VCPUS; ++i)
+		if (kvm->vcpus[i])
+			kvm->vcpus[i]->arch.last_pte_updated = NULL;
+}
+
+static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	u64 *parent_pte;
+
+	++kvm->stat.mmu_shadow_zapped;
+	while (sp->multimapped || sp->parent_pte) {
+		if (!sp->multimapped)
+			parent_pte = sp->parent_pte;
+		else {
+			struct kvm_pte_chain *chain;
+
+			chain = container_of(sp->parent_ptes.first,
+					     struct kvm_pte_chain, link);
+			parent_pte = chain->parent_ptes[0];
+		}
+		BUG_ON(!parent_pte);
+		kvm_mmu_put_page(sp, parent_pte);
+		set_shadow_pte(parent_pte, shadow_trap_nonpresent_pte);
+	}
+	kvm_mmu_page_unlink_children(kvm, sp);
+	if (!sp->root_count) {
+		hlist_del(&sp->hash_link);
+		kvm_mmu_free_page(kvm, sp);
+	} else
+		list_move(&sp->link, &kvm->arch.active_mmu_pages);
+	kvm_mmu_reset_last_pte_updated(kvm);
+}
+
+/*
+ * Change the number of mmu pages allocated to the vm.
+ * Note: if kvm_nr_mmu_pages is too small, you will get a deadlock.
+ */
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
+{
+	/*
+	 * If we set the number of mmu pages to be smaller than the
+	 * number of active pages, we must free some mmu pages before we
+	 * change the value.
+	 */
+
+	if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) >
+	    kvm_nr_mmu_pages) {
+		int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages
+				       - kvm->arch.n_free_mmu_pages;
+
+		while (n_used_mmu_pages > kvm_nr_mmu_pages) {
+			struct kvm_mmu_page *page;
+
+			page = container_of(kvm->arch.active_mmu_pages.prev,
+					    struct kvm_mmu_page, link);
+			kvm_mmu_zap_page(kvm, page);
+			n_used_mmu_pages--;
+		}
+		kvm->arch.n_free_mmu_pages = 0;
+	}
+	else
+		kvm->arch.n_free_mmu_pages += kvm_nr_mmu_pages
+					 - kvm->arch.n_alloc_mmu_pages;
+
+	kvm->arch.n_alloc_mmu_pages = kvm_nr_mmu_pages;
+}
+
+static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
+{
+	unsigned index;
+	struct hlist_head *bucket;
+	struct kvm_mmu_page *sp;
+	struct hlist_node *node, *n;
+	int r;
+
+	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
+	r = 0;
+	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	bucket = &kvm->arch.mmu_page_hash[index];
+	hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
+		if (sp->gfn == gfn && !sp->role.metaphysical) {
+			pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
+				 sp->role.word);
+			kvm_mmu_zap_page(kvm, sp);
+			r = 1;
+		}
+	return r;
+}
+
+static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_mmu_page *sp;
+
+	while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
+		pgprintk("%s: zap %lx %x\n", __FUNCTION__, gfn, sp->role.word);
+		kvm_mmu_zap_page(kvm, sp);
+	}
+}
+
+static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
+{
+	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
+	struct kvm_mmu_page *sp = page_header(__pa(pte));
+
+	__set_bit(slot, &sp->slot_bitmap);
+}
+
+struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
+{
+	gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+
+	if (gpa == UNMAPPED_GVA)
+		return NULL;
+	return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+}
+
+static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
+			 unsigned pt_access, unsigned pte_access,
+			 int user_fault, int write_fault, int dirty,
+			 int *ptwrite, gfn_t gfn, struct page *page)
+{
+	u64 spte;
+	int was_rmapped = is_rmap_pte(*shadow_pte);
+	int was_writeble = is_writeble_pte(*shadow_pte);
+
+	pgprintk("%s: spte %llx access %x write_fault %d"
+		 " user_fault %d gfn %lx\n",
+		 __FUNCTION__, *shadow_pte, pt_access,
+		 write_fault, user_fault, gfn);
+
+	/*
+	 * We don't set the accessed bit, since we sometimes want to see
+	 * whether the guest actually used the pte (in order to detect
+	 * demand paging).
+	 */
+	spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
+	if (!dirty)
+		pte_access &= ~ACC_WRITE_MASK;
+	if (!(pte_access & ACC_EXEC_MASK))
+		spte |= PT64_NX_MASK;
+
+	spte |= PT_PRESENT_MASK;
+	if (pte_access & ACC_USER_MASK)
+		spte |= PT_USER_MASK;
+
+	if (is_error_page(page)) {
+		set_shadow_pte(shadow_pte,
+			       shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
+		kvm_release_page_clean(page);
+		return;
+	}
+
+	spte |= page_to_phys(page);
+
+	if ((pte_access & ACC_WRITE_MASK)
+	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
+		struct kvm_mmu_page *shadow;
+
+		spte |= PT_WRITABLE_MASK;
+		if (user_fault) {
+			mmu_unshadow(vcpu->kvm, gfn);
+			goto unshadowed;
+		}
+
+		shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
+		if (shadow) {
+			pgprintk("%s: found shadow page for %lx, marking ro\n",
+				 __FUNCTION__, gfn);
+			pte_access &= ~ACC_WRITE_MASK;
+			if (is_writeble_pte(spte)) {
+				spte &= ~PT_WRITABLE_MASK;
+				kvm_x86_ops->tlb_flush(vcpu);
+			}
+			if (write_fault)
+				*ptwrite = 1;
+		}
+	}
+
+unshadowed:
+
+	if (pte_access & ACC_WRITE_MASK)
+		mark_page_dirty(vcpu->kvm, gfn);
+
+	pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
+	set_shadow_pte(shadow_pte, spte);
+	page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
+	if (!was_rmapped) {
+		rmap_add(vcpu, shadow_pte, gfn);
+		if (!is_rmap_pte(*shadow_pte))
+			kvm_release_page_clean(page);
+	} else {
+		if (was_writeble)
+			kvm_release_page_dirty(page);
+		else
+			kvm_release_page_clean(page);
+	}
+	if (!ptwrite || !*ptwrite)
+		vcpu->arch.last_pte_updated = shadow_pte;
+}
+
+static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
+{
+}
+
+static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
+			   gfn_t gfn, struct page *page)
+{
+	int level = PT32E_ROOT_LEVEL;
+	hpa_t table_addr = vcpu->arch.mmu.root_hpa;
+	int pt_write = 0;
+
+	for (; ; level--) {
+		u32 index = PT64_INDEX(v, level);
+		u64 *table;
+
+		ASSERT(VALID_PAGE(table_addr));
+		table = __va(table_addr);
+
+		if (level == 1) {
+			mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
+				     0, write, 1, &pt_write, gfn, page);
+			return pt_write || is_io_pte(table[index]);
+		}
+
+		if (table[index] == shadow_trap_nonpresent_pte) {
+			struct kvm_mmu_page *new_table;
+			gfn_t pseudo_gfn;
+
+			pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK)
+				>> PAGE_SHIFT;
+			new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
+						     v, level - 1,
+						     1, ACC_ALL, &table[index],
+						     NULL);
+			if (!new_table) {
+				pgprintk("nonpaging_map: ENOMEM\n");
+				kvm_release_page_clean(page);
+				return -ENOMEM;
+			}
+
+			table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
+				| PT_WRITABLE_MASK | PT_USER_MASK;
+		}
+		table_addr = table[index] & PT64_BASE_ADDR_MASK;
+	}
+}
+
+static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
+{
+	int r;
+
+	struct page *page;
+
+	down_read(&current->mm->mmap_sem);
+	page = gfn_to_page(vcpu->kvm, gfn);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	kvm_mmu_free_some_pages(vcpu);
+	r = __nonpaging_map(vcpu, v, write, gfn, page);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	up_read(&current->mm->mmap_sem);
+
+	return r;
+}
+
+
+static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
+				    struct kvm_mmu_page *sp)
+{
+	int i;
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
+		sp->spt[i] = shadow_trap_nonpresent_pte;
+}
+
+static void mmu_free_roots(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvm_mmu_page *sp;
+
+	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+		return;
+	spin_lock(&vcpu->kvm->mmu_lock);
+#ifdef CONFIG_X86_64
+	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+		hpa_t root = vcpu->arch.mmu.root_hpa;
+
+		sp = page_header(root);
+		--sp->root_count;
+		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+		spin_unlock(&vcpu->kvm->mmu_lock);
+		return;
+	}
+#endif
+	for (i = 0; i < 4; ++i) {
+		hpa_t root = vcpu->arch.mmu.pae_root[i];
+
+		if (root) {
+			root &= PT64_BASE_ADDR_MASK;
+			sp = page_header(root);
+			--sp->root_count;
+		}
+		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
+	}
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+}
+
+static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
+{
+	int i;
+	gfn_t root_gfn;
+	struct kvm_mmu_page *sp;
+
+	root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
+
+#ifdef CONFIG_X86_64
+	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+		hpa_t root = vcpu->arch.mmu.root_hpa;
+
+		ASSERT(!VALID_PAGE(root));
+		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
+				      PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL);
+		root = __pa(sp->spt);
+		++sp->root_count;
+		vcpu->arch.mmu.root_hpa = root;
+		return;
+	}
+#endif
+	for (i = 0; i < 4; ++i) {
+		hpa_t root = vcpu->arch.mmu.pae_root[i];
+
+		ASSERT(!VALID_PAGE(root));
+		if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
+			if (!is_present_pte(vcpu->arch.pdptrs[i])) {
+				vcpu->arch.mmu.pae_root[i] = 0;
+				continue;
+			}
+			root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT;
+		} else if (vcpu->arch.mmu.root_level == 0)
+			root_gfn = 0;
+		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
+				      PT32_ROOT_LEVEL, !is_paging(vcpu),
+				      ACC_ALL, NULL, NULL);
+		root = __pa(sp->spt);
+		++sp->root_count;
+		vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+	}
+	vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+}
+
+static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
+{
+	return vaddr;
+}
+
+static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
+				u32 error_code)
+{
+	gfn_t gfn;
+	int r;
+
+	pgprintk("%s: gva %lx error %x\n", __FUNCTION__, gva, error_code);
+	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		return r;
+
+	ASSERT(vcpu);
+	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+
+	gfn = gva >> PAGE_SHIFT;
+
+	return nonpaging_map(vcpu, gva & PAGE_MASK,
+			     error_code & PFERR_WRITE_MASK, gfn);
+}
+
+static void nonpaging_free(struct kvm_vcpu *vcpu)
+{
+	mmu_free_roots(vcpu);
+}
+
+static int nonpaging_init_context(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+
+	context->new_cr3 = nonpaging_new_cr3;
+	context->page_fault = nonpaging_page_fault;
+	context->gva_to_gpa = nonpaging_gva_to_gpa;
+	context->free = nonpaging_free;
+	context->prefetch_page = nonpaging_prefetch_page;
+	context->root_level = 0;
+	context->shadow_root_level = PT32E_ROOT_LEVEL;
+	context->root_hpa = INVALID_PAGE;
+	return 0;
+}
+
+void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	++vcpu->stat.tlb_flush;
+	kvm_x86_ops->tlb_flush(vcpu);
+}
+
+static void paging_new_cr3(struct kvm_vcpu *vcpu)
+{
+	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
+	mmu_free_roots(vcpu);
+}
+
+static void inject_page_fault(struct kvm_vcpu *vcpu,
+			      u64 addr,
+			      u32 err_code)
+{
+	kvm_inject_page_fault(vcpu, addr, err_code);
+}
+
+static void paging_free(struct kvm_vcpu *vcpu)
+{
+	nonpaging_free(vcpu);
+}
+
+#define PTTYPE 64
+#include "paging_tmpl.h"
+#undef PTTYPE
+
+#define PTTYPE 32
+#include "paging_tmpl.h"
+#undef PTTYPE
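+
+/*
+ * Note: the double inclusion above is the kernel's C substitute for
+ * templates -- paging_tmpl.h expands once with PTTYPE 64 and once with
+ * PTTYPE 32, generating the paging64_* and paging32_* function families
+ * wired up by the init routines below.
+ */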
+
+static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
+{
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+
+	ASSERT(is_pae(vcpu));
+	context->new_cr3 = paging_new_cr3;
+	context->page_fault = paging64_page_fault;
+	context->gva_to_gpa = paging64_gva_to_gpa;
+	context->prefetch_page = paging64_prefetch_page;
+	context->free = paging_free;
+	context->root_level = level;
+	context->shadow_root_level = level;
+	context->root_hpa = INVALID_PAGE;
+	return 0;
+}
+
+static int paging64_init_context(struct kvm_vcpu *vcpu)
+{
+	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
+}
+
+static int paging32_init_context(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+
+	context->new_cr3 = paging_new_cr3;
+	context->page_fault = paging32_page_fault;
+	context->gva_to_gpa = paging32_gva_to_gpa;
+	context->free = paging_free;
+	context->prefetch_page = paging32_prefetch_page;
+	context->root_level = PT32_ROOT_LEVEL;
+	context->shadow_root_level = PT32E_ROOT_LEVEL;
+	context->root_hpa = INVALID_PAGE;
+	return 0;
+}
+
+static int paging32E_init_context(struct kvm_vcpu *vcpu)
+{
+	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
+}
+
+static int init_kvm_mmu(struct kvm_vcpu *vcpu)
+{
+	ASSERT(vcpu);
+	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+
+	if (!is_paging(vcpu))
+		return nonpaging_init_context(vcpu);
+	else if (is_long_mode(vcpu))
+		return paging64_init_context(vcpu);
+	else if (is_pae(vcpu))
+		return paging32E_init_context(vcpu);
+	else
+		return paging32_init_context(vcpu);
+}
+
+static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
+{
+	ASSERT(vcpu);
+	if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
+		vcpu->arch.mmu.free(vcpu);
+		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+	}
+}
+
+int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
+{
+	destroy_kvm_mmu(vcpu);
+	return init_kvm_mmu(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
+
+int kvm_mmu_load(struct kvm_vcpu *vcpu)
+{
+	int r;
+
+	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		goto out;
+	spin_lock(&vcpu->kvm->mmu_lock);
+	kvm_mmu_free_some_pages(vcpu);
+	mmu_alloc_roots(vcpu);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
+	kvm_mmu_flush_tlb(vcpu);
+out:
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_load);
+
+void kvm_mmu_unload(struct kvm_vcpu *vcpu)
+{
+	mmu_free_roots(vcpu);
+}
+
+static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu_page *sp,
+				  u64 *spte)
+{
+	u64 pte;
+	struct kvm_mmu_page *child;
+
+	pte = *spte;
+	if (is_shadow_present_pte(pte)) {
+		if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+			rmap_remove(vcpu->kvm, spte);
+		else {
+			child = page_header(pte & PT64_BASE_ADDR_MASK);
+			mmu_page_remove_parent_pte(child, spte);
+		}
+	}
+	set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+}
+
+static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu_page *sp,
+				  u64 *spte,
+				  const void *new, int bytes,
+				  int offset_in_pte)
+{
+	if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
+		++vcpu->kvm->stat.mmu_pde_zapped;
+		return;
+	}
+
+	++vcpu->kvm->stat.mmu_pte_updated;
+	if (sp->role.glevels == PT32_ROOT_LEVEL)
+		paging32_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
+	else
+		paging64_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
+}
+
+static bool need_remote_flush(u64 old, u64 new)
+{
+	if (!is_shadow_present_pte(old))
+		return false;
+	if (!is_shadow_present_pte(new))
+		return true;
+	if ((old ^ new) & PT64_BASE_ADDR_MASK)
+		return true;
+	old ^= PT64_NX_MASK;
+	new ^= PT64_NX_MASK;
+	return (old & ~new & PT64_PERM_MASK) != 0;
+}
+
+static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, u64 old, u64 new)
+{
+	if (need_remote_flush(old, new))
+		kvm_flush_remote_tlbs(vcpu->kvm);
+	else
+		kvm_mmu_flush_tlb(vcpu);
+}
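A note on the flush decision above: a remote TLB flush is forced only when another vcpu could still be caching stale rights, i.e. the old spte was present and the new one either vanished, points at a different frame, or revokes a permission. NX is xor-inverted first so that setting NX counts as a revocation too. Below is a minimal user-space sketch of the same predicate; the mask values are illustrative stand-ins for the kernel's PT64_* constants, not the real ones.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PRESENT_MASK   (1ull << 0)
#define WRITABLE_MASK  (1ull << 1)
#define USER_MASK      (1ull << 2)
#define NX_MASK        (1ull << 63)
#define PERM_MASK      (PRESENT_MASK | WRITABLE_MASK | USER_MASK | NX_MASK)
#define BASE_ADDR_MASK 0x000ffffffffff000ull

static bool need_remote_flush(uint64_t old, uint64_t new)
{
	if (!(old & PRESENT_MASK))
		return false;			/* nothing could be cached */
	if (!(new & PRESENT_MASK))
		return true;			/* mapping torn down */
	if ((old ^ new) & BASE_ADDR_MASK)
		return true;			/* points at a different frame */
	old ^= NX_MASK;				/* make NX a positive right */
	new ^= NX_MASK;
	return (old & ~new & PERM_MASK) != 0;	/* any right revoked? */
}

int main(void)
{
	uint64_t ro = PRESENT_MASK | 0x1000;
	uint64_t rw = ro | WRITABLE_MASK;

	printf("%d\n", need_remote_flush(ro, rw));	/* granting write: 0 */
	printf("%d\n", need_remote_flush(rw, ro));	/* revoking write: 1 */
	return 0;
}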
+
+static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
+{
+	u64 *spte = vcpu->arch.last_pte_updated;
+
+	return !!(spte && (*spte & PT_ACCESSED_MASK));
+}
+
+static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+					  const u8 *new, int bytes)
+{
+	gfn_t gfn;
+	int r;
+	u64 gpte = 0;
+
+	if (bytes != 4 && bytes != 8)
+		return;
+
+	/*
+	 * Assume that the pte write is on a page table of the same type
+	 * as the current vcpu paging mode.  This is nearly always true
+	 * (might be false while changing modes).  Note it is verified later
+	 * by update_pte().
+	 */
+	if (is_pae(vcpu)) {
+		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
+		if ((bytes == 4) && (gpa % 4 == 0)) {
+			r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8);
+			if (r)
+				return;
+			memcpy((void *)&gpte + (gpa % 8), new, 4);
+		} else if ((bytes == 8) && (gpa % 8 == 0)) {
+			memcpy((void *)&gpte, new, 8);
+		}
+	} else {
+		if ((bytes == 4) && (gpa % 4 == 0))
+			memcpy((void *)&gpte, new, 4);
+	}
+	if (!is_present_pte(gpte))
+		return;
+	gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+	vcpu->arch.update_pte.gfn = gfn;
+	vcpu->arch.update_pte.page = gfn_to_page(vcpu->kvm, gfn);
+}
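mmu_guess_page_from_pte_write() works because a 32-bit guest running in PAE mode updates a 64-bit gpte with two separate 4-byte stores: the function reads the full 8-byte entry back from guest memory and splices the freshly written half in at offset gpa % 8. A standalone sketch of the splice, with the kvm_read_guest() call simulated by a local variable:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Pretend guest page-table memory: a single 64-bit entry. */
static uint64_t guest_pte = 0x00000000aabbc067ull;

static uint64_t guess_gpte(uint64_t gpa, const void *new, int bytes)
{
	uint64_t gpte;

	memcpy(&gpte, &guest_pte, 8);		/* kvm_read_guest() stand-in */
	if (bytes == 4 && gpa % 4 == 0)
		memcpy((char *)&gpte + gpa % 8, new, 4);	/* splice one half */
	else if (bytes == 8 && gpa % 8 == 0)
		memcpy(&gpte, new, 8);
	return gpte;
}

int main(void)
{
	uint32_t hi = 0x11223344;

	/* A 4-byte store to the high half of the entry at offset 4. */
	printf("%llx\n", (unsigned long long)guess_gpte(4, &hi, 4));
	return 0;
}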
+
+void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+		       const u8 *new, int bytes)
+{
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	struct kvm_mmu_page *sp;
+	struct hlist_node *node, *n;
+	struct hlist_head *bucket;
+	unsigned index;
+	u64 entry;
+	u64 *spte;
+	unsigned offset = offset_in_page(gpa);
+	unsigned pte_size;
+	unsigned page_offset;
+	unsigned misaligned;
+	unsigned quadrant;
+	int level;
+	int flooded = 0;
+	int npte;
+
+	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
+	mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
+	spin_lock(&vcpu->kvm->mmu_lock);
+	kvm_mmu_free_some_pages(vcpu);
+	++vcpu->kvm->stat.mmu_pte_write;
+	kvm_mmu_audit(vcpu, "pre pte write");
+	if (gfn == vcpu->arch.last_pt_write_gfn
+	    && !last_updated_pte_accessed(vcpu)) {
+		++vcpu->arch.last_pt_write_count;
+		if (vcpu->arch.last_pt_write_count >= 3)
+			flooded = 1;
+	} else {
+		vcpu->arch.last_pt_write_gfn = gfn;
+		vcpu->arch.last_pt_write_count = 1;
+		vcpu->arch.last_pte_updated = NULL;
+	}
+	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
+	hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
+		if (sp->gfn != gfn || sp->role.metaphysical)
+			continue;
+		pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
+		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+		misaligned |= bytes < 4;
+		if (misaligned || flooded) {
+			/*
+			 * Misaligned accesses are too much trouble to fix
+			 * up; also, they usually indicate a page is not used
+			 * as a page table.
+			 *
+			 * If we're seeing too many writes to a page,
+			 * it may no longer be a page table, or we may be
+			 * forking, in which case it is better to unmap the
+			 * page.
+			 */
+			pgprintk("misaligned: gpa %llx bytes %d role %x\n",
+				 gpa, bytes, sp->role.word);
+			kvm_mmu_zap_page(vcpu->kvm, sp);
+			++vcpu->kvm->stat.mmu_flooded;
+			continue;
+		}
+		page_offset = offset;
+		level = sp->role.level;
+		npte = 1;
+		if (sp->role.glevels == PT32_ROOT_LEVEL) {
+			page_offset <<= 1;	/* 32->64 */
+			/*
+			 * A 32-bit pde maps 4MB while the shadow pdes map
+			 * only 2MB.  So we need to double the offset again
+			 * and zap two pdes instead of one.
+			 */
+			if (level == PT32_ROOT_LEVEL) {
+				page_offset &= ~7; /* kill rounding error */
+				page_offset <<= 1;
+				npte = 2;
+			}
+			quadrant = page_offset >> PAGE_SHIFT;
+			page_offset &= ~PAGE_MASK;
+			if (quadrant != sp->role.quadrant)
+				continue;
+		}
+		spte = &sp->spt[page_offset / sizeof(*spte)];
+		while (npte--) {
+			entry = *spte;
+			mmu_pte_write_zap_pte(vcpu, sp, spte);
+			mmu_pte_write_new_pte(vcpu, sp, spte, new, bytes,
+					      page_offset & (pte_size - 1));
+			mmu_pte_write_flush_tlb(vcpu, entry, *spte);
+			++spte;
+		}
+	}
+	kvm_mmu_audit(vcpu, "post pte write");
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	if (vcpu->arch.update_pte.page) {
+		kvm_release_page_clean(vcpu->arch.update_pte.page);
+		vcpu->arch.update_pte.page = NULL;
+	}
+}
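The misalignment test in kvm_mmu_pte_write() deserves a closer look: (offset ^ (offset + bytes - 1)) & ~(pte_size - 1) is a branch-free way of asking whether the first and last byte of the write land in the same pte-sized slot. The xor keeps exactly the bits where the two offsets differ, and masking off the low log2(pte_size) bits leaves a non-zero value precisely when the write crosses a pte boundary. A standalone check:

#include <stdbool.h>
#include <stdio.h>

static bool misaligned(unsigned offset, unsigned bytes, unsigned pte_size)
{
	/* Non-zero iff the first and last byte fall in different ptes. */
	unsigned cross = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);

	return cross || bytes < 4;
}

int main(void)
{
	printf("%d\n", misaligned(0, 8, 8));	/* aligned 8-byte write: 0 */
	printf("%d\n", misaligned(4, 8, 8));	/* straddles two ptes:   1 */
	printf("%d\n", misaligned(8, 2, 8));	/* too small to decode:  1 */
	return 0;
}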
+
+int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
+{
+	gpa_t gpa;
+	int r;
+
+	down_read(&current->mm->mmap_sem);
+	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+	up_read(&current->mm->mmap_sem);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	return r;
+}
+
+void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
+{
+	while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) {
+		struct kvm_mmu_page *sp;
+
+		sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
+				  struct kvm_mmu_page, link);
+		kvm_mmu_zap_page(vcpu->kvm, sp);
+		++vcpu->kvm->stat.mmu_recycled;
+	}
+}
+
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
+{
+	int r;
+	enum emulation_result er;
+
+	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
+	if (r < 0)
+		goto out;
+
+	if (!r) {
+		r = 1;
+		goto out;
+	}
+
+	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		goto out;
+
+	er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
+
+	switch (er) {
+	case EMULATE_DONE:
+		return 1;
+	case EMULATE_DO_MMIO:
+		++vcpu->stat.mmio_exits;
+		return 0;
+	case EMULATE_FAIL:
+		kvm_report_emulation_failure(vcpu, "pagetable");
+		return 1;
+	default:
+		BUG();
+	}
+out:
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
+
+static void free_mmu_pages(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu_page *sp;
+
+	while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
+		sp = container_of(vcpu->kvm->arch.active_mmu_pages.next,
+				  struct kvm_mmu_page, link);
+		kvm_mmu_zap_page(vcpu->kvm, sp);
+	}
+	free_page((unsigned long)vcpu->arch.mmu.pae_root);
+}
+
+static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
+{
+	struct page *page;
+	int i;
+
+	ASSERT(vcpu);
+
+	if (vcpu->kvm->arch.n_requested_mmu_pages)
+		vcpu->kvm->arch.n_free_mmu_pages =
+					vcpu->kvm->arch.n_requested_mmu_pages;
+	else
+		vcpu->kvm->arch.n_free_mmu_pages =
+					vcpu->kvm->arch.n_alloc_mmu_pages;
+	/*
+	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
+	 * Therefore we need to allocate shadow page tables in the first
+	 * 4GB of memory, which happens to fit the DMA32 zone.
+	 */
+	page = alloc_page(GFP_KERNEL | __GFP_DMA32);
+	if (!page)
+		goto error_1;
+	vcpu->arch.mmu.pae_root = page_address(page);
+	for (i = 0; i < 4; ++i)
+		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
+
+	return 0;
+
+error_1:
+	free_mmu_pages(vcpu);
+	return -ENOMEM;
+}
+
+int kvm_mmu_create(struct kvm_vcpu *vcpu)
+{
+	ASSERT(vcpu);
+	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+
+	return alloc_mmu_pages(vcpu);
+}
+
+int kvm_mmu_setup(struct kvm_vcpu *vcpu)
+{
+	ASSERT(vcpu);
+	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+
+	return init_kvm_mmu(vcpu);
+}
+
+void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+	ASSERT(vcpu);
+
+	destroy_kvm_mmu(vcpu);
+	free_mmu_pages(vcpu);
+	mmu_free_memory_caches(vcpu);
+}
+
+void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
+{
+	struct kvm_mmu_page *sp;
+
+	list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
+		int i;
+		u64 *pt;
+
+		if (!test_bit(slot, &sp->slot_bitmap))
+			continue;
+
+		pt = sp->spt;
+		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
+			/* avoid RMW */
+			if (pt[i] & PT_WRITABLE_MASK)
+				pt[i] &= ~PT_WRITABLE_MASK;
+	}
+}
+
+void kvm_mmu_zap_all(struct kvm *kvm)
+{
+	struct kvm_mmu_page *sp, *node;
+
+	spin_lock(&kvm->mmu_lock);
+	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
+		kvm_mmu_zap_page(kvm, sp);
+	spin_unlock(&kvm->mmu_lock);
+
+	kvm_flush_remote_tlbs(kvm);
+}
+
+void kvm_mmu_module_exit(void)
+{
+	if (pte_chain_cache)
+		kmem_cache_destroy(pte_chain_cache);
+	if (rmap_desc_cache)
+		kmem_cache_destroy(rmap_desc_cache);
+	if (mmu_page_header_cache)
+		kmem_cache_destroy(mmu_page_header_cache);
+}
+
+int kvm_mmu_module_init(void)
+{
+	pte_chain_cache = kmem_cache_create("kvm_pte_chain",
+					    sizeof(struct kvm_pte_chain),
+					    0, 0, NULL);
+	if (!pte_chain_cache)
+		goto nomem;
+	rmap_desc_cache = kmem_cache_create("kvm_rmap_desc",
+					    sizeof(struct kvm_rmap_desc),
+					    0, 0, NULL);
+	if (!rmap_desc_cache)
+		goto nomem;
+
+	mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
+						  sizeof(struct kvm_mmu_page),
+						  0, 0, NULL);
+	if (!mmu_page_header_cache)
+		goto nomem;
+
+	return 0;
+
+nomem:
+	kvm_mmu_module_exit();
+	return -ENOMEM;
+}
+
+/*
+ * Calculate mmu pages needed for kvm.
+ */
+unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
+{
+	int i;
+	unsigned int nr_mmu_pages;
+	unsigned int  nr_pages = 0;
+
+	for (i = 0; i < kvm->nmemslots; i++)
+		nr_pages += kvm->memslots[i].npages;
+
+	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
+	nr_mmu_pages = max(nr_mmu_pages,
+			(unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+
+	return nr_mmu_pages;
+}
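A worked example, assuming the constants of this era were KVM_PERMILLE_MMU_PAGES = 20 and KVM_MIN_ALLOC_MMU_PAGES = 64: a guest with 512 MB across its memslots has 131072 4 KB pages, so the budget comes out to 131072 * 20 / 1000 = 2621 shadow pages, while a 4 MB guest falls through to the 64-page floor.

#include <stdio.h>

/* Assumed values of the kernel constants; check kvm_host.h for the real ones. */
#define KVM_PERMILLE_MMU_PAGES	20
#define KVM_MIN_ALLOC_MMU_PAGES	64

static unsigned int calc_mmu_pages(unsigned int nr_guest_pages)
{
	unsigned int n = nr_guest_pages * KVM_PERMILLE_MMU_PAGES / 1000;

	return n > KVM_MIN_ALLOC_MMU_PAGES ? n : KVM_MIN_ALLOC_MMU_PAGES;
}

int main(void)
{
	printf("%u\n", calc_mmu_pages(131072));	/* 512 MB guest -> 2621 */
	printf("%u\n", calc_mmu_pages(1024));	/*   4 MB guest -> 64   */
	return 0;
}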
+
+#ifdef AUDIT
+
+static const char *audit_msg;
+
+static gva_t canonicalize(gva_t gva)
+{
+#ifdef CONFIG_X86_64
+	gva = (long long)(gva << 16) >> 16;
+#endif
+	return gva;
+}
+
+static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
+				gva_t va, int level)
+{
+	u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK);
+	int i;
+	gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1));
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
+		u64 ent = pt[i];
+
+		if (ent == shadow_trap_nonpresent_pte)
+			continue;
+
+		va = canonicalize(va);
+		if (level > 1) {
+			if (ent == shadow_notrap_nonpresent_pte)
+				printk(KERN_ERR "audit: (%s) nontrapping pte"
+				       " in nonleaf level: levels %d gva %lx"
+				       " level %d pte %llx\n", audit_msg,
+				       vcpu->arch.mmu.root_level, va, level, ent);
+
+			audit_mappings_page(vcpu, ent, va, level - 1);
+		} else {
+			gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
+			struct page *page = gpa_to_page(vcpu, gpa);
+			hpa_t hpa = page_to_phys(page);
+
+			if (is_shadow_present_pte(ent)
+			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
+				printk(KERN_ERR "xx audit error: (%s) levels %d"
+				       " gva %lx gpa %llx hpa %llx ent %llx %d\n",
+				       audit_msg, vcpu->arch.mmu.root_level,
+				       va, gpa, hpa, ent,
+				       is_shadow_present_pte(ent));
+			else if (ent == shadow_notrap_nonpresent_pte
+				 && !is_error_hpa(hpa))
+				printk(KERN_ERR "audit: (%s) notrap shadow,"
+				       " valid guest gva %lx\n", audit_msg, va);
+			kvm_release_page_clean(page);
+
+		}
+	}
+}
+
+static void audit_mappings(struct kvm_vcpu *vcpu)
+{
+	unsigned i;
+
+	if (vcpu->arch.mmu.root_level == 4)
+		audit_mappings_page(vcpu, vcpu->arch.mmu.root_hpa, 0, 4);
+	else
+		for (i = 0; i < 4; ++i)
+			if (vcpu->arch.mmu.pae_root[i] & PT_PRESENT_MASK)
+				audit_mappings_page(vcpu,
+						    vcpu->arch.mmu.pae_root[i],
+						    i << 30,
+						    2);
+}
+
+static int count_rmaps(struct kvm_vcpu *vcpu)
+{
+	int nmaps = 0;
+	int i, j, k;
+
+	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
+		struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
+		struct kvm_rmap_desc *d;
+
+		for (j = 0; j < m->npages; ++j) {
+			unsigned long *rmapp = &m->rmap[j];
+
+			if (!*rmapp)
+				continue;
+			if (!(*rmapp & 1)) {
+				++nmaps;
+				continue;
+			}
+			d = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
+			while (d) {
+				for (k = 0; k < RMAP_EXT; ++k)
+					if (d->shadow_ptes[k])
+						++nmaps;
+					else
+						break;
+				d = d->more;
+			}
+		}
+	}
+	return nmaps;
+}
+
+static int count_writable_mappings(struct kvm_vcpu *vcpu)
+{
+	int nmaps = 0;
+	struct kvm_mmu_page *sp;
+	int i;
+
+	list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
+		u64 *pt = sp->spt;
+
+		if (sp->role.level != PT_PAGE_TABLE_LEVEL)
+			continue;
+
+		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+			u64 ent = pt[i];
+
+			if (!(ent & PT_PRESENT_MASK))
+				continue;
+			if (!(ent & PT_WRITABLE_MASK))
+				continue;
+			++nmaps;
+		}
+	}
+	return nmaps;
+}
+
+static void audit_rmap(struct kvm_vcpu *vcpu)
+{
+	int n_rmap = count_rmaps(vcpu);
+	int n_actual = count_writable_mappings(vcpu);
+
+	if (n_rmap != n_actual)
+		printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
+		       __FUNCTION__, audit_msg, n_rmap, n_actual);
+}
+
+static void audit_write_protection(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu_page *sp;
+	struct kvm_memory_slot *slot;
+	unsigned long *rmapp;
+	gfn_t gfn;
+
+	list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
+		if (sp->role.metaphysical)
+			continue;
+
+		slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
+		gfn = unalias_gfn(vcpu->kvm, sp->gfn);
+		rmapp = &slot->rmap[gfn - slot->base_gfn];
+		if (*rmapp)
+			printk(KERN_ERR "%s: (%s) shadow page has writable"
+			       " mappings: gfn %lx role %x\n",
+			       __FUNCTION__, audit_msg, sp->gfn,
+			       sp->role.word);
+	}
+}
+
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
+{
+	int olddbg = dbg;
+
+	dbg = 0;
+	audit_msg = msg;
+	audit_rmap(vcpu);
+	audit_write_protection(vcpu);
+	audit_mappings(vcpu);
+	dbg = olddbg;
+}
+
+#endif

+ 44 - 0
arch/x86/kvm/mmu.h

@@ -0,0 +1,44 @@
+#ifndef __KVM_X86_MMU_H
+#define __KVM_X86_MMU_H
+
+#include <linux/kvm_host.h>
+
+static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
+{
+	if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
+		__kvm_mmu_free_some_pages(vcpu);
+}
+
+static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
+{
+	if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
+		return 0;
+
+	return kvm_mmu_load(vcpu);
+}
+
+static inline int is_long_mode(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+	return vcpu->arch.shadow_efer & EFER_LME;
+#else
+	return 0;
+#endif
+}
+
+static inline int is_pae(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.cr4 & X86_CR4_PAE;
+}
+
+static inline int is_pse(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.cr4 & X86_CR4_PSE;
+}
+
+static inline int is_paging(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.cr0 & X86_CR0_PG;
+}
+
+#endif
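kvm_mmu_reload() above is the hot-path half of a lazy-rebuild idiom: teardown paths store INVALID_PAGE into root_hpa, and the next guest entry pays for a full kvm_mmu_load() only when it sees that sentinel. The shape of the idiom, reduced to a standalone sketch (all names here are illustrative):

#include <stdio.h>

#define INVALID_PAGE ((unsigned long)-1)

static unsigned long root_hpa = INVALID_PAGE;

static int load_roots(void)		/* stands in for kvm_mmu_load() */
{
	root_hpa = 0x1000;		/* pretend a root was allocated */
	printf("roots rebuilt\n");
	return 0;
}

static int mmu_reload(void)		/* stands in for kvm_mmu_reload() */
{
	if (root_hpa != INVALID_PAGE)	/* cheap check on every entry */
		return 0;
	return load_roots();		/* slow path, rarely taken */
}

int main(void)
{
	mmu_reload();			/* rebuilds */
	mmu_reload();			/* no-op */
	root_hpa = INVALID_PAGE;	/* e.g. after a cr3 write */
	mmu_reload();			/* rebuilds again */
	return 0;
}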

+ 484 - 0
arch/x86/kvm/paging_tmpl.h

@@ -0,0 +1,484 @@
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * This module enables machines with Intel VT-x extensions to run virtual
+ * machines without emulation or binary translation.
+ *
+ * MMU support
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ *
+ * Authors:
+ *   Yaniv Kamay  <yaniv@qumranet.com>
+ *   Avi Kivity   <avi@qumranet.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+/*
+ * We need the mmu code to access both 32-bit and 64-bit guest ptes,
+ * so the code in this file is compiled twice, once per pte size.
+ */
+
+#if PTTYPE == 64
+	#define pt_element_t u64
+	#define guest_walker guest_walker64
+	#define FNAME(name) paging##64_##name
+	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
+	#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
+	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
+	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
+	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
+	#define PT_LEVEL_BITS PT64_LEVEL_BITS
+	#ifdef CONFIG_X86_64
+	#define PT_MAX_FULL_LEVELS 4
+	#define CMPXCHG cmpxchg
+	#else
+	#define CMPXCHG cmpxchg64
+	#define PT_MAX_FULL_LEVELS 2
+	#endif
+#elif PTTYPE == 32
+	#define pt_element_t u32
+	#define guest_walker guest_walker32
+	#define FNAME(name) paging##32_##name
+	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
+	#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
+	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
+	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
+	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
+	#define PT_LEVEL_BITS PT32_LEVEL_BITS
+	#define PT_MAX_FULL_LEVELS 2
+	#define CMPXCHG cmpxchg
+#else
+	#error Invalid PTTYPE value
+#endif
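The PTTYPE/FNAME machinery above is C's substitute for templates: the same header body is textually included once per pte width, and each pass pastes a size suffix onto every function name. A self-contained imitation of the trick, with invented file names:

/* sum_tmpl.h: included once per element width, like paging_tmpl.h */
static TYPE FNAME(sum)(const TYPE *v, int n)
{
	TYPE s = 0;
	int i;

	for (i = 0; i < n; i++)
		s += v[i];
	return s;
}

/* main.c */
#include <stdint.h>
#include <stdio.h>

#define TYPE uint32_t
#define FNAME(name) name##32		/* sum -> sum32 */
#include "sum_tmpl.h"
#undef FNAME
#undef TYPE

#define TYPE uint64_t
#define FNAME(name) name##64		/* sum -> sum64 */
#include "sum_tmpl.h"
#undef FNAME
#undef TYPE

int main(void)
{
	uint32_t a[] = { 1, 2, 3 };
	uint64_t b[] = { 1ull << 40, 2 };

	printf("%u %llu\n", sum32(a, 3), (unsigned long long)sum64(b, 2));
	return 0;
}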
+
+#define gpte_to_gfn FNAME(gpte_to_gfn)
+#define gpte_to_gfn_pde FNAME(gpte_to_gfn_pde)
+
+/*
+ * The guest_walker structure emulates the behavior of the hardware page
+ * table walker.
+ */
+struct guest_walker {
+	int level;
+	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
+	pt_element_t ptes[PT_MAX_FULL_LEVELS];
+	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
+	unsigned pt_access;
+	unsigned pte_access;
+	gfn_t gfn;
+	u32 error_code;
+};
+
+static gfn_t gpte_to_gfn(pt_element_t gpte)
+{
+	return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
+}
+
+static gfn_t gpte_to_gfn_pde(pt_element_t gpte)
+{
+	return (gpte & PT_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
+}
+
+static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
+			 gfn_t table_gfn, unsigned index,
+			 pt_element_t orig_pte, pt_element_t new_pte)
+{
+	pt_element_t ret;
+	pt_element_t *table;
+	struct page *page;
+
+	page = gfn_to_page(kvm, table_gfn);
+	table = kmap_atomic(page, KM_USER0);
+
+	ret = CMPXCHG(&table[index], orig_pte, new_pte);
+
+	kunmap_atomic(table, KM_USER0);
+
+	kvm_release_page_dirty(page);
+
+	return (ret != orig_pte);
+}
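cmpxchg_gpte() lets the walker set accessed and dirty bits the way the hardware walker does: atomically, and with failure detection, since a gpte changed by a racing guest write makes the compare fail and sends walk_addr back to its walk: label. The same retry loop in portable C11 atomics, against a fake one-entry guest page table:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ACCESSED_MASK (1ull << 5)	/* x86 accessed bit */

static _Atomic uint64_t guest_pte = 0x1007;	/* pretend guest memory */

/* Returns true if the pte changed under us and the walk must restart. */
static bool set_accessed(uint64_t seen)
{
	return !atomic_compare_exchange_strong(&guest_pte, &seen,
					       seen | ACCESSED_MASK);
}

int main(void)
{
	uint64_t pte;

	do {
		pte = atomic_load(&guest_pte);	/* re-read: the "goto walk" */
	} while (set_accessed(pte));
	printf("%llx\n", (unsigned long long)atomic_load(&guest_pte));
	return 0;
}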
+
+static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
+{
+	unsigned access;
+
+	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
+#if PTTYPE == 64
+	if (is_nx(vcpu))
+		access &= ~(gpte >> PT64_NX_SHIFT);
+#endif
+	return access;
+}
+
+/*
+ * Fetch a guest pte for a guest virtual address
+ */
+static int FNAME(walk_addr)(struct guest_walker *walker,
+			    struct kvm_vcpu *vcpu, gva_t addr,
+			    int write_fault, int user_fault, int fetch_fault)
+{
+	pt_element_t pte;
+	gfn_t table_gfn;
+	unsigned index, pt_access, pte_access;
+	gpa_t pte_gpa;
+
+	pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
+walk:
+	walker->level = vcpu->arch.mmu.root_level;
+	pte = vcpu->arch.cr3;
+#if PTTYPE == 64
+	if (!is_long_mode(vcpu)) {
+		pte = vcpu->arch.pdptrs[(addr >> 30) & 3];
+		if (!is_present_pte(pte))
+			goto not_present;
+		--walker->level;
+	}
+#endif
+	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
+	       (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
+
+	pt_access = ACC_ALL;
+
+	for (;;) {
+		index = PT_INDEX(addr, walker->level);
+
+		table_gfn = gpte_to_gfn(pte);
+		pte_gpa = gfn_to_gpa(table_gfn);
+		pte_gpa += index * sizeof(pt_element_t);
+		walker->table_gfn[walker->level - 1] = table_gfn;
+		walker->pte_gpa[walker->level - 1] = pte_gpa;
+		pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+			 walker->level - 1, table_gfn);
+
+		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
+
+		if (!is_present_pte(pte))
+			goto not_present;
+
+		if (write_fault && !is_writeble_pte(pte))
+			if (user_fault || is_write_protection(vcpu))
+				goto access_error;
+
+		if (user_fault && !(pte & PT_USER_MASK))
+			goto access_error;
+
+#if PTTYPE == 64
+		if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
+			goto access_error;
+#endif
+
+		if (!(pte & PT_ACCESSED_MASK)) {
+			mark_page_dirty(vcpu->kvm, table_gfn);
+			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
+			    index, pte, pte|PT_ACCESSED_MASK))
+				goto walk;
+			pte |= PT_ACCESSED_MASK;
+		}
+
+		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
+
+		walker->ptes[walker->level - 1] = pte;
+
+		if (walker->level == PT_PAGE_TABLE_LEVEL) {
+			walker->gfn = gpte_to_gfn(pte);
+			break;
+		}
+
+		if (walker->level == PT_DIRECTORY_LEVEL
+		    && (pte & PT_PAGE_SIZE_MASK)
+		    && (PTTYPE == 64 || is_pse(vcpu))) {
+			walker->gfn = gpte_to_gfn_pde(pte);
+			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
+			if (PTTYPE == 32 && is_cpuid_PSE36())
+				walker->gfn += pse36_gfn_delta(pte);
+			break;
+		}
+
+		pt_access = pte_access;
+		--walker->level;
+	}
+
+	if (write_fault && !is_dirty_pte(pte)) {
+		bool ret;
+
+		mark_page_dirty(vcpu->kvm, table_gfn);
+		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
+			    pte|PT_DIRTY_MASK);
+		if (ret)
+			goto walk;
+		pte |= PT_DIRTY_MASK;
+		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+		walker->ptes[walker->level - 1] = pte;
+	}
+
+	walker->pt_access = pt_access;
+	walker->pte_access = pte_access;
+	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
+		 __FUNCTION__, (u64)pte, pt_access, pte_access);
+	return 1;
+
+not_present:
+	walker->error_code = 0;
+	goto err;
+
+access_error:
+	walker->error_code = PFERR_PRESENT_MASK;
+
+err:
+	if (write_fault)
+		walker->error_code |= PFERR_WRITE_MASK;
+	if (user_fault)
+		walker->error_code |= PFERR_USER_MASK;
+	if (fetch_fault)
+		walker->error_code |= PFERR_FETCH_MASK;
+	return 0;
+}
+
+static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
+			      u64 *spte, const void *pte, int bytes,
+			      int offset_in_pte)
+{
+	pt_element_t gpte;
+	unsigned pte_access;
+	struct page *npage;
+
+	gpte = *(const pt_element_t *)pte;
+	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
+		if (!offset_in_pte && !is_present_pte(gpte))
+			set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
+		return;
+	}
+	if (bytes < sizeof(pt_element_t))
+		return;
+	pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
+	pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
+	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
+		return;
+	npage = vcpu->arch.update_pte.page;
+	if (!npage)
+		return;
+	get_page(npage);
+	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
+		     gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte), npage);
+}
+
+/*
+ * Fetch a shadow pte for a specific level in the paging hierarchy.
+ */
+static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+			 struct guest_walker *walker,
+			 int user_fault, int write_fault, int *ptwrite,
+			 struct page *page)
+{
+	hpa_t shadow_addr;
+	int level;
+	u64 *shadow_ent;
+	unsigned access = walker->pt_access;
+
+	if (!is_present_pte(walker->ptes[walker->level - 1]))
+		return NULL;
+
+	shadow_addr = vcpu->arch.mmu.root_hpa;
+	level = vcpu->arch.mmu.shadow_root_level;
+	if (level == PT32E_ROOT_LEVEL) {
+		shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
+		shadow_addr &= PT64_BASE_ADDR_MASK;
+		--level;
+	}
+
+	for (; ; level--) {
+		u32 index = SHADOW_PT_INDEX(addr, level);
+		struct kvm_mmu_page *shadow_page;
+		u64 shadow_pte;
+		int metaphysical;
+		gfn_t table_gfn;
+		bool new_page = 0;
+
+		shadow_ent = ((u64 *)__va(shadow_addr)) + index;
+		if (level == PT_PAGE_TABLE_LEVEL)
+			break;
+		if (is_shadow_present_pte(*shadow_ent)) {
+			shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
+			continue;
+		}
+
+		if (level - 1 == PT_PAGE_TABLE_LEVEL
+		    && walker->level == PT_DIRECTORY_LEVEL) {
+			metaphysical = 1;
+			if (!is_dirty_pte(walker->ptes[level - 1]))
+				access &= ~ACC_WRITE_MASK;
+			table_gfn = gpte_to_gfn(walker->ptes[level - 1]);
+		} else {
+			metaphysical = 0;
+			table_gfn = walker->table_gfn[level - 2];
+		}
+		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+					       metaphysical, access,
+					       shadow_ent, &new_page);
+		if (new_page && !metaphysical) {
+			int r;
+			pt_element_t curr_pte;
+			r = kvm_read_guest_atomic(vcpu->kvm,
+						  walker->pte_gpa[level - 2],
+						  &curr_pte, sizeof(curr_pte));
+			if (r || curr_pte != walker->ptes[level - 2]) {
+				kvm_release_page_clean(page);
+				return NULL;
+			}
+		}
+		shadow_addr = __pa(shadow_page->spt);
+		shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
+			| PT_WRITABLE_MASK | PT_USER_MASK;
+		*shadow_ent = shadow_pte;
+	}
+
+	mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
+		     user_fault, write_fault,
+		     walker->ptes[walker->level-1] & PT_DIRTY_MASK,
+		     ptwrite, walker->gfn, page);
+
+	return shadow_ent;
+}
+
+/*
+ * Page fault handler.  There are several causes for a page fault:
+ *   - there is no shadow pte for the guest pte
+ *   - write access through a shadow pte marked read only so that we can set
+ *     the dirty bit
+ *   - write access to a shadow pte marked read only so we can update the page
+ *     dirty bitmap, when userspace requests it
+ *   - mmio access; in this case we will never install a present shadow pte
+ *   - normal guest page fault due to the guest pte marked not present, not
+ *     writable, or not executable
+ *
+ *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
+ *           a negative value on error.
+ */
+static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
+			       u32 error_code)
+{
+	int write_fault = error_code & PFERR_WRITE_MASK;
+	int user_fault = error_code & PFERR_USER_MASK;
+	int fetch_fault = error_code & PFERR_FETCH_MASK;
+	struct guest_walker walker;
+	u64 *shadow_pte;
+	int write_pt = 0;
+	int r;
+	struct page *page;
+
+	pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
+	kvm_mmu_audit(vcpu, "pre page fault");
+
+	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		return r;
+
+	down_read(&current->mm->mmap_sem);
+	/*
+	 * Look up the guest pte for the faulting address.
+	 */
+	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
+			     fetch_fault);
+
+	/*
+	 * The page is not mapped by the guest.  Let the guest handle it.
+	 */
+	if (!r) {
+		pgprintk("%s: guest page fault\n", __FUNCTION__);
+		inject_page_fault(vcpu, addr, walker.error_code);
+		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
+		up_read(&current->mm->mmap_sem);
+		return 0;
+	}
+
+	page = gfn_to_page(vcpu->kvm, walker.gfn);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	kvm_mmu_free_some_pages(vcpu);
+	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
+				  &write_pt, page);
+	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
+		 shadow_pte, *shadow_pte, write_pt);
+
+	if (!write_pt)
+		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
+
+	/*
+	 * mmio: emulate if accessible, otherwise it's a guest fault.
+	 */
+	if (shadow_pte && is_io_pte(*shadow_pte)) {
+		spin_unlock(&vcpu->kvm->mmu_lock);
+		up_read(&current->mm->mmap_sem);
+		return 1;
+	}
+
+	++vcpu->stat.pf_fixed;
+	kvm_mmu_audit(vcpu, "post page fault (fixed)");
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	up_read(&current->mm->mmap_sem);
+
+	return write_pt;
+}
+
+static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
+{
+	struct guest_walker walker;
+	gpa_t gpa = UNMAPPED_GVA;
+	int r;
+
+	r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
+
+	if (r) {
+		gpa = gfn_to_gpa(walker.gfn);
+		gpa |= vaddr & ~PAGE_MASK;
+	}
+
+	return gpa;
+}
+
+static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
+				 struct kvm_mmu_page *sp)
+{
+	int i, offset = 0, r = 0;
+	pt_element_t pt;
+
+	if (sp->role.metaphysical
+	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
+		nonpaging_prefetch_page(vcpu, sp);
+		return;
+	}
+
+	if (PTTYPE == 32)
+		offset = sp->role.quadrant << PT64_LEVEL_BITS;
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+		gpa_t pte_gpa = gfn_to_gpa(sp->gfn);
+		pte_gpa += (i+offset) * sizeof(pt_element_t);
+
+		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &pt,
+					  sizeof(pt_element_t));
+		if (r || is_present_pte(pt))
+			sp->spt[i] = shadow_trap_nonpresent_pte;
+		else
+			sp->spt[i] = shadow_notrap_nonpresent_pte;
+	}
+}
+
+#undef pt_element_t
+#undef guest_walker
+#undef FNAME
+#undef PT_BASE_ADDR_MASK
+#undef PT_INDEX
+#undef SHADOW_PT_INDEX
+#undef PT_LEVEL_MASK
+#undef PT_DIR_BASE_ADDR_MASK
+#undef PT_LEVEL_BITS
+#undef PT_MAX_FULL_LEVELS
+#undef gpte_to_gfn
+#undef gpte_to_gfn_pde
+#undef CMPXCHG

+ 12 - 0
drivers/kvm/segment_descriptor.h → arch/x86/kvm/segment_descriptor.h

@@ -1,3 +1,6 @@
+#ifndef __SEGMENT_DESCRIPTOR_H
+#define __SEGMENT_DESCRIPTOR_H
+
 struct segment_descriptor {
 	u16 limit_low;
 	u16 base_low;
@@ -14,4 +17,13 @@ struct segment_descriptor {
 	u8  base_high;
 } __attribute__((packed));
 
+#ifdef CONFIG_X86_64
+/* LDT or TSS descriptor in the GDT. 16 bytes. */
+struct segment_descriptor_64 {
+	struct segment_descriptor s;
+	u32 base_higher;
+	u32 pad_zero;
+};
 
+#endif
+#endif

+ 165 - 188
drivers/kvm/svm.c → arch/x86/kvm/svm.c

@@ -13,10 +13,11 @@
  * the COPYING file in the top-level directory.
  *
  */
+#include <linux/kvm_host.h>
 
 #include "kvm_svm.h"
-#include "x86_emulate.h"
 #include "irq.h"
+#include "mmu.h"
 
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -42,9 +43,6 @@ MODULE_LICENSE("GPL");
 #define SEG_TYPE_LDT 2
 #define SEG_TYPE_BUSY_TSS16 3
 
-#define KVM_EFER_LMA (1 << 10)
-#define KVM_EFER_LME (1 << 8)
-
 #define SVM_FEATURE_NPT  (1 << 0)
 #define SVM_FEATURE_LBRV (1 << 1)
 #define SVM_DEATURE_SVML (1 << 2)
@@ -102,20 +100,20 @@ static inline u32 svm_has(u32 feat)
 
 static inline u8 pop_irq(struct kvm_vcpu *vcpu)
 {
-	int word_index = __ffs(vcpu->irq_summary);
-	int bit_index = __ffs(vcpu->irq_pending[word_index]);
+	int word_index = __ffs(vcpu->arch.irq_summary);
+	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
 	int irq = word_index * BITS_PER_LONG + bit_index;
 
-	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-	if (!vcpu->irq_pending[word_index])
-		clear_bit(word_index, &vcpu->irq_summary);
+	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
+	if (!vcpu->arch.irq_pending[word_index])
+		clear_bit(word_index, &vcpu->arch.irq_summary);
 	return irq;
 }
 
 static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
 {
-	set_bit(irq, vcpu->irq_pending);
-	set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+	set_bit(irq, vcpu->arch.irq_pending);
+	set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
 }
 
 static inline void clgi(void)
@@ -184,35 +182,30 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
 
 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
-	if (!(efer & KVM_EFER_LMA))
-		efer &= ~KVM_EFER_LME;
+	if (!(efer & EFER_LMA))
+		efer &= ~EFER_LME;
 
 	to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
-	vcpu->shadow_efer = efer;
+	vcpu->arch.shadow_efer = efer;
 }
 
-static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
+static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
+				bool has_error_code, u32 error_code)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	svm->vmcb->control.event_inj =		SVM_EVTINJ_VALID |
-						SVM_EVTINJ_VALID_ERR |
-						SVM_EVTINJ_TYPE_EXEPT |
-						GP_VECTOR;
+	svm->vmcb->control.event_inj = nr
+		| SVM_EVTINJ_VALID
+		| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
+		| SVM_EVTINJ_TYPE_EXEPT;
 	svm->vmcb->control.event_inj_err = error_code;
 }
 
-static void inject_ud(struct kvm_vcpu *vcpu)
+static bool svm_exception_injected(struct kvm_vcpu *vcpu)
 {
-	to_svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID |
-						SVM_EVTINJ_TYPE_EXEPT |
-						UD_VECTOR;
-}
+	struct vcpu_svm *svm = to_svm(vcpu);
 
-static int is_page_fault(uint32_t info)
-{
-	info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
-	return info == (PF_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT);
+	return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
 }
 
 static int is_external_interrupt(u32 info)
@@ -229,17 +222,16 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 		printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__);
 		return;
 	}
-	if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) {
+	if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE)
 		printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
 		       __FUNCTION__,
 		       svm->vmcb->save.rip,
 		       svm->next_rip);
-	}
 
-	vcpu->rip = svm->vmcb->save.rip = svm->next_rip;
+	vcpu->arch.rip = svm->vmcb->save.rip = svm->next_rip;
 	svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
 
-	vcpu->interrupt_window_open = 1;
+	vcpu->arch.interrupt_window_open = 1;
 }
 
 static int has_svm(void)
@@ -312,7 +304,7 @@ static void svm_hardware_enable(void *garbage)
 	svm_data->next_asid = svm_data->max_asid + 1;
 	svm_features = cpuid_edx(SVM_CPUID_FUNC);
 
-	asm volatile ( "sgdt %0" : "=m"(gdt_descr) );
+	asm volatile ("sgdt %0" : "=m"(gdt_descr));
 	gdt = (struct desc_struct *)gdt_descr.address;
 	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
@@ -458,11 +450,13 @@ static void init_vmcb(struct vmcb *vmcb)
 
 	control->intercept_cr_read = 	INTERCEPT_CR0_MASK |
 					INTERCEPT_CR3_MASK |
-					INTERCEPT_CR4_MASK;
+					INTERCEPT_CR4_MASK |
+					INTERCEPT_CR8_MASK;
 
 	control->intercept_cr_write = 	INTERCEPT_CR0_MASK |
 					INTERCEPT_CR3_MASK |
-					INTERCEPT_CR4_MASK;
+					INTERCEPT_CR4_MASK |
+					INTERCEPT_CR8_MASK;
 
 	control->intercept_dr_read = 	INTERCEPT_DR0_MASK |
 					INTERCEPT_DR1_MASK |
@@ -476,7 +470,8 @@ static void init_vmcb(struct vmcb *vmcb)
 					INTERCEPT_DR5_MASK |
 					INTERCEPT_DR7_MASK;
 
-	control->intercept_exceptions = 1 << PF_VECTOR;
+	control->intercept_exceptions = (1 << PF_VECTOR) |
+					(1 << UD_VECTOR);
 
 
 	control->intercept = 	(1ULL << INTERCEPT_INTR) |
@@ -543,8 +538,7 @@ static void init_vmcb(struct vmcb *vmcb)
 	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 
 	save->efer = MSR_EFER_SVME_MASK;
-
-        save->dr6 = 0xffff0ff0;
+	save->dr6 = 0xffff0ff0;
 	save->dr7 = 0x400;
 	save->rflags = 2;
 	save->rip = 0x0000fff0;
@@ -558,7 +552,7 @@ static void init_vmcb(struct vmcb *vmcb)
 	/* rdx = ?? */
 }
 
-static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
+static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -566,9 +560,11 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	if (vcpu->vcpu_id != 0) {
 		svm->vmcb->save.rip = 0;
-		svm->vmcb->save.cs.base = svm->vcpu.sipi_vector << 12;
-		svm->vmcb->save.cs.selector = svm->vcpu.sipi_vector << 8;
+		svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
+		svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
 	}
+
+	return 0;
 }
 
 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
@@ -587,12 +583,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (err)
 		goto free_svm;
 
-	if (irqchip_in_kernel(kvm)) {
-		err = kvm_create_lapic(&svm->vcpu);
-		if (err < 0)
-			goto free_svm;
-	}
-
 	page = alloc_page(GFP_KERNEL);
 	if (!page) {
 		err = -ENOMEM;
@@ -608,9 +598,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	fx_init(&svm->vcpu);
 	svm->vcpu.fpu_active = 1;
-	svm->vcpu.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+	svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
 	if (svm->vcpu.vcpu_id == 0)
-		svm->vcpu.apic_base |= MSR_IA32_APICBASE_BSP;
+		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
 
 	return &svm->vcpu;
 
@@ -644,7 +634,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 * increasing TSC.
 		 */
 		rdtscll(tsc_this);
-		delta = vcpu->host_tsc - tsc_this;
+		delta = vcpu->arch.host_tsc - tsc_this;
 		svm->vmcb->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
 		kvm_migrate_apic_timer(vcpu);
@@ -659,11 +649,11 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	int i;
 
+	++vcpu->stat.host_state_reload;
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
 		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 
-	rdtscll(vcpu->host_tsc);
-	kvm_put_guest_fpu(vcpu);
+	rdtscll(vcpu->arch.host_tsc);
 }
 
 static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
@@ -674,17 +664,17 @@ static void svm_cache_regs(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	vcpu->regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
-	vcpu->regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
-	vcpu->rip = svm->vmcb->save.rip;
+	vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
+	vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
+	vcpu->arch.rip = svm->vmcb->save.rip;
 }
 
 static void svm_decache_regs(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
-	svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
-	svm->vmcb->save.rip = vcpu->rip;
+	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
+	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
+	svm->vmcb->save.rip = vcpu->arch.rip;
 }
 
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@ -782,24 +772,24 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 #ifdef CONFIG_X86_64
-	if (vcpu->shadow_efer & KVM_EFER_LME) {
+	if (vcpu->arch.shadow_efer & EFER_LME) {
 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
-			vcpu->shadow_efer |= KVM_EFER_LMA;
-			svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME;
+			vcpu->arch.shadow_efer |= EFER_LMA;
+			svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
 		}
 
-		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG) ) {
-			vcpu->shadow_efer &= ~KVM_EFER_LMA;
-			svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME);
+		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
+			vcpu->arch.shadow_efer &= ~EFER_LMA;
+			svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
 		}
 	}
 #endif
-	if ((vcpu->cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
+	if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
 		svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
 		vcpu->fpu_active = 1;
 	}
 
-	vcpu->cr0 = cr0;
+	vcpu->arch.cr0 = cr0;
 	cr0 |= X86_CR0_PG | X86_CR0_WP;
 	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
 	svm->vmcb->save.cr0 = cr0;
@@ -807,7 +797,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-       vcpu->cr4 = cr4;
+       vcpu->arch.cr4 = cr4;
        to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE;
 }
 
@@ -912,7 +902,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 		svm->db_regs[dr] = value;
 		return;
 	case 4 ... 5:
-		if (vcpu->cr4 & X86_CR4_DE) {
+		if (vcpu->arch.cr4 & X86_CR4_DE) {
 			*exception = UD_VECTOR;
 			return;
 		}
@@ -938,51 +928,30 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	struct kvm *kvm = svm->vcpu.kvm;
 	u64 fault_address;
 	u32 error_code;
-	enum emulation_result er;
-	int r;
 
 	if (!irqchip_in_kernel(kvm) &&
 		is_external_interrupt(exit_int_info))
 		push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
 
-	mutex_lock(&kvm->lock);
-
 	fault_address  = svm->vmcb->control.exit_info_2;
 	error_code = svm->vmcb->control.exit_info_1;
-	r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
-	if (r < 0) {
-		mutex_unlock(&kvm->lock);
-		return r;
-	}
-	if (!r) {
-		mutex_unlock(&kvm->lock);
-		return 1;
-	}
-	er = emulate_instruction(&svm->vcpu, kvm_run, fault_address,
-				 error_code);
-	mutex_unlock(&kvm->lock);
+	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
+}
 
-	switch (er) {
-	case EMULATE_DONE:
-		return 1;
-	case EMULATE_DO_MMIO:
-		++svm->vcpu.stat.mmio_exits;
-		return 0;
-	case EMULATE_FAIL:
-		kvm_report_emulation_failure(&svm->vcpu, "pagetable");
-		break;
-	default:
-		BUG();
-	}
+static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	int er;
 
-	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-	return 0;
+	er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
+	if (er != EMULATE_DONE)
+		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+	return 1;
 }
 
 static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
-	if (!(svm->vcpu.cr0 & X86_CR0_TS))
+	if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
 		svm->vmcb->save.cr0 &= ~X86_CR0_TS;
 	svm->vcpu.fpu_active = 1;
 
@@ -1004,7 +973,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 
 static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
-	u32 io_info = svm->vmcb->control.exit_info_1; //address size bug?
+	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
 	int size, down, in, string, rep;
 	unsigned port;
 
@@ -1015,7 +984,8 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	string = (io_info & SVM_IOIO_STR_MASK) != 0;
 
 	if (string) {
-		if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0) == EMULATE_DO_MMIO)
+		if (emulate_instruction(&svm->vcpu,
+					kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
 			return 0;
 		return 1;
 	}
@@ -1045,13 +1015,14 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	svm->next_rip = svm->vmcb->save.rip + 3;
 	skip_emulated_instruction(&svm->vcpu);
-	return kvm_hypercall(&svm->vcpu, kvm_run);
+	kvm_emulate_hypercall(&svm->vcpu);
+	return 1;
 }
 
 static int invalid_op_interception(struct vcpu_svm *svm,
 				   struct kvm_run *kvm_run)
 {
-	inject_ud(&svm->vcpu);
+	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 	return 1;
 }
 
@@ -1073,11 +1044,20 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int emulate_on_interception(struct vcpu_svm *svm,
 				   struct kvm_run *kvm_run)
 {
-	if (emulate_instruction(&svm->vcpu, NULL, 0, 0) != EMULATE_DONE)
+	if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
 		pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__);
 	return 1;
 }
 
+static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
+	if (irqchip_in_kernel(svm->vcpu.kvm))
+		return 1;
+	kvm_run->exit_reason = KVM_EXIT_SET_TPR;
+	return 0;
+}
+
 static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -1124,14 +1104,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 
 static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
-	u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX];
+	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data;
 
 	if (svm_get_msr(&svm->vcpu, ecx, &data))
-		svm_inject_gp(&svm->vcpu, 0);
+		kvm_inject_gp(&svm->vcpu, 0);
 	else {
 		svm->vmcb->save.rax = data & 0xffffffff;
-		svm->vcpu.regs[VCPU_REGS_RDX] = data >> 32;
+		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
 		svm->next_rip = svm->vmcb->save.rip + 2;
 		skip_emulated_instruction(&svm->vcpu);
 	}
@@ -1176,7 +1156,20 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 	case MSR_IA32_SYSENTER_ESP:
 		svm->vmcb->save.sysenter_esp = data;
 		break;
+	case MSR_K7_EVNTSEL0:
+	case MSR_K7_EVNTSEL1:
+	case MSR_K7_EVNTSEL2:
+	case MSR_K7_EVNTSEL3:
+		/*
+		 * Only writing 0 to the performance counters is supported
+		 * for now, to keep Windows happy; real performance counter
+		 * emulation should replace this later.
+		 */
+		if (data != 0)
+			goto unhandled;
+		break;
 	default:
+	unhandled:
 		return kvm_set_msr_common(vcpu, ecx, data);
 	}
 	return 0;
@@ -1184,12 +1177,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 
 static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
-	u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX];
+	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data = (svm->vmcb->save.rax & -1u)
-		| ((u64)(svm->vcpu.regs[VCPU_REGS_RDX] & -1u) << 32);
+		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 	svm->next_rip = svm->vmcb->save.rip + 2;
 	if (svm_set_msr(&svm->vcpu, ecx, data))
-		svm_inject_gp(&svm->vcpu, 0);
+		kvm_inject_gp(&svm->vcpu, 0);
 	else
 		skip_emulated_instruction(&svm->vcpu);
 	return 1;
@@ -1213,7 +1206,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
 	 * possible
 	 */
 	if (kvm_run->request_interrupt_window &&
-	    !svm->vcpu.irq_summary) {
+	    !svm->vcpu.arch.irq_summary) {
 		++svm->vcpu.stat.irq_window_exits;
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		return 0;
@@ -1227,10 +1220,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_READ_CR0]           		= emulate_on_interception,
 	[SVM_EXIT_READ_CR3]           		= emulate_on_interception,
 	[SVM_EXIT_READ_CR4]           		= emulate_on_interception,
+	[SVM_EXIT_READ_CR8]           		= emulate_on_interception,
 	/* for now: */
 	[SVM_EXIT_WRITE_CR0]          		= emulate_on_interception,
 	[SVM_EXIT_WRITE_CR3]          		= emulate_on_interception,
 	[SVM_EXIT_WRITE_CR4]          		= emulate_on_interception,
+	[SVM_EXIT_WRITE_CR8]          		= cr8_write_interception,
 	[SVM_EXIT_READ_DR0] 			= emulate_on_interception,
 	[SVM_EXIT_READ_DR1]			= emulate_on_interception,
 	[SVM_EXIT_READ_DR2]			= emulate_on_interception,
@@ -1241,6 +1236,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_WRITE_DR3]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR5]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR7]			= emulate_on_interception,
+	[SVM_EXIT_EXCP_BASE + UD_VECTOR]	= ud_interception,
 	[SVM_EXIT_EXCP_BASE + PF_VECTOR] 	= pf_interception,
 	[SVM_EXIT_EXCP_BASE + NM_VECTOR] 	= nm_interception,
 	[SVM_EXIT_INTR] 			= nop_on_interception,
@@ -1293,7 +1289,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		       exit_code);
 
 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
-	    || svm_exit_handlers[exit_code] == 0) {
+	    || !svm_exit_handlers[exit_code]) {
 		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
 		kvm_run->hw.hardware_exit_reason = exit_code;
 		return 0;
@@ -1307,7 +1303,7 @@ static void reload_tss(struct kvm_vcpu *vcpu)
 	int cpu = raw_smp_processor_id();
 
 	struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
-	svm_data->tss_desc->type = 9; //available 32/64-bit TSS
+	svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */
 	load_TR_desc();
 }
 
@@ -1348,7 +1344,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
 	struct vmcb *vmcb = svm->vmcb;
 	int intr_vector = -1;
 
-	kvm_inject_pending_timer_irqs(vcpu);
 	if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) &&
 	    ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
 		intr_vector = vmcb->control.exit_int_info &
@@ -1388,20 +1383,20 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
 		push_irq(&svm->vcpu, control->int_vector);
 	}
 
-	svm->vcpu.interrupt_window_open =
+	svm->vcpu.arch.interrupt_window_open =
 		!(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
 }
 
 static void svm_do_inject_vector(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
-	int word_index = __ffs(vcpu->irq_summary);
-	int bit_index = __ffs(vcpu->irq_pending[word_index]);
+	int word_index = __ffs(vcpu->arch.irq_summary);
+	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
 	int irq = word_index * BITS_PER_LONG + bit_index;
 
-	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-	if (!vcpu->irq_pending[word_index])
-		clear_bit(word_index, &vcpu->irq_summary);
+	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
+	if (!vcpu->arch.irq_pending[word_index])
+		clear_bit(word_index, &vcpu->arch.irq_summary);
 	svm_inject_irq(svm, irq);
 }
 
@@ -1411,11 +1406,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb_control_area *control = &svm->vmcb->control;
 
-	svm->vcpu.interrupt_window_open =
+	svm->vcpu.arch.interrupt_window_open =
 		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
 		 (svm->vmcb->save.rflags & X86_EFLAGS_IF));
 
-	if (svm->vcpu.interrupt_window_open && svm->vcpu.irq_summary)
+	if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
 		/*
 		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
 		 */
@@ -1424,13 +1419,18 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	/*
 	 * Interrupts blocked.  Wait for unblock.
 	 */
-	if (!svm->vcpu.interrupt_window_open &&
-	    (svm->vcpu.irq_summary || kvm_run->request_interrupt_window)) {
+	if (!svm->vcpu.arch.interrupt_window_open &&
+	    (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
 		control->intercept |= 1ULL << INTERCEPT_VINTR;
-	} else
+	 else
 		control->intercept &= ~(1ULL << INTERCEPT_VINTR);
 }
 
+static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
+{
+	return 0;
+}
+
 static void save_db_regs(unsigned long *db_regs)
 {
 	asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
@@ -1472,7 +1472,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	svm->host_cr2 = kvm_read_cr2();
 	svm->host_dr6 = read_dr6();
 	svm->host_dr7 = read_dr7();
-	svm->vmcb->save.cr2 = vcpu->cr2;
+	svm->vmcb->save.cr2 = vcpu->arch.cr2;
 
 	if (svm->vmcb->save.dr7 & 0xff) {
 		write_dr7(0);
@@ -1486,13 +1486,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	asm volatile (
 #ifdef CONFIG_X86_64
-		"push %%rbx; push %%rcx; push %%rdx;"
-		"push %%rsi; push %%rdi; push %%rbp;"
-		"push %%r8;  push %%r9;  push %%r10; push %%r11;"
-		"push %%r12; push %%r13; push %%r14; push %%r15;"
+		"push %%rbp; \n\t"
 #else
-		"push %%ebx; push %%ecx; push %%edx;"
-		"push %%esi; push %%edi; push %%ebp;"
+		"push %%ebp; \n\t"
 #endif
 
 #ifdef CONFIG_X86_64
@@ -1554,10 +1550,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		"mov %%r14, %c[r14](%[svm]) \n\t"
 		"mov %%r15, %c[r15](%[svm]) \n\t"
 
-		"pop  %%r15; pop  %%r14; pop  %%r13; pop  %%r12;"
-		"pop  %%r11; pop  %%r10; pop  %%r9;  pop  %%r8;"
-		"pop  %%rbp; pop  %%rdi; pop  %%rsi;"
-		"pop  %%rdx; pop  %%rcx; pop  %%rbx; \n\t"
+		"pop  %%rbp; \n\t"
 #else
 		"mov %%ebx, %c[rbx](%[svm]) \n\t"
 		"mov %%ecx, %c[rcx](%[svm]) \n\t"
@@ -1566,34 +1559,40 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		"mov %%edi, %c[rdi](%[svm]) \n\t"
 		"mov %%ebp, %c[rbp](%[svm]) \n\t"
 
-		"pop  %%ebp; pop  %%edi; pop  %%esi;"
-		"pop  %%edx; pop  %%ecx; pop  %%ebx; \n\t"
+		"pop  %%ebp; \n\t"
 #endif
 		:
 		: [svm]"a"(svm),
 		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
-		  [rbx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBX])),
-		  [rcx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RCX])),
-		  [rdx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDX])),
-		  [rsi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RSI])),
-		  [rdi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDI])),
-		  [rbp]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBP]))
+		  [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
+		  [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
+		  [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
+		  [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
+		  [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
+		  [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
 #ifdef CONFIG_X86_64
-		  ,[r8 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R8])),
-		  [r9 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R9 ])),
-		  [r10]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R10])),
-		  [r11]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R11])),
-		  [r12]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R12])),
-		  [r13]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R13])),
-		  [r14]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R14])),
-		  [r15]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R15]))
+		  , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
+		  [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
+		  [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
+		  [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
+		  [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
+		  [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
+		  [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
+		  [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
 #endif
-		: "cc", "memory" );
+		: "cc", "memory"
+#ifdef CONFIG_X86_64
+		, "rbx", "rcx", "rdx", "rsi", "rdi"
+		, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
+#else
+		, "ebx", "ecx", "edx" , "esi", "edi"
+#endif
+		);
 
 	if ((svm->vmcb->save.dr7 & 0xff))
 		load_db_regs(svm->host_db_regs);
 
-	vcpu->cr2 = svm->vmcb->save.cr2;
+	vcpu->arch.cr2 = svm->vmcb->save.cr2;
 
 	write_dr6(svm->host_dr6);
 	write_dr7(svm->host_dr7);
@@ -1627,34 +1626,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 	}
 }
 
-static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
-				  unsigned long  addr,
-				  uint32_t err_code)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-	uint32_t exit_int_info = svm->vmcb->control.exit_int_info;
-
-	++vcpu->stat.pf_guest;
-
-	if (is_page_fault(exit_int_info)) {
-
-		svm->vmcb->control.event_inj_err = 0;
-		svm->vmcb->control.event_inj = 	SVM_EVTINJ_VALID |
-						SVM_EVTINJ_VALID_ERR |
-						SVM_EVTINJ_TYPE_EXEPT |
-						DF_VECTOR;
-		return;
-	}
-	vcpu->cr2 = addr;
-	svm->vmcb->save.cr2 = addr;
-	svm->vmcb->control.event_inj = 	SVM_EVTINJ_VALID |
-					SVM_EVTINJ_VALID_ERR |
-					SVM_EVTINJ_TYPE_EXEPT |
-					PF_VECTOR;
-	svm->vmcb->control.event_inj_err = err_code;
-}
-
-
 static int is_disabled(void)
 {
 	u64 vm_cr;
@@ -1675,7 +1646,6 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
 	hypercall[0] = 0x0f;
 	hypercall[1] = 0x01;
 	hypercall[2] = 0xd9;
-	hypercall[3] = 0xc3;
 }
 
 static void svm_check_processor_compat(void *rtn)
@@ -1683,6 +1653,11 @@ static void svm_check_processor_compat(void *rtn)
 	*(int *)rtn = 0;
 }
 
+static bool svm_cpu_has_accelerated_tpr(void)
+{
+	return false;
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -1691,6 +1666,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.check_processor_compatibility = svm_check_processor_compat,
 	.hardware_enable = svm_hardware_enable,
 	.hardware_disable = svm_hardware_disable,
+	.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
 
 	.vcpu_create = svm_create_vcpu,
 	.vcpu_free = svm_free_vcpu,
@@ -1725,9 +1701,6 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.set_rflags = svm_set_rflags,
 
 	.tlb_flush = svm_flush_tlb,
-	.inject_page_fault = svm_inject_page_fault,
-
-	.inject_gp = svm_inject_gp,
 
 	.run = svm_vcpu_run,
 	.handle_exit = handle_exit,
@@ -1735,19 +1708,23 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.patch_hypercall = svm_patch_hypercall,
 	.get_irq = svm_get_irq,
 	.set_irq = svm_set_irq,
+	.queue_exception = svm_queue_exception,
+	.exception_injected = svm_exception_injected,
 	.inject_pending_irq = svm_intr_assist,
 	.inject_pending_vectors = do_interrupt_requests,
+
+	.set_tss_addr = svm_set_tss_addr,
 };
 
 static int __init svm_init(void)
 {
-	return kvm_init_x86(&svm_x86_ops, sizeof(struct vcpu_svm),
+	return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
 			      THIS_MODULE);
 }
 
 static void __exit svm_exit(void)
 {
-	kvm_exit_x86();
+	kvm_exit();
 }
 
 module_init(svm_init)

+ 2 - 1
drivers/kvm/svm.h → arch/x86/kvm/svm.h

@@ -204,6 +204,7 @@ struct __attribute__ ((__packed__)) vmcb {
 #define INTERCEPT_CR0_MASK 1
 #define INTERCEPT_CR3_MASK (1 << 3)
 #define INTERCEPT_CR4_MASK (1 << 4)
+#define INTERCEPT_CR8_MASK (1 << 8)
 
 #define INTERCEPT_DR0_MASK 1
 #define INTERCEPT_DR1_MASK (1 << 1)
@@ -311,7 +312,7 @@ struct __attribute__ ((__packed__)) vmcb {
 
 #define SVM_EXIT_ERR		-1
 
-#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) // TS and MP
+#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
 
 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
 #define SVM_VMRUN  ".byte 0x0f, 0x01, 0xd8"
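The new INTERCEPT_CR8_MASK follows the existing pattern: bit n of the CR-intercept words selects CRn. A sketch of setting and testing such a bit against the vmcb control area (the intercept_cr_write field name is assumed from the layout above; this is not code from the patch):

	static inline void enable_cr8_write_intercept(struct vmcb *vmcb)
	{
		vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
	}

	static inline int cr8_write_intercepted(const struct vmcb *vmcb)
	{
		return (vmcb->control.intercept_cr_write &
			INTERCEPT_CR8_MASK) != 0;
	}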

File diff suppressed because it is too large
+ 351 - 276
arch/x86/kvm/vmx.c


+ 20 - 6
drivers/kvm/vmx.h → arch/x86/kvm/vmx.h

@@ -25,6 +25,9 @@
  *
  */
 
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
 #define CPU_BASED_VIRTUAL_INTR_PENDING          0x00000004
 #define CPU_BASED_USE_TSC_OFFSETING             0x00000008
 #define CPU_BASED_HLT_EXITING                   0x00000080
@@ -42,6 +45,12 @@
 #define CPU_BASED_MONITOR_EXITING               0x20000000
 #define CPU_BASED_PAUSE_EXITING                 0x40000000
 #define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS   0x80000000
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
+
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_NMI_EXITING                   0x00000008
@@ -54,8 +63,6 @@
 #define VM_ENTRY_SMM                            0x00000400
 #define VM_ENTRY_DEACT_DUAL_MONITOR             0x00000800
 
-#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
-
 /* VMCS Encodings */
 enum vmcs_field {
 	GUEST_ES_SELECTOR               = 0x00000800,
@@ -89,6 +96,8 @@ enum vmcs_field {
 	TSC_OFFSET_HIGH                 = 0x00002011,
 	VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
 	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
+	APIC_ACCESS_ADDR		= 0x00002014,
+	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
 	VMCS_LINK_POINTER               = 0x00002800,
 	VMCS_LINK_POINTER_HIGH          = 0x00002801,
 	GUEST_IA32_DEBUGCTL             = 0x00002802,
@@ -214,6 +223,8 @@ enum vmcs_field {
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS         44
+#define EXIT_REASON_WBINVD		54
 
 /*
  * Interruption-information format
@@ -230,13 +241,14 @@ enum vmcs_field {
 
 #define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
 #define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
+#define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
 
 /*
  * Exit Qualifications for MOV for Control Register Access
  */
-#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control register */
+#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control reg. */
 #define CONTROL_REG_ACCESS_TYPE         0x30    /* 5:4, access type */
-#define CONTROL_REG_ACCESS_REG          0xf00   /* 10:8, general purpose register */
+#define CONTROL_REG_ACCESS_REG          0xf00   /* 10:8, general purpose reg. */
 #define LMSW_SOURCE_DATA_SHIFT 16
 #define LMSW_SOURCE_DATA  (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */
 #define REG_EAX                         (0 << 8)
@@ -259,11 +271,11 @@ enum vmcs_field {
 /*
  * Exit Qualifications for MOV for Debug Register Access
  */
-#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug register */
+#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug reg. */
 #define DEBUG_REG_ACCESS_TYPE           0x10    /* 4, direction of access */
 #define TYPE_MOV_TO_DR                  (0 << 4)
 #define TYPE_MOV_FROM_DR                (1 << 4)
-#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose register */
+#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose reg. */
 
 
 /* segment AR */
@@ -307,4 +319,6 @@ enum vmcs_field {
 #define MSR_IA32_FEATURE_CONTROL_LOCKED         0x1
 #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED  0x4
 
+#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	9
+
 #endif
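A secondary execution control is consulted by the CPU only when CPU_BASED_ACTIVATE_SECONDARY_CONTROLS is set in the primary controls, so a feature test has to look at both words. A hedged sketch using the encodings above (vmcs_read32() and the two *_VM_EXEC_CONTROL field names are the usual VMX accessors; their use here is illustrative):

	static inline int apic_accesses_virtualized(void)
	{
		u32 primary = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);

		if (!(primary & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
			return 0;
		return (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) != 0;
	}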

File diff suppressed because it is too large
+ 1462 - 1237
arch/x86/kvm/x86.c


+ 1912 - 0
arch/x86/kvm/x86_emulate.c

@@ -0,0 +1,1912 @@
+/******************************************************************************
+ * x86_emulate.c
+ *
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
+ *
+ * Copyright (c) 2005 Keir Fraser
+ *
+ * Linux coding style, mod r/m decoder, segment base fixes, real-mode
+ * privileged instructions:
+ *
+ * Copyright (C) 2006 Qumranet
+ *
+ *   Avi Kivity <avi@qumranet.com>
+ *   Yaniv Kamay <yaniv@qumranet.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
+ */
+
+#ifndef __KERNEL__
+#include <stdio.h>
+#include <stdint.h>
+#include <public/xen.h>
+#define DPRINTF(_f, _a ...) printf(_f , ## _a)
+#else
+#include <linux/kvm_host.h>
+#define DPRINTF(x...) do {} while (0)
+#endif
+#include <linux/module.h>
+#include <asm/kvm_x86_emulate.h>
+
+/*
+ * Opcode effective-address decode tables.
+ * Note that we only emulate instructions that have at least one memory
+ * operand (excluding implicit stack references). We assume that stack
+ * references and instruction fetches will never occur in special memory
+ * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
+ * not be handled.
+ */
+
+/* Operand sizes: 8-bit operands or specified/overridden size. */
+#define ByteOp      (1<<0)	/* 8-bit operands. */
+/* Destination operand type. */
+#define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
+#define DstReg      (2<<1)	/* Register operand. */
+#define DstMem      (3<<1)	/* Memory operand. */
+#define DstMask     (3<<1)
+/* Source operand type. */
+#define SrcNone     (0<<3)	/* No source operand. */
+#define SrcImplicit (0<<3)	/* Source operand is implicit in the opcode. */
+#define SrcReg      (1<<3)	/* Register operand. */
+#define SrcMem      (2<<3)	/* Memory operand. */
+#define SrcMem16    (3<<3)	/* Memory operand (16-bit). */
+#define SrcMem32    (4<<3)	/* Memory operand (32-bit). */
+#define SrcImm      (5<<3)	/* Immediate operand. */
+#define SrcImmByte  (6<<3)	/* 8-bit sign-extended immediate operand. */
+#define SrcMask     (7<<3)
+/* Generic ModRM decode. */
+#define ModRM       (1<<6)
+/* Destination is only written; never read. */
+#define Mov         (1<<7)
+#define BitOp       (1<<8)
+#define MemAbs      (1<<9)      /* Memory operand is absolute displacement */
+#define String      (1<<10)     /* String instruction (rep capable) */
+#define Stack       (1<<11)     /* Stack instruction (push/pop) */
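Each opcode_table entry below packs these attributes into a u16, so the decoder classifies an opcode with a few mask operations; opcode 0x88 (mov r/m8, reg8), for instance, is entered as ByteOp | DstMem | SrcReg | ModRM | Mov. A sketch of pulling an entry apart (illustrative only):

	static void classify_entry(u16 d)
	{
		unsigned dst = d & DstMask;	  /* ImplicitOps/DstReg/DstMem */
		unsigned src = d & SrcMask;	  /* SrcNone ... SrcImmByte */
		int modrm    = (d & ModRM) != 0;  /* a ModR/M byte follows */
		int byteop   = (d & ByteOp) != 0; /* 8-bit operands */

		(void)dst; (void)src; (void)modrm; (void)byteop;
	}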
+
+static u16 opcode_table[256] = {
+	/* 0x00 - 0x07 */
+	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
+	0, 0, 0, 0,
+	/* 0x08 - 0x0F */
+	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
+	0, 0, 0, 0,
+	/* 0x10 - 0x17 */
+	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
+	0, 0, 0, 0,
+	/* 0x18 - 0x1F */
+	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
+	0, 0, 0, 0,
+	/* 0x20 - 0x27 */
+	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
+	SrcImmByte, SrcImm, 0, 0,
+	/* 0x28 - 0x2F */
+	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
+	0, 0, 0, 0,
+	/* 0x30 - 0x37 */
+	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
+	0, 0, 0, 0,
+	/* 0x38 - 0x3F */
+	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
+	0, 0, 0, 0,
+	/* 0x40 - 0x47 */
+	DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
+	/* 0x48 - 0x4F */
+	DstReg, DstReg, DstReg, DstReg,	DstReg, DstReg, DstReg, DstReg,
+	/* 0x50 - 0x57 */
+	SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
+	SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
+	/* 0x58 - 0x5F */
+	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
+	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
+	/* 0x60 - 0x67 */
+	0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */,
+	0, 0, 0, 0,
+	/* 0x68 - 0x6F */
+	0, 0, ImplicitOps | Mov | Stack, 0,
+	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */
+	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */
+	/* 0x70 - 0x77 */
+	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	/* 0x78 - 0x7F */
+	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	/* 0x80 - 0x87 */
+	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
+	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
+	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	/* 0x88 - 0x8F */
+	ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
+	ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
+	0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov | Stack,
+	/* 0x90 - 0x9F */
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
+	/* 0xA0 - 0xA7 */
+	ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
+	ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
+	ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
+	ByteOp | ImplicitOps | String, ImplicitOps | String,
+	/* 0xA8 - 0xAF */
+	0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
+	ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
+	ByteOp | ImplicitOps | String, ImplicitOps | String,
+	/* 0xB0 - 0xBF */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0xC0 - 0xC7 */
+	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
+	0, ImplicitOps | Stack, 0, 0,
+	ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
+	/* 0xC8 - 0xCF */
+	0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0xD0 - 0xD7 */
+	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
+	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
+	0, 0, 0, 0,
+	/* 0xD8 - 0xDF */
+	0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0xE0 - 0xE7 */
+	0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0xE8 - 0xEF */
+	ImplicitOps | Stack, SrcImm | ImplicitOps, 0, SrcImmByte | ImplicitOps,
+	0, 0, 0, 0,
+	/* 0xF0 - 0xF7 */
+	0, 0, 0, 0,
+	ImplicitOps, ImplicitOps,
+	ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
+	/* 0xF8 - 0xFF */
+	ImplicitOps, 0, ImplicitOps, ImplicitOps,
+	0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM
+};
+
+static u16 twobyte_table[256] = {
+	/* 0x00 - 0x0F */
+	0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0,
+	ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
+	/* 0x10 - 0x1F */
+	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
+	/* 0x20 - 0x2F */
+	ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0x30 - 0x3F */
+	ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0x40 - 0x47 */
+	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
+	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
+	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
+	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
+	/* 0x48 - 0x4F */
+	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
+	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
+	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
+	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
+	/* 0x50 - 0x5F */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0x60 - 0x6F */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0x70 - 0x7F */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0x80 - 0x8F */
+	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	/* 0x90 - 0x9F */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0xA0 - 0xA7 */
+	0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
+	/* 0xA8 - 0xAF */
+	0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
+	/* 0xB0 - 0xB7 */
+	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
+	    DstMem | SrcReg | ModRM | BitOp,
+	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
+	    DstReg | SrcMem16 | ModRM | Mov,
+	/* 0xB8 - 0xBF */
+	0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp,
+	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
+	    DstReg | SrcMem16 | ModRM | Mov,
+	/* 0xC0 - 0xCF */
+	0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0xD0 - 0xDF */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0xE0 - 0xEF */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0xF0 - 0xFF */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* EFLAGS bit definitions. */
+#define EFLG_OF (1<<11)
+#define EFLG_DF (1<<10)
+#define EFLG_SF (1<<7)
+#define EFLG_ZF (1<<6)
+#define EFLG_AF (1<<4)
+#define EFLG_PF (1<<2)
+#define EFLG_CF (1<<0)
+
+/*
+ * Instruction emulation:
+ * Most instructions are emulated directly via a fragment of inline assembly
+ * code. This allows us to save/restore EFLAGS and thus very easily pick up
+ * any modified flags.
+ */
+
+#if defined(CONFIG_X86_64)
+#define _LO32 "k"		/* force 32-bit operand */
+#define _STK  "%%rsp"		/* stack pointer */
+#elif defined(__i386__)
+#define _LO32 ""		/* force 32-bit operand */
+#define _STK  "%%esp"		/* stack pointer */
+#endif
+
+/*
+ * These EFLAGS bits are restored from saved value during emulation, and
+ * any changes are written back to the saved value after emulation.
+ */
+#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
+
+/* Before executing instruction: restore necessary bits in EFLAGS. */
+#define _PRE_EFLAGS(_sav, _msk, _tmp)					\
+	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
+	"movl %"_sav",%"_LO32 _tmp"; "                                  \
+	"push %"_tmp"; "                                                \
+	"push %"_tmp"; "                                                \
+	"movl %"_msk",%"_LO32 _tmp"; "                                  \
+	"andl %"_LO32 _tmp",("_STK"); "                                 \
+	"pushf; "                                                       \
+	"notl %"_LO32 _tmp"; "                                          \
+	"andl %"_LO32 _tmp",("_STK"); "                                 \
+	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
+	"pop  %"_tmp"; "                                                \
+	"orl  %"_LO32 _tmp",("_STK"); "                                 \
+	"popf; "                                                        \
+	"pop  %"_sav"; "
+
+/* After executing instruction: write-back necessary bits in EFLAGS. */
+#define _POST_EFLAGS(_sav, _msk, _tmp) \
+	/* _sav |= EFLAGS & _msk; */		\
+	"pushf; "				\
+	"pop  %"_tmp"; "			\
+	"andl %"_msk",%"_LO32 _tmp"; "		\
+	"orl  %"_LO32 _tmp",%"_sav"; "
+
+/* Raw emulation: instruction has two explicit operands. */
+#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
+	do { 								    \
+		unsigned long _tmp;					    \
+									    \
+		switch ((_dst).bytes) {					    \
+		case 2:							    \
+			__asm__ __volatile__ (				    \
+				_PRE_EFLAGS("0", "4", "2")		    \
+				_op"w %"_wx"3,%1; "			    \
+				_POST_EFLAGS("0", "4", "2")		    \
+				: "=m" (_eflags), "=m" ((_dst).val),        \
+				  "=&r" (_tmp)				    \
+				: _wy ((_src).val), "i" (EFLAGS_MASK));     \
+			break;						    \
+		case 4:							    \
+			__asm__ __volatile__ (				    \
+				_PRE_EFLAGS("0", "4", "2")		    \
+				_op"l %"_lx"3,%1; "			    \
+				_POST_EFLAGS("0", "4", "2")		    \
+				: "=m" (_eflags), "=m" ((_dst).val),	    \
+				  "=&r" (_tmp)				    \
+				: _ly ((_src).val), "i" (EFLAGS_MASK));     \
+			break;						    \
+		case 8:							    \
+			__emulate_2op_8byte(_op, _src, _dst,		    \
+					    _eflags, _qx, _qy);		    \
+			break;						    \
+		}							    \
+	} while (0)
+
+#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
+	do {								     \
+		unsigned long _tmp;					     \
+		switch ((_dst).bytes) {				             \
+		case 1:							     \
+			__asm__ __volatile__ (				     \
+				_PRE_EFLAGS("0", "4", "2")		     \
+				_op"b %"_bx"3,%1; "			     \
+				_POST_EFLAGS("0", "4", "2")		     \
+				: "=m" (_eflags), "=m" ((_dst).val),	     \
+				  "=&r" (_tmp)				     \
+				: _by ((_src).val), "i" (EFLAGS_MASK));      \
+			break;						     \
+		default:						     \
+			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
+					     _wx, _wy, _lx, _ly, _qx, _qy);  \
+			break;						     \
+		}							     \
+	} while (0)
+
+/* Source operand is byte-sized and may be restricted to just %cl. */
+#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                      \
+	__emulate_2op(_op, _src, _dst, _eflags,				\
+		      "b", "c", "b", "c", "b", "c", "b", "c")
+
+/* Source operand is byte, word, long or quad sized. */
+#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                      \
+	__emulate_2op(_op, _src, _dst, _eflags,				\
+		      "b", "q", "w", "r", _LO32, "r", "", "r")
+
+/* Source operand is word, long or quad sized. */
+#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)               \
+	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
+			     "w", "r", _LO32, "r", "", "r")
+
+/* Instruction has only one explicit operand (no source operand). */
+#define emulate_1op(_op, _dst, _eflags)                                    \
+	do {								\
+		unsigned long _tmp;					\
+									\
+		switch ((_dst).bytes) {				        \
+		case 1:							\
+			__asm__ __volatile__ (				\
+				_PRE_EFLAGS("0", "3", "2")		\
+				_op"b %1; "				\
+				_POST_EFLAGS("0", "3", "2")		\
+				: "=m" (_eflags), "=m" ((_dst).val),	\
+				  "=&r" (_tmp)				\
+				: "i" (EFLAGS_MASK));			\
+			break;						\
+		case 2:							\
+			__asm__ __volatile__ (				\
+				_PRE_EFLAGS("0", "3", "2")		\
+				_op"w %1; "				\
+				_POST_EFLAGS("0", "3", "2")		\
+				: "=m" (_eflags), "=m" ((_dst).val),	\
+				  "=&r" (_tmp)				\
+				: "i" (EFLAGS_MASK));			\
+			break;						\
+		case 4:							\
+			__asm__ __volatile__ (				\
+				_PRE_EFLAGS("0", "3", "2")		\
+				_op"l %1; "				\
+				_POST_EFLAGS("0", "3", "2")		\
+				: "=m" (_eflags), "=m" ((_dst).val),	\
+				  "=&r" (_tmp)				\
+				: "i" (EFLAGS_MASK));			\
+			break;						\
+		case 8:							\
+			__emulate_1op_8byte(_op, _dst, _eflags);	\
+			break;						\
+		}							\
+	} while (0)
+
+/* Emulate an instruction with quadword operands (x86/64 only). */
+#if defined(CONFIG_X86_64)
+#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)           \
+	do {								  \
+		__asm__ __volatile__ (					  \
+			_PRE_EFLAGS("0", "4", "2")			  \
+			_op"q %"_qx"3,%1; "				  \
+			_POST_EFLAGS("0", "4", "2")			  \
+			: "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+			: _qy ((_src).val), "i" (EFLAGS_MASK));		\
+	} while (0)
+
+#define __emulate_1op_8byte(_op, _dst, _eflags)                           \
+	do {								  \
+		__asm__ __volatile__ (					  \
+			_PRE_EFLAGS("0", "3", "2")			  \
+			_op"q %1; "					  \
+			_POST_EFLAGS("0", "3", "2")			  \
+			: "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+			: "i" (EFLAGS_MASK));				  \
+	} while (0)
+
+#elif defined(__i386__)
+#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
+#define __emulate_1op_8byte(_op, _dst, _eflags)
+#endif				/* __i386__ */
+
+/* Fetch next part of the instruction being emulated. */
+#define insn_fetch(_type, _size, _eip)                                  \
+({	unsigned long _x;						\
+	rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));		\
+	if (rc != 0)							\
+		goto done;						\
+	(_eip) += (_size);						\
+	(_type)_x;							\
+})
+
+/* Access/update address held in a register, based on addressing mode. */
+#define address_mask(reg)						\
+	((c->ad_bytes == sizeof(unsigned long)) ? 			\
+		(reg) :	((reg) & ((1UL << (c->ad_bytes << 3)) - 1)))
+#define register_address(base, reg)                                     \
+	((base) + address_mask(reg))
+#define register_address_increment(reg, inc)                            \
+	do {								\
+		/* signed type ensures sign extension to long */        \
+		int _inc = (inc);					\
+		if (c->ad_bytes == sizeof(unsigned long))		\
+			(reg) += _inc;					\
+		else							\
+			(reg) = ((reg) & 				\
+				 ~((1UL << (c->ad_bytes << 3)) - 1)) |	\
+				(((reg) + _inc) &			\
+				 ((1UL << (c->ad_bytes << 3)) - 1));	\
+	} while (0)
+
+#define JMP_REL(rel) 							\
+	do {								\
+		register_address_increment(c->eip, rel);		\
+	} while (0)
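The masking above is what makes 16-bit guest address arithmetic behave: an increment must wrap inside the low ad_bytes*8 bits without disturbing the rest of the register. A worked example (sketch):

	/*
	 * c->ad_bytes == 2, RSI == 0x1234ffff, inc == 1:
	 *
	 *	mask = (1UL << 16) - 1            = 0x0000ffff
	 *	reg  = (reg & ~mask) | ((reg + 1) & mask)
	 *	     = 0x12340000
	 *
	 * SI wraps from 0xffff to 0x0000 while the upper register
	 * bits are preserved.
	 */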
+
+static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
+			      struct x86_emulate_ops *ops,
+			      unsigned long linear, u8 *dest)
+{
+	struct fetch_cache *fc = &ctxt->decode.fetch;
+	int rc;
+	int size;
+
+	if (linear < fc->start || linear >= fc->end) {
+		size = min(15UL, PAGE_SIZE - offset_in_page(linear));
+		rc = ops->read_std(linear, fc->data, size, ctxt->vcpu);
+		if (rc)
+			return rc;
+		fc->start = linear;
+		fc->end = linear + size;
+	}
+	*dest = fc->data[linear - fc->start];
+	return 0;
+}
+
+static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
+			 struct x86_emulate_ops *ops,
+			 unsigned long eip, void *dest, unsigned size)
+{
+	int rc = 0;
+
+	eip += ctxt->cs_base;
+	while (size--) {
+		rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+/*
+ * Given the 'reg' portion of a ModRM byte, and a register block, return a
+ * pointer into the block that addresses the relevant register.
+ * @highbyte_regs specifies whether to decode AH, CH, DH, BH.
+ */
+static void *decode_register(u8 modrm_reg, unsigned long *regs,
+			     int highbyte_regs)
+{
+	void *p;
+
+	p = &regs[modrm_reg];
+	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
+		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
+	return p;
+}
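A concrete case of the high-byte path (worked example, not new code):

	/*
	 * decode_register(4, regs, 1):
	 *	highbyte_regs && 4 >= 4 -> regs[4 & 3] == regs[VCPU_REGS_RAX]
	 *	+ 1 byte                -> AH (byte 1 of RAX, little-endian)
	 *
	 * decode_register(4, regs, 0) selects regs[VCPU_REGS_RSP] instead.
	 */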
+
+static int read_descriptor(struct x86_emulate_ctxt *ctxt,
+			   struct x86_emulate_ops *ops,
+			   void *ptr,
+			   u16 *size, unsigned long *address, int op_bytes)
+{
+	int rc;
+
+	if (op_bytes == 2)
+		op_bytes = 3;
+	*address = 0;
+	rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
+			   ctxt->vcpu);
+	if (rc)
+		return rc;
+	rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
+			   ctxt->vcpu);
+	return rc;
+}
+
+static int test_cc(unsigned int condition, unsigned int flags)
+{
+	int rc = 0;
+
+	switch ((condition & 15) >> 1) {
+	case 0: /* o */
+		rc |= (flags & EFLG_OF);
+		break;
+	case 1: /* b/c/nae */
+		rc |= (flags & EFLG_CF);
+		break;
+	case 2: /* z/e */
+		rc |= (flags & EFLG_ZF);
+		break;
+	case 3: /* be/na */
+		rc |= (flags & (EFLG_CF|EFLG_ZF));
+		break;
+	case 4: /* s */
+		rc |= (flags & EFLG_SF);
+		break;
+	case 5: /* p/pe */
+		rc |= (flags & EFLG_PF);
+		break;
+	case 7: /* le/ng */
+		rc |= (flags & EFLG_ZF);
+		/* fall through */
+	case 6: /* l/nge */
+		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
+		break;
+	}
+
+	/* Odd condition identifiers (lsb == 1) have inverted sense. */
+	return (!!rc ^ (condition & 1));
+}
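The switch mirrors the hardware condition-code table: bits 3:1 of the condition select a flag expression and bit 0 inverts it. Worked example (sketch):

	/*
	 * 'je'  (0x74): (0x74 & 15) >> 1 == 2 -> rc = flags & EFLG_ZF
	 *               0x74 & 1 == 0         -> taken when ZF is set
	 * 'jne' (0x75): same flag expression; 0x75 & 1 == 1 inverts
	 *               the sense             -> taken when ZF is clear
	 */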
+
+static void decode_register_operand(struct operand *op,
+				    struct decode_cache *c,
+				    int inhibit_bytereg)
+{
+	unsigned reg = c->modrm_reg;
+	int highbyte_regs = c->rex_prefix == 0;
+
+	if (!(c->d & ModRM))
+		reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
+	op->type = OP_REG;
+	if ((c->d & ByteOp) && !inhibit_bytereg) {
+		op->ptr = decode_register(reg, c->regs, highbyte_regs);
+		op->val = *(u8 *)op->ptr;
+		op->bytes = 1;
+	} else {
+		op->ptr = decode_register(reg, c->regs, 0);
+		op->bytes = c->op_bytes;
+		switch (op->bytes) {
+		case 2:
+			op->val = *(u16 *)op->ptr;
+			break;
+		case 4:
+			op->val = *(u32 *)op->ptr;
+			break;
+		case 8:
+			op->val = *(u64 *) op->ptr;
+			break;
+		}
+	}
+	op->orig_val = op->val;
+}
+
+static int decode_modrm(struct x86_emulate_ctxt *ctxt,
+			struct x86_emulate_ops *ops)
+{
+	struct decode_cache *c = &ctxt->decode;
+	u8 sib;
+	int index_reg = 0, base_reg = 0, scale, rip_relative = 0;
+	int rc = 0;
+
+	if (c->rex_prefix) {
+		c->modrm_reg = (c->rex_prefix & 4) << 1;	/* REX.R */
+		index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
+		c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */
+	}
+
+	c->modrm = insn_fetch(u8, 1, c->eip);
+	c->modrm_mod |= (c->modrm & 0xc0) >> 6;
+	c->modrm_reg |= (c->modrm & 0x38) >> 3;
+	c->modrm_rm |= (c->modrm & 0x07);
+	c->modrm_ea = 0;
+	c->use_modrm_ea = 1;
+
+	if (c->modrm_mod == 3) {
+		c->modrm_val = *(unsigned long *)
+			decode_register(c->modrm_rm, c->regs, c->d & ByteOp);
+		return rc;
+	}
+
+	if (c->ad_bytes == 2) {
+		unsigned bx = c->regs[VCPU_REGS_RBX];
+		unsigned bp = c->regs[VCPU_REGS_RBP];
+		unsigned si = c->regs[VCPU_REGS_RSI];
+		unsigned di = c->regs[VCPU_REGS_RDI];
+
+		/* 16-bit ModR/M decode. */
+		switch (c->modrm_mod) {
+		case 0:
+			if (c->modrm_rm == 6)
+				c->modrm_ea += insn_fetch(u16, 2, c->eip);
+			break;
+		case 1:
+			c->modrm_ea += insn_fetch(s8, 1, c->eip);
+			break;
+		case 2:
+			c->modrm_ea += insn_fetch(u16, 2, c->eip);
+			break;
+		}
+		switch (c->modrm_rm) {
+		case 0:
+			c->modrm_ea += bx + si;
+			break;
+		case 1:
+			c->modrm_ea += bx + di;
+			break;
+		case 2:
+			c->modrm_ea += bp + si;
+			break;
+		case 3:
+			c->modrm_ea += bp + di;
+			break;
+		case 4:
+			c->modrm_ea += si;
+			break;
+		case 5:
+			c->modrm_ea += di;
+			break;
+		case 6:
+			if (c->modrm_mod != 0)
+				c->modrm_ea += bp;
+			break;
+		case 7:
+			c->modrm_ea += bx;
+			break;
+		}
+		if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
+		    (c->modrm_rm == 6 && c->modrm_mod != 0))
+			if (!c->override_base)
+				c->override_base = &ctxt->ss_base;
+		c->modrm_ea = (u16)c->modrm_ea;
+	} else {
+		/* 32/64-bit ModR/M decode. */
+		switch (c->modrm_rm) {
+		case 4:
+		case 12:
+			sib = insn_fetch(u8, 1, c->eip);
+			index_reg |= (sib >> 3) & 7;
+			base_reg |= sib & 7;
+			scale = sib >> 6;
+
+			switch (base_reg) {
+			case 5:
+				if (c->modrm_mod != 0)
+					c->modrm_ea += c->regs[base_reg];
+				else
+					c->modrm_ea +=
+						insn_fetch(s32, 4, c->eip);
+				break;
+			default:
+				c->modrm_ea += c->regs[base_reg];
+			}
+			switch (index_reg) {
+			case 4:
+				break;
+			default:
+				c->modrm_ea += c->regs[index_reg] << scale;
+			}
+			break;
+		case 5:
+			if (c->modrm_mod != 0)
+				c->modrm_ea += c->regs[c->modrm_rm];
+			else if (ctxt->mode == X86EMUL_MODE_PROT64)
+				rip_relative = 1;
+			break;
+		default:
+			c->modrm_ea += c->regs[c->modrm_rm];
+			break;
+		}
+		switch (c->modrm_mod) {
+		case 0:
+			if (c->modrm_rm == 5)
+				c->modrm_ea += insn_fetch(s32, 4, c->eip);
+			break;
+		case 1:
+			c->modrm_ea += insn_fetch(s8, 1, c->eip);
+			break;
+		case 2:
+			c->modrm_ea += insn_fetch(s32, 4, c->eip);
+			break;
+		}
+	}
+	if (rip_relative) {
+		c->modrm_ea += c->eip;
+		switch (c->d & SrcMask) {
+		case SrcImmByte:
+			c->modrm_ea += 1;
+			break;
+		case SrcImm:
+			if (c->d & ByteOp)
+				c->modrm_ea += 1;
+			else
+				if (c->op_bytes == 8)
+					c->modrm_ea += 4;
+				else
+					c->modrm_ea += c->op_bytes;
+		}
+	}
+done:
+	return rc;
+}
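A worked example for the 32/64-bit path above (illustrative): ModRM byte 0x45 decodes to an EBP-relative access with an 8-bit displacement.

	/*
	 * modrm == 0x45:
	 *	mod = 0x45 >> 6       == 1	(disp8 follows)
	 *	reg = (0x45 >> 3) & 7 == 0
	 *	rm  = 0x45 & 7        == 5	-> base register (mod != 0)
	 *
	 * so modrm_ea = regs[VCPU_REGS_RBP] + (s8)disp8.
	 */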
+
+static int decode_abs(struct x86_emulate_ctxt *ctxt,
+		      struct x86_emulate_ops *ops)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc = 0;
+
+	switch (c->ad_bytes) {
+	case 2:
+		c->modrm_ea = insn_fetch(u16, 2, c->eip);
+		break;
+	case 4:
+		c->modrm_ea = insn_fetch(u32, 4, c->eip);
+		break;
+	case 8:
+		c->modrm_ea = insn_fetch(u64, 8, c->eip);
+		break;
+	}
+done:
+	return rc;
+}
+
+int
+x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc = 0;
+	int mode = ctxt->mode;
+	int def_op_bytes, def_ad_bytes;
+
+	/* Shadow copy of register state. Committed on successful emulation. */
+
+	memset(c, 0, sizeof(struct decode_cache));
+	c->eip = ctxt->vcpu->arch.rip;
+	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
+
+	switch (mode) {
+	case X86EMUL_MODE_REAL:
+	case X86EMUL_MODE_PROT16:
+		def_op_bytes = def_ad_bytes = 2;
+		break;
+	case X86EMUL_MODE_PROT32:
+		def_op_bytes = def_ad_bytes = 4;
+		break;
+#ifdef CONFIG_X86_64
+	case X86EMUL_MODE_PROT64:
+		def_op_bytes = 4;
+		def_ad_bytes = 8;
+		break;
+#endif
+	default:
+		return -1;
+	}
+
+	c->op_bytes = def_op_bytes;
+	c->ad_bytes = def_ad_bytes;
+
+	/* Legacy prefixes. */
+	for (;;) {
+		switch (c->b = insn_fetch(u8, 1, c->eip)) {
+		case 0x66:	/* operand-size override */
+			/* switch between 2/4 bytes */
+			c->op_bytes = def_op_bytes ^ 6;
+			break;
+		case 0x67:	/* address-size override */
+			if (mode == X86EMUL_MODE_PROT64)
+				/* switch between 4/8 bytes */
+				c->ad_bytes = def_ad_bytes ^ 12;
+			else
+				/* switch between 2/4 bytes */
+				c->ad_bytes = def_ad_bytes ^ 6;
+			break;
+		case 0x2e:	/* CS override */
+			c->override_base = &ctxt->cs_base;
+			break;
+		case 0x3e:	/* DS override */
+			c->override_base = &ctxt->ds_base;
+			break;
+		case 0x26:	/* ES override */
+			c->override_base = &ctxt->es_base;
+			break;
+		case 0x64:	/* FS override */
+			c->override_base = &ctxt->fs_base;
+			break;
+		case 0x65:	/* GS override */
+			c->override_base = &ctxt->gs_base;
+			break;
+		case 0x36:	/* SS override */
+			c->override_base = &ctxt->ss_base;
+			break;
+		case 0x40 ... 0x4f: /* REX */
+			if (mode != X86EMUL_MODE_PROT64)
+				goto done_prefixes;
+			c->rex_prefix = c->b;
+			continue;
+		case 0xf0:	/* LOCK */
+			c->lock_prefix = 1;
+			break;
+		case 0xf2:	/* REPNE/REPNZ */
+			c->rep_prefix = REPNE_PREFIX;
+			break;
+		case 0xf3:	/* REP/REPE/REPZ */
+			c->rep_prefix = REPE_PREFIX;
+			break;
+		default:
+			goto done_prefixes;
+		}
+
+		/* Any legacy prefix after a REX prefix nullifies its effect. */
+
+		c->rex_prefix = 0;
+	}
+
+done_prefixes:
+
+	/* REX prefix. */
+	if (c->rex_prefix)
+		if (c->rex_prefix & 8)
+			c->op_bytes = 8;	/* REX.W */
+
+	/* Opcode byte(s). */
+	c->d = opcode_table[c->b];
+	if (c->d == 0) {
+		/* Two-byte opcode? */
+		if (c->b == 0x0f) {
+			c->twobyte = 1;
+			c->b = insn_fetch(u8, 1, c->eip);
+			c->d = twobyte_table[c->b];
+		}
+
+		/* Unrecognised? */
+		if (c->d == 0) {
+			DPRINTF("Cannot emulate %02x\n", c->b);
+			return -1;
+		}
+	}
+
+	if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
+		c->op_bytes = 8;
+
+	/* ModRM and SIB bytes. */
+	if (c->d & ModRM)
+		rc = decode_modrm(ctxt, ops);
+	else if (c->d & MemAbs)
+		rc = decode_abs(ctxt, ops);
+	if (rc)
+		goto done;
+
+	if (!c->override_base)
+		c->override_base = &ctxt->ds_base;
+	if (mode == X86EMUL_MODE_PROT64 &&
+	    c->override_base != &ctxt->fs_base &&
+	    c->override_base != &ctxt->gs_base)
+		c->override_base = NULL;
+
+	if (c->override_base)
+		c->modrm_ea += *c->override_base;
+
+	if (c->ad_bytes != 8)
+		c->modrm_ea = (u32)c->modrm_ea;
+	/*
+	 * Decode and fetch the source operand: register, memory
+	 * or immediate.
+	 */
+	switch (c->d & SrcMask) {
+	case SrcNone:
+		break;
+	case SrcReg:
+		decode_register_operand(&c->src, c, 0);
+		break;
+	case SrcMem16:
+		c->src.bytes = 2;
+		goto srcmem_common;
+	case SrcMem32:
+		c->src.bytes = 4;
+		goto srcmem_common;
+	case SrcMem:
+		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+		/* Don't fetch the address for invlpg: it could be unmapped. */
+		if (c->twobyte && c->b == 0x01 && c->modrm_reg == 7)
+			break;
+	srcmem_common:
+		/*
+		 * For instructions with a ModR/M byte, switch to register
+		 * access if Mod = 3.
+		 */
+		if ((c->d & ModRM) && c->modrm_mod == 3) {
+			c->src.type = OP_REG;
+			break;
+		}
+		c->src.type = OP_MEM;
+		break;
+	case SrcImm:
+		c->src.type = OP_IMM;
+		c->src.ptr = (unsigned long *)c->eip;
+		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+		if (c->src.bytes == 8)
+			c->src.bytes = 4;
+		/* NB. Immediates are sign-extended as necessary. */
+		switch (c->src.bytes) {
+		case 1:
+			c->src.val = insn_fetch(s8, 1, c->eip);
+			break;
+		case 2:
+			c->src.val = insn_fetch(s16, 2, c->eip);
+			break;
+		case 4:
+			c->src.val = insn_fetch(s32, 4, c->eip);
+			break;
+		}
+		break;
+	case SrcImmByte:
+		c->src.type = OP_IMM;
+		c->src.ptr = (unsigned long *)c->eip;
+		c->src.bytes = 1;
+		c->src.val = insn_fetch(s8, 1, c->eip);
+		break;
+	}
+
+	/* Decode and fetch the destination operand: register or memory. */
+	switch (c->d & DstMask) {
+	case ImplicitOps:
+		/* Special instructions do their own operand decoding. */
+		return 0;
+	case DstReg:
+		decode_register_operand(&c->dst, c,
+			 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
+		break;
+	case DstMem:
+		if ((c->d & ModRM) && c->modrm_mod == 3) {
+			c->dst.type = OP_REG;
+			break;
+		}
+		c->dst.type = OP_MEM;
+		break;
+	}
+
+done:
+	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
+}
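Decoding and execution are deliberately split so register state is committed only after successful emulation. A caller-side sketch (emulate_one() is hypothetical, not part of this patch):

	static int emulate_one(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops)
	{
		if (x86_decode_insn(ctxt, ops) != 0)
			return -1;	/* undecodable instruction */
		return x86_emulate_insn(ctxt, ops);
	}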
+
+static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	c->dst.type  = OP_MEM;
+	c->dst.bytes = c->op_bytes;
+	c->dst.val = c->src.val;
+	register_address_increment(c->regs[VCPU_REGS_RSP], -c->op_bytes);
+	c->dst.ptr = (void *) register_address(ctxt->ss_base,
+					       c->regs[VCPU_REGS_RSP]);
+}
+
+static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
+				struct x86_emulate_ops *ops)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc;
+
+	rc = ops->read_std(register_address(ctxt->ss_base,
+					    c->regs[VCPU_REGS_RSP]),
+			   &c->dst.val, c->dst.bytes, ctxt->vcpu);
+	if (rc != 0)
+		return rc;
+
+	register_address_increment(c->regs[VCPU_REGS_RSP], c->dst.bytes);
+
+	return 0;
+}
+
+static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	switch (c->modrm_reg) {
+	case 0:	/* rol */
+		emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
+		break;
+	case 1:	/* ror */
+		emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
+		break;
+	case 2:	/* rcl */
+		emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
+		break;
+	case 3:	/* rcr */
+		emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
+		break;
+	case 4:	/* sal/shl */
+	case 6:	/* sal/shl */
+		emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
+		break;
+	case 5:	/* shr */
+		emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
+		break;
+	case 7:	/* sar */
+		emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
+		break;
+	}
+}
+
+static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
+			       struct x86_emulate_ops *ops)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc = 0;
+
+	switch (c->modrm_reg) {
+	case 0 ... 1:	/* test */
+		/*
+		 * Special case in Grp3: test has an immediate
+		 * source operand.
+		 */
+		c->src.type = OP_IMM;
+		c->src.ptr = (unsigned long *)c->eip;
+		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+		if (c->src.bytes == 8)
+			c->src.bytes = 4;
+		switch (c->src.bytes) {
+		case 1:
+			c->src.val = insn_fetch(s8, 1, c->eip);
+			break;
+		case 2:
+			c->src.val = insn_fetch(s16, 2, c->eip);
+			break;
+		case 4:
+			c->src.val = insn_fetch(s32, 4, c->eip);
+			break;
+		}
+		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
+		break;
+	case 2:	/* not */
+		c->dst.val = ~c->dst.val;
+		break;
+	case 3:	/* neg */
+		emulate_1op("neg", c->dst, ctxt->eflags);
+		break;
+	default:
+		DPRINTF("Cannot emulate %02x\n", c->b);
+		rc = X86EMUL_UNHANDLEABLE;
+		break;
+	}
+done:
+	return rc;
+}
+
+static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
+			       struct x86_emulate_ops *ops)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc;
+
+	switch (c->modrm_reg) {
+	case 0:	/* inc */
+		emulate_1op("inc", c->dst, ctxt->eflags);
+		break;
+	case 1:	/* dec */
+		emulate_1op("dec", c->dst, ctxt->eflags);
+		break;
+	case 4: /* jmp abs */
+		if (c->b == 0xff)
+			c->eip = c->dst.val;
+		else {
+			DPRINTF("Cannot emulate %02x\n", c->b);
+			return X86EMUL_UNHANDLEABLE;
+		}
+		break;
+	case 6:	/* push */
+
+		/* 64-bit mode: PUSH always pushes a 64-bit operand. */
+
+		if (ctxt->mode == X86EMUL_MODE_PROT64) {
+			c->dst.bytes = 8;
+			rc = ops->read_std((unsigned long)c->dst.ptr,
+					   &c->dst.val, 8, ctxt->vcpu);
+			if (rc != 0)
+				return rc;
+		}
+		register_address_increment(c->regs[VCPU_REGS_RSP],
+					   -c->dst.bytes);
+		rc = ops->write_emulated(register_address(ctxt->ss_base,
+				    c->regs[VCPU_REGS_RSP]), &c->dst.val,
+				    c->dst.bytes, ctxt->vcpu);
+		if (rc != 0)
+			return rc;
+		c->dst.type = OP_NONE;
+		break;
+	default:
+		DPRINTF("Cannot emulate %02x\n", c->b);
+		return X86EMUL_UNHANDLEABLE;
+	}
+	return 0;
+}
+
+static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
+			       struct x86_emulate_ops *ops,
+			       unsigned long memop)
+{
+	struct decode_cache *c = &ctxt->decode;
+	u64 old, new;
+	int rc;
+
+	rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
+	if (rc != 0)
+		return rc;
+
+	if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
+	    ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
+
+		c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
+		c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
+		ctxt->eflags &= ~EFLG_ZF;
+
+	} else {
+		new = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
+		       (u32) c->regs[VCPU_REGS_RBX];
+
+		rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
+		if (rc != 0)
+			return rc;
+		ctxt->eflags |= EFLG_ZF;
+	}
+	return 0;
+}
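emulate_grp9() above is cmpxchg8b; its architectural effect, summarized (sketch):

	/*
	 *	if (EDX:EAX == m64) { m64 = ECX:EBX; ZF = 1; }
	 *	else                { EDX:EAX = m64; ZF = 0; }
	 */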
+
+static inline int writeback(struct x86_emulate_ctxt *ctxt,
+			    struct x86_emulate_ops *ops)
+{
+	int rc;
+	struct decode_cache *c = &ctxt->decode;
+
+	switch (c->dst.type) {
+	case OP_REG:
+		/* The 4-byte case *is* correct:
+		 * in 64-bit mode we zero-extend.
+		 */
+		switch (c->dst.bytes) {
+		case 1:
+			*(u8 *)c->dst.ptr = (u8)c->dst.val;
+			break;
+		case 2:
+			*(u16 *)c->dst.ptr = (u16)c->dst.val;
+			break;
+		case 4:
+			*c->dst.ptr = (u32)c->dst.val;
+			break;	/* 64b: zero-ext */
+		case 8:
+			*c->dst.ptr = c->dst.val;
+			break;
+		}
+		break;
+	case OP_MEM:
+		if (c->lock_prefix)
+			rc = ops->cmpxchg_emulated(
+					(unsigned long)c->dst.ptr,
+					&c->dst.orig_val,
+					&c->dst.val,
+					c->dst.bytes,
+					ctxt->vcpu);
+		else
+			rc = ops->write_emulated(
+					(unsigned long)c->dst.ptr,
+					&c->dst.val,
+					c->dst.bytes,
+					ctxt->vcpu);
+		if (rc != 0)
+			return rc;
+		break;
+	case OP_NONE:
+		/* no writeback */
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+int
+x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
+{
+	unsigned long memop = 0;
+	u64 msr_data;
+	unsigned long saved_eip = 0;
+	struct decode_cache *c = &ctxt->decode;
+	int rc = 0;
+
+	/* Shadow copy of register state. Committed on successful emulation.
+	 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
+	 * modify them.
+	 */
+
+	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
+	saved_eip = c->eip;
+
+	if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
+		memop = c->modrm_ea;
+
+	if (c->rep_prefix && (c->d & String)) {
+		/* All REP prefixes have the same first termination condition */
+		if (c->regs[VCPU_REGS_RCX] == 0) {
+			ctxt->vcpu->arch.rip = c->eip;
+			goto done;
+		}
+		/*
+		 * The second termination condition applies only to REPE
+		 * and REPNE: when the prefix is REPE/REPZ or REPNE/REPNZ,
+		 * check the matching condition:
+		 *	- REPE/REPZ:   done when ZF = 0
+		 *	- REPNE/REPNZ: done when ZF = 1
+		 */
+		if ((c->b == 0xa6) || (c->b == 0xa7) ||
+				(c->b == 0xae) || (c->b == 0xaf)) {
+			if ((c->rep_prefix == REPE_PREFIX) &&
+				((ctxt->eflags & EFLG_ZF) == 0)) {
+					ctxt->vcpu->arch.rip = c->eip;
+					goto done;
+			}
+			if ((c->rep_prefix == REPNE_PREFIX) &&
+				((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) {
+				ctxt->vcpu->arch.rip = c->eip;
+				goto done;
+			}
+		}
+		c->regs[VCPU_REGS_RCX]--;
+		c->eip = ctxt->vcpu->arch.rip;
+	}
+
+	if (c->src.type == OP_MEM) {
+		c->src.ptr = (unsigned long *)memop;
+		c->src.val = 0;
+		rc = ops->read_emulated((unsigned long)c->src.ptr,
+					&c->src.val,
+					c->src.bytes,
+					ctxt->vcpu);
+		if (rc != 0)
+			goto done;
+		c->src.orig_val = c->src.val;
+	}
+
+	if ((c->d & DstMask) == ImplicitOps)
+		goto special_insn;
+
+
+	if (c->dst.type == OP_MEM) {
+		c->dst.ptr = (unsigned long *)memop;
+		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+		c->dst.val = 0;
+		if (c->d & BitOp) {
+			unsigned long mask = ~(c->dst.bytes * 8 - 1);
+
+			c->dst.ptr = (void *)c->dst.ptr +
+						   (c->src.val & mask) / 8;
+		}
+		if (!(c->d & Mov) &&
+				   /* optimisation - avoid slow emulated read */
+		    ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
+					   &c->dst.val,
+					  c->dst.bytes, ctxt->vcpu)) != 0))
+			goto done;
+	}
+	c->dst.orig_val = c->dst.val;
+
+special_insn:
+
+	if (c->twobyte)
+		goto twobyte_insn;
+
+	switch (c->b) {
+	case 0x00 ... 0x05:
+	      add:		/* add */
+		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
+		break;
+	case 0x08 ... 0x0d:
+	      or:		/* or */
+		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
+		break;
+	case 0x10 ... 0x15:
+	      adc:		/* adc */
+		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
+		break;
+	case 0x18 ... 0x1d:
+	      sbb:		/* sbb */
+		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
+		break;
+	case 0x20 ... 0x23:
+	      and:		/* and */
+		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
+		break;
+	case 0x24:              /* and al imm8 */
+		c->dst.type = OP_REG;
+		c->dst.ptr = &c->regs[VCPU_REGS_RAX];
+		c->dst.val = *(u8 *)c->dst.ptr;
+		c->dst.bytes = 1;
+		c->dst.orig_val = c->dst.val;
+		goto and;
+	case 0x25:              /* and ax imm16, or eax imm32 */
+		c->dst.type = OP_REG;
+		c->dst.bytes = c->op_bytes;
+		c->dst.ptr = &c->regs[VCPU_REGS_RAX];
+		if (c->op_bytes == 2)
+			c->dst.val = *(u16 *)c->dst.ptr;
+		else
+			c->dst.val = *(u32 *)c->dst.ptr;
+		c->dst.orig_val = c->dst.val;
+		goto and;
+	case 0x28 ... 0x2d:
+	      sub:		/* sub */
+		emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
+		break;
+	case 0x30 ... 0x35:
+	      xor:		/* xor */
+		emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
+		break;
+	case 0x38 ... 0x3d:
+	      cmp:		/* cmp */
+		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
+		break;
+	case 0x40 ... 0x47: /* inc r16/r32 */
+		emulate_1op("inc", c->dst, ctxt->eflags);
+		break;
+	case 0x48 ... 0x4f: /* dec r16/r32 */
+		emulate_1op("dec", c->dst, ctxt->eflags);
+		break;
+	case 0x50 ... 0x57:  /* push reg */
+		c->dst.type  = OP_MEM;
+		c->dst.bytes = c->op_bytes;
+		c->dst.val = c->src.val;
+		register_address_increment(c->regs[VCPU_REGS_RSP],
+					   -c->op_bytes);
+		c->dst.ptr = (void *) register_address(
+			ctxt->ss_base, c->regs[VCPU_REGS_RSP]);
+		break;
+	case 0x58 ... 0x5f: /* pop reg */
+	pop_instruction:
+		if ((rc = ops->read_std(register_address(ctxt->ss_base,
+			c->regs[VCPU_REGS_RSP]), c->dst.ptr,
+			c->op_bytes, ctxt->vcpu)) != 0)
+			goto done;
+
+		register_address_increment(c->regs[VCPU_REGS_RSP],
+					   c->op_bytes);
+		c->dst.type = OP_NONE;	/* Disable writeback. */
+		break;
+	case 0x63:		/* movsxd */
+		if (ctxt->mode != X86EMUL_MODE_PROT64)
+			goto cannot_emulate;
+		c->dst.val = (s32) c->src.val;
+		break;
+	case 0x6a: /* push imm8 */
+		c->src.val = 0L;
+		c->src.val = insn_fetch(s8, 1, c->eip);
+		emulate_push(ctxt);
+		break;
+	case 0x6c:		/* insb */
+	case 0x6d:		/* insw/insd */
+		if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
+				1,
+				(c->d & ByteOp) ? 1 : c->op_bytes,
+				c->rep_prefix ?
+				address_mask(c->regs[VCPU_REGS_RCX]) : 1,
+				(ctxt->eflags & EFLG_DF),
+				register_address(ctxt->es_base,
+						 c->regs[VCPU_REGS_RDI]),
+				c->rep_prefix,
+				c->regs[VCPU_REGS_RDX]) == 0) {
+			c->eip = saved_eip;
+			return -1;
+		}
+		return 0;
+	case 0x6e:		/* outsb */
+	case 0x6f:		/* outsw/outsd */
+		if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
+				0,
+				(c->d & ByteOp) ? 1 : c->op_bytes,
+				c->rep_prefix ?
+				address_mask(c->regs[VCPU_REGS_RCX]) : 1,
+				(ctxt->eflags & EFLG_DF),
+				register_address(c->override_base ?
+							*c->override_base :
+							ctxt->ds_base,
+						 c->regs[VCPU_REGS_RSI]),
+				c->rep_prefix,
+				c->regs[VCPU_REGS_RDX]) == 0) {
+			c->eip = saved_eip;
+			return -1;
+		}
+		return 0;
+	case 0x70 ... 0x7f: /* jcc (short) */ {
+		int rel = insn_fetch(s8, 1, c->eip);
+
+		if (test_cc(c->b, ctxt->eflags))
+			JMP_REL(rel);
+		break;
+	}
+	case 0x80 ... 0x83:	/* Grp1 */
+		switch (c->modrm_reg) {
+		case 0:
+			goto add;
+		case 1:
+			goto or;
+		case 2:
+			goto adc;
+		case 3:
+			goto sbb;
+		case 4:
+			goto and;
+		case 5:
+			goto sub;
+		case 6:
+			goto xor;
+		case 7:
+			goto cmp;
+		}
+		break;
+	case 0x84 ... 0x85:
+		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
+		break;
+	case 0x86 ... 0x87:	/* xchg */
+		/* Write back the register source. */
+		switch (c->dst.bytes) {
+		case 1:
+			*(u8 *) c->src.ptr = (u8) c->dst.val;
+			break;
+		case 2:
+			*(u16 *) c->src.ptr = (u16) c->dst.val;
+			break;
+		case 4:
+			*c->src.ptr = (u32) c->dst.val;
+			break;	/* 64b reg: zero-extend */
+		case 8:
+			*c->src.ptr = c->dst.val;
+			break;
+		}
+		/*
+		 * Write back the memory destination with implicit LOCK
+		 * prefix.
+		 */
+		c->dst.val = c->src.val;
+		c->lock_prefix = 1;
+		break;
+	case 0x88 ... 0x8b:	/* mov */
+		goto mov;
+	case 0x8d: /* lea r16/r32, m */
+		c->dst.val = c->modrm_val;
+		break;
+	case 0x8f:		/* pop (sole member of Grp1a) */
+		rc = emulate_grp1a(ctxt, ops);
+		if (rc != 0)
+			goto done;
+		break;
+	case 0x9c: /* pushf */
+		c->src.val = (unsigned long) ctxt->eflags;
+		emulate_push(ctxt);
+		break;
+	case 0x9d: /* popf */
+		c->dst.ptr = (unsigned long *) &ctxt->eflags;
+		goto pop_instruction;
+	case 0xa0 ... 0xa1:	/* mov */
+		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
+		c->dst.val = c->src.val;
+		break;
+	case 0xa2 ... 0xa3:	/* mov */
+		c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX];
+		break;
+	case 0xa4 ... 0xa5:	/* movs */
+		c->dst.type = OP_MEM;
+		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+		c->dst.ptr = (unsigned long *)register_address(
+						   ctxt->es_base,
+						   c->regs[VCPU_REGS_RDI]);
+		if ((rc = ops->read_emulated(register_address(
+		      c->override_base ? *c->override_base :
+					ctxt->ds_base,
+					c->regs[VCPU_REGS_RSI]),
+					&c->dst.val,
+					c->dst.bytes, ctxt->vcpu)) != 0)
+			goto done;
+		register_address_increment(c->regs[VCPU_REGS_RSI],
+				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
+							   : c->dst.bytes);
+		register_address_increment(c->regs[VCPU_REGS_RDI],
+				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
+							   : c->dst.bytes);
+		break;
+	case 0xa6 ... 0xa7:	/* cmps */
+		c->src.type = OP_NONE; /* Disable writeback. */
+		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+		c->src.ptr = (unsigned long *)register_address(
+				c->override_base ? *c->override_base :
+						   ctxt->ds_base,
+						   c->regs[VCPU_REGS_RSI]);
+		if ((rc = ops->read_emulated((unsigned long)c->src.ptr,
+						&c->src.val,
+						c->src.bytes,
+						ctxt->vcpu)) != 0)
+			goto done;
+
+		c->dst.type = OP_NONE; /* Disable writeback. */
+		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+		c->dst.ptr = (unsigned long *)register_address(
+						   ctxt->es_base,
+						   c->regs[VCPU_REGS_RDI]);
+		if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
+						&c->dst.val,
+						c->dst.bytes,
+						ctxt->vcpu)) != 0)
+			goto done;
+
+		DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
+
+		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
+
+		register_address_increment(c->regs[VCPU_REGS_RSI],
+				       (ctxt->eflags & EFLG_DF) ? -c->src.bytes
+								  : c->src.bytes);
+		register_address_increment(c->regs[VCPU_REGS_RDI],
+				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
+								  : c->dst.bytes);
+
+		break;
+	case 0xaa ... 0xab:	/* stos */
+		c->dst.type = OP_MEM;
+		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+		c->dst.ptr = (unsigned long *)register_address(
+						   ctxt->es_base,
+						   c->regs[VCPU_REGS_RDI]);
+		c->dst.val = c->regs[VCPU_REGS_RAX];
+		register_address_increment(c->regs[VCPU_REGS_RDI],
+				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
+							   : c->dst.bytes);
+		break;
+	case 0xac ... 0xad:	/* lods */
+		c->dst.type = OP_REG;
+		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
+		if ((rc = ops->read_emulated(register_address(
+				c->override_base ? *c->override_base :
+						   ctxt->ds_base,
+						 c->regs[VCPU_REGS_RSI]),
+						 &c->dst.val,
+						 c->dst.bytes,
+						 ctxt->vcpu)) != 0)
+			goto done;
+		register_address_increment(c->regs[VCPU_REGS_RSI],
+				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
+							   : c->dst.bytes);
+		break;
+	case 0xae ... 0xaf:	/* scas */
+		DPRINTF("Urk! I don't handle SCAS.\n");
+		goto cannot_emulate;
+	case 0xc0 ... 0xc1:
+		emulate_grp2(ctxt);
+		break;
+	case 0xc3: /* ret */
+		c->dst.ptr = &c->eip;
+		goto pop_instruction;
+	case 0xc6 ... 0xc7:	/* mov (sole member of Grp11) */
+	mov:
+		c->dst.val = c->src.val;
+		break;
+	case 0xd0 ... 0xd1:	/* Grp2 */
+		c->src.val = 1;
+		emulate_grp2(ctxt);
+		break;
+	case 0xd2 ... 0xd3:	/* Grp2 */
+		c->src.val = c->regs[VCPU_REGS_RCX];
+		emulate_grp2(ctxt);
+		break;
+	case 0xe8: /* call (near) */ {
+		long int rel;
+		switch (c->op_bytes) {
+		case 2:
+			rel = insn_fetch(s16, 2, c->eip);
+			break;
+		case 4:
+			rel = insn_fetch(s32, 4, c->eip);
+			break;
+		default:
+			DPRINTF("Call: Invalid op_bytes\n");
+			goto cannot_emulate;
+		}
+		c->src.val = (unsigned long) c->eip;
+		JMP_REL(rel);
+		c->op_bytes = c->ad_bytes;
+		emulate_push(ctxt);
+		break;
+	}
+	case 0xe9: /* jmp rel */
+	case 0xeb: /* jmp rel short */
+		JMP_REL(c->src.val);
+		c->dst.type = OP_NONE; /* Disable writeback. */
+		break;
+	case 0xf4:              /* hlt */
+		ctxt->vcpu->arch.halt_request = 1;
+		goto done;
+	case 0xf5:	/* cmc */
+		/* complement carry flag from eflags reg */
+		ctxt->eflags ^= EFLG_CF;
+		c->dst.type = OP_NONE;	/* Disable writeback. */
+		break;
+	case 0xf6 ... 0xf7:	/* Grp3 */
+		rc = emulate_grp3(ctxt, ops);
+		if (rc != 0)
+			goto done;
+		break;
+	case 0xf8: /* clc */
+		ctxt->eflags &= ~EFLG_CF;
+		c->dst.type = OP_NONE;	/* Disable writeback. */
+		break;
+	case 0xfa: /* cli */
+		ctxt->eflags &= ~X86_EFLAGS_IF;
+		c->dst.type = OP_NONE;	/* Disable writeback. */
+		break;
+	case 0xfb: /* sti */
+		ctxt->eflags |= X86_EFLAGS_IF;
+		c->dst.type = OP_NONE;	/* Disable writeback. */
+		break;
+	case 0xfe ... 0xff:	/* Grp4/Grp5 */
+		rc = emulate_grp45(ctxt, ops);
+		if (rc != 0)
+			goto done;
+		break;
+	}
+
+writeback:
+	rc = writeback(ctxt, ops);
+	if (rc != 0)
+		goto done;
+
+	/* Commit shadow register state. */
+	memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
+	ctxt->vcpu->arch.rip = c->eip;
+
+done:
+	if (rc == X86EMUL_UNHANDLEABLE) {
+		c->eip = saved_eip;
+		return -1;
+	}
+	return 0;
+
+twobyte_insn:
+	switch (c->b) {
+	case 0x01: /* lgdt, lidt, lmsw */
+		switch (c->modrm_reg) {
+			u16 size;
+			unsigned long address;
+
+		case 0: /* vmcall */
+			if (c->modrm_mod != 3 || c->modrm_rm != 1)
+				goto cannot_emulate;
+
+			rc = kvm_fix_hypercall(ctxt->vcpu);
+			if (rc)
+				goto done;
+
+			kvm_emulate_hypercall(ctxt->vcpu);
+			break;
+		case 2: /* lgdt */
+			rc = read_descriptor(ctxt, ops, c->src.ptr,
+					     &size, &address, c->op_bytes);
+			if (rc)
+				goto done;
+			realmode_lgdt(ctxt->vcpu, size, address);
+			break;
+		case 3: /* lidt/vmmcall */
+			if (c->modrm_mod == 3 && c->modrm_rm == 1) {
+				rc = kvm_fix_hypercall(ctxt->vcpu);
+				if (rc)
+					goto done;
+				kvm_emulate_hypercall(ctxt->vcpu);
+			} else {
+				rc = read_descriptor(ctxt, ops, c->src.ptr,
+						     &size, &address,
+						     c->op_bytes);
+				if (rc)
+					goto done;
+				realmode_lidt(ctxt->vcpu, size, address);
+			}
+			break;
+		case 4: /* smsw */
+			if (c->modrm_mod != 3)
+				goto cannot_emulate;
+			*(u16 *)&c->regs[c->modrm_rm]
+				= realmode_get_cr(ctxt->vcpu, 0);
+			break;
+		case 6: /* lmsw */
+			if (c->modrm_mod != 3)
+				goto cannot_emulate;
+			realmode_lmsw(ctxt->vcpu, (u16)c->modrm_val,
+						  &ctxt->eflags);
+			break;
+		case 7: /* invlpg*/
+			emulate_invlpg(ctxt->vcpu, memop);
+			break;
+		default:
+			goto cannot_emulate;
+		}
+		/* Disable writeback. */
+		c->dst.type = OP_NONE;
+		break;
+	case 0x06:
+		emulate_clts(ctxt->vcpu);
+		c->dst.type = OP_NONE;
+		break;
+	case 0x08:		/* invd */
+	case 0x09:		/* wbinvd */
+	case 0x0d:		/* GrpP (prefetch) */
+	case 0x18:		/* Grp16 (prefetch/nop) */
+		c->dst.type = OP_NONE;
+		break;
+	case 0x20: /* mov cr, reg */
+		if (c->modrm_mod != 3)
+			goto cannot_emulate;
+		c->regs[c->modrm_rm] =
+				realmode_get_cr(ctxt->vcpu, c->modrm_reg);
+		c->dst.type = OP_NONE;	/* no writeback */
+		break;
+	case 0x21: /* mov from dr to reg */
+		if (c->modrm_mod != 3)
+			goto cannot_emulate;
+		rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]);
+		if (rc)
+			goto cannot_emulate;
+		c->dst.type = OP_NONE;	/* no writeback */
+		break;
+	case 0x22: /* mov reg, cr */
+		if (c->modrm_mod != 3)
+			goto cannot_emulate;
+		realmode_set_cr(ctxt->vcpu,
+				c->modrm_reg, c->modrm_val, &ctxt->eflags);
+		c->dst.type = OP_NONE;
+		break;
+	case 0x23: /* mov from reg to dr */
+		if (c->modrm_mod != 3)
+			goto cannot_emulate;
+		rc = emulator_set_dr(ctxt, c->modrm_reg,
+				     c->regs[c->modrm_rm]);
+		if (rc)
+			goto cannot_emulate;
+		c->dst.type = OP_NONE;	/* no writeback */
+		break;
+	case 0x30:
+		/* wrmsr */
+		msr_data = (u32)c->regs[VCPU_REGS_RAX]
+			| ((u64)c->regs[VCPU_REGS_RDX] << 32);
+		rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data);
+		if (rc) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			c->eip = ctxt->vcpu->arch.rip;
+		}
+		rc = X86EMUL_CONTINUE;
+		c->dst.type = OP_NONE;
+		break;
+	case 0x32:
+		/* rdmsr */
+		rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data);
+		if (rc) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			c->eip = ctxt->vcpu->arch.rip;
+		} else {
+			c->regs[VCPU_REGS_RAX] = (u32)msr_data;
+			c->regs[VCPU_REGS_RDX] = msr_data >> 32;
+		}
+		rc = X86EMUL_CONTINUE;
+		c->dst.type = OP_NONE;
+		break;
+	case 0x40 ... 0x4f:	/* cmov */
+		c->dst.val = c->dst.orig_val = c->src.val;
+		if (!test_cc(c->b, ctxt->eflags))
+			c->dst.type = OP_NONE; /* no writeback */
+		break;
+	case 0x80 ... 0x8f: /* jnz rel, etc*/ {
+		long int rel;
+
+		switch (c->op_bytes) {
+		case 2:
+			rel = insn_fetch(s16, 2, c->eip);
+			break;
+		case 4:
+			rel = insn_fetch(s32, 4, c->eip);
+			break;
+		case 8:
+			rel = insn_fetch(s64, 8, c->eip);
+			break;
+		default:
+			DPRINTF("jnz: Invalid op_bytes\n");
+			goto cannot_emulate;
+		}
+		if (test_cc(c->b, ctxt->eflags))
+			JMP_REL(rel);
+		c->dst.type = OP_NONE;
+		break;
+	}
+	case 0xa3:
+	      bt:		/* bt */
+		c->dst.type = OP_NONE;
+		/* only subword offset */
+		c->src.val &= (c->dst.bytes << 3) - 1;
+		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
+		break;
+	case 0xab:
+	      bts:		/* bts */
+		/* only subword offset */
+		c->src.val &= (c->dst.bytes << 3) - 1;
+		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
+		break;
+	case 0xb0 ... 0xb1:	/* cmpxchg */
+		/*
+		 * Save real source value, then compare EAX against
+		 * destination.
+		 */
+		c->src.orig_val = c->src.val;
+		c->src.val = c->regs[VCPU_REGS_RAX];
+		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
+		if (ctxt->eflags & EFLG_ZF) {
+			/* Success: write back to memory. */
+			c->dst.val = c->src.orig_val;
+		} else {
+			/* Failure: write the value we saw to EAX. */
+			c->dst.type = OP_REG;
+			c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
+		}
+		break;
+	case 0xb3:
+	      btr:		/* btr */
+		/* only subword offset */
+		c->src.val &= (c->dst.bytes << 3) - 1;
+		emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
+		break;
+	case 0xb6 ... 0xb7:	/* movzx */
+		c->dst.bytes = c->op_bytes;
+		c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
+						       : (u16) c->src.val;
+		break;
+	case 0xba:		/* Grp8 */
+		switch (c->modrm_reg & 3) {
+		case 0:
+			goto bt;
+		case 1:
+			goto bts;
+		case 2:
+			goto btr;
+		case 3:
+			goto btc;
+		}
+		break;
+	case 0xbb:
+	      btc:		/* btc */
+		/* only subword offset */
+		c->src.val &= (c->dst.bytes << 3) - 1;
+		emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
+		break;
+	case 0xbe ... 0xbf:	/* movsx */
+		c->dst.bytes = c->op_bytes;
+		c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
+							(s16) c->src.val;
+		break;
+	case 0xc3:		/* movnti */
+		c->dst.bytes = c->op_bytes;
+		c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
+							(u64) c->src.val;
+		break;
+	case 0xc7:		/* Grp9 (cmpxchg8b) */
+		rc = emulate_grp9(ctxt, ops, memop);
+		if (rc != 0)
+			goto done;
+		c->dst.type = OP_NONE;
+		break;
+	}
+	goto writeback;
+
+cannot_emulate:
+	DPRINTF("Cannot emulate %02x\n", c->b);
+	c->eip = saved_eip;
+	return -1;
+}
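A note on the condition-code handling above: the cmov (0x40..0x4f) and jcc (0x80..0x8f) cases both defer to test_cc(), which evaluates the condition encoded in the opcode's low nibble against EFLAGS. A minimal standalone sketch of that evaluation follows, assuming the architectural pairing of condition codes; the function name and flag masks here are illustrative, not the emulator's own.

/*
 * Sketch: evaluate an x86 condition code against EFLAGS.  Conditions
 * come in pairs; the odd opcode of each pair inverts the even one
 * (e.g. 0x44 = cmovz, 0x45 = cmovnz).
 */
#include <stdbool.h>
#include <stdint.h>

#define F_CF (1u << 0)
#define F_PF (1u << 2)
#define F_ZF (1u << 6)
#define F_SF (1u << 7)
#define F_OF (1u << 11)

static bool test_cc_sketch(uint8_t opcode, uint32_t eflags)
{
	bool r;

	switch ((opcode & 0x0f) >> 1) {
	case 0: r = eflags & F_OF; break;              /* o  / no */
	case 1: r = eflags & F_CF; break;              /* b  / nb */
	case 2: r = eflags & F_ZF; break;              /* z  / nz */
	case 3: r = eflags & (F_CF | F_ZF); break;     /* be / a  */
	case 4: r = eflags & F_SF; break;              /* s  / ns */
	case 5: r = eflags & F_PF; break;              /* p  / np */
	case 6: /* l / ge */
		r = !!(eflags & F_SF) != !!(eflags & F_OF);
		break;
	default: /* 7: le / g */
		r = (eflags & F_ZF) ||
		    !!(eflags & F_SF) != !!(eflags & F_OF);
		break;
	}
	return (opcode & 1) ? !r : r;
}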

+ 0 - 2
drivers/Kconfig

@@ -90,8 +90,6 @@ source "drivers/dca/Kconfig"
 
 source "drivers/auxdisplay/Kconfig"
 
-source "drivers/kvm/Kconfig"
-
 source "drivers/uio/Kconfig"
 
 source "drivers/virtio/Kconfig"

+ 0 - 1
drivers/Makefile

@@ -47,7 +47,6 @@ obj-$(CONFIG_SPI)		+= spi/
 obj-$(CONFIG_PCCARD)		+= pcmcia/
 obj-$(CONFIG_DIO)		+= dio/
 obj-$(CONFIG_SBUS)		+= sbus/
-obj-$(CONFIG_KVM)		+= kvm/
 obj-$(CONFIG_ZORRO)		+= zorro/
 obj-$(CONFIG_MAC)		+= macintosh/
 obj-$(CONFIG_ATA_OVER_ETH)	+= block/aoe/

+ 0 - 165
drivers/kvm/irq.h

@@ -1,165 +0,0 @@
-/*
- * irq.h: in kernel interrupt controller related definitions
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- * Authors:
- *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
- *
- */
-
-#ifndef __IRQ_H
-#define __IRQ_H
-
-#include "kvm.h"
-
-typedef void irq_request_func(void *opaque, int level);
-
-struct kvm_kpic_state {
-	u8 last_irr;	/* edge detection */
-	u8 irr;		/* interrupt request register */
-	u8 imr;		/* interrupt mask register */
-	u8 isr;		/* interrupt service register */
-	u8 priority_add;	/* highest irq priority */
-	u8 irq_base;
-	u8 read_reg_select;
-	u8 poll;
-	u8 special_mask;
-	u8 init_state;
-	u8 auto_eoi;
-	u8 rotate_on_auto_eoi;
-	u8 special_fully_nested_mode;
-	u8 init4;		/* true if 4 byte init */
-	u8 elcr;		/* PIIX edge/trigger selection */
-	u8 elcr_mask;
-	struct kvm_pic *pics_state;
-};
-
-struct kvm_pic {
-	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
-	irq_request_func *irq_request;
-	void *irq_request_opaque;
-	int output;		/* intr from master PIC */
-	struct kvm_io_device dev;
-};
-
-struct kvm_pic *kvm_create_pic(struct kvm *kvm);
-void kvm_pic_set_irq(void *opaque, int irq, int level);
-int kvm_pic_read_irq(struct kvm_pic *s);
-int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
-int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
-void kvm_pic_update_irq(struct kvm_pic *s);
-
-#define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
-#define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
-#define IOAPIC_EDGE_TRIG  0
-#define IOAPIC_LEVEL_TRIG 1
-
-#define IOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000
-#define IOAPIC_MEM_LENGTH            0x100
-
-/* Direct registers. */
-#define IOAPIC_REG_SELECT  0x00
-#define IOAPIC_REG_WINDOW  0x10
-#define IOAPIC_REG_EOI     0x40	/* IA64 IOSAPIC only */
-
-/* Indirect registers. */
-#define IOAPIC_REG_APIC_ID 0x00	/* x86 IOAPIC only */
-#define IOAPIC_REG_VERSION 0x01
-#define IOAPIC_REG_ARB_ID  0x02	/* x86 IOAPIC only */
-
-struct kvm_ioapic {
-	u64 base_address;
-	u32 ioregsel;
-	u32 id;
-	u32 irr;
-	u32 pad;
-	union ioapic_redir_entry {
-		u64 bits;
-		struct {
-			u8 vector;
-			u8 delivery_mode:3;
-			u8 dest_mode:1;
-			u8 delivery_status:1;
-			u8 polarity:1;
-			u8 remote_irr:1;
-			u8 trig_mode:1;
-			u8 mask:1;
-			u8 reserve:7;
-			u8 reserved[4];
-			u8 dest_id;
-		} fields;
-	} redirtbl[IOAPIC_NUM_PINS];
-	struct kvm_io_device dev;
-	struct kvm *kvm;
-};
-
-struct kvm_lapic {
-	unsigned long base_address;
-	struct kvm_io_device dev;
-	struct {
-		atomic_t pending;
-		s64 period;	/* unit: ns */
-		u32 divide_count;
-		ktime_t last_update;
-		struct hrtimer dev;
-	} timer;
-	struct kvm_vcpu *vcpu;
-	struct page *regs_page;
-	void *regs;
-};
-
-#ifdef DEBUG
-#define ASSERT(x)  							\
-do {									\
-	if (!(x)) {							\
-		printk(KERN_EMERG "assertion failed %s: %d: %s\n",	\
-		       __FILE__, __LINE__, #x);				\
-		BUG();							\
-	}								\
-} while (0)
-#else
-#define ASSERT(x) do { } while (0)
-#endif
-
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
-int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
-int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
-int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
-int kvm_create_lapic(struct kvm_vcpu *vcpu);
-void kvm_lapic_reset(struct kvm_vcpu *vcpu);
-void kvm_free_apic(struct kvm_lapic *apic);
-u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
-void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
-void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
-struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
-				       unsigned long bitmap);
-u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig);
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
-int kvm_ioapic_init(struct kvm *kvm);
-void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
-int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
-int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
-void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
-void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
-void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
-
-#endif
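The redirection table in struct kvm_ioapic above stores each entry as a union, so the same 64-bit value can be handled either raw (as the guest programs it through IOAPIC_REG_SELECT/IOAPIC_REG_WINDOW) or field by field. Below is a reduced model of that union with illustrative names; note that C bitfield layout is compiler-dependent, which the in-kernel struct relies on matching the architectural layout.

#include <stdint.h>

/* Reduced ioapic redirection entry: raw vs. decoded views of one value. */
union redir_entry_sketch {
	uint64_t bits;
	struct {
		uint8_t vector;            /* bits 0-7  */
		uint8_t delivery_mode:3;   /* bits 8-10 */
		uint8_t dest_mode:1;       /* bit 11    */
		uint8_t delivery_status:1; /* bit 12    */
		uint8_t polarity:1;        /* bit 13    */
		uint8_t remote_irr:1;      /* bit 14    */
		uint8_t trig_mode:1;       /* bit 15    */
		uint8_t mask:1;            /* bit 16    */
	} fields;
};

/* The guest writes one 32-bit half at a time through the window register. */
static void redir_write_half(union redir_entry_sketch *e, int high, uint32_t val)
{
	if (high)
		e->bits = (e->bits & 0x00000000ffffffffULL) | ((uint64_t)val << 32);
	else
		e->bits = (e->bits & 0xffffffff00000000ULL) | val;
}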

+ 0 - 1498
drivers/kvm/mmu.c

@@ -1,1498 +0,0 @@
-/*
- * Kernel-based Virtual Machine driver for Linux
- *
- * This module enables machines with Intel VT-x extensions to run virtual
- * machines without emulation or binary translation.
- *
- * MMU support
- *
- * Copyright (C) 2006 Qumranet, Inc.
- *
- * Authors:
- *   Yaniv Kamay  <yaniv@qumranet.com>
- *   Avi Kivity   <avi@qumranet.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include "vmx.h"
-#include "kvm.h"
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/module.h>
-
-#include <asm/page.h>
-#include <asm/cmpxchg.h>
-
-#undef MMU_DEBUG
-
-#undef AUDIT
-
-#ifdef AUDIT
-static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
-#else
-static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
-#endif
-
-#ifdef MMU_DEBUG
-
-#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
-#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
-
-#else
-
-#define pgprintk(x...) do { } while (0)
-#define rmap_printk(x...) do { } while (0)
-
-#endif
-
-#if defined(MMU_DEBUG) || defined(AUDIT)
-static int dbg = 1;
-#endif
-
-#ifndef MMU_DEBUG
-#define ASSERT(x) do { } while (0)
-#else
-#define ASSERT(x)							\
-	if (!(x)) {							\
-		printk(KERN_WARNING "assertion failed %s:%d: %s\n",	\
-		       __FILE__, __LINE__, #x);				\
-	}
-#endif
-
-#define PT64_PT_BITS 9
-#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
-#define PT32_PT_BITS 10
-#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
-
-#define PT_WRITABLE_SHIFT 1
-
-#define PT_PRESENT_MASK (1ULL << 0)
-#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
-#define PT_USER_MASK (1ULL << 2)
-#define PT_PWT_MASK (1ULL << 3)
-#define PT_PCD_MASK (1ULL << 4)
-#define PT_ACCESSED_MASK (1ULL << 5)
-#define PT_DIRTY_MASK (1ULL << 6)
-#define PT_PAGE_SIZE_MASK (1ULL << 7)
-#define PT_PAT_MASK (1ULL << 7)
-#define PT_GLOBAL_MASK (1ULL << 8)
-#define PT64_NX_MASK (1ULL << 63)
-
-#define PT_PAT_SHIFT 7
-#define PT_DIR_PAT_SHIFT 12
-#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
-
-#define PT32_DIR_PSE36_SIZE 4
-#define PT32_DIR_PSE36_SHIFT 13
-#define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
-
-
-#define PT_FIRST_AVAIL_BITS_SHIFT 9
-#define PT64_SECOND_AVAIL_BITS_SHIFT 52
-
-#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
-
-#define VALID_PAGE(x) ((x) != INVALID_PAGE)
-
-#define PT64_LEVEL_BITS 9
-
-#define PT64_LEVEL_SHIFT(level) \
-		( PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS )
-
-#define PT64_LEVEL_MASK(level) \
-		(((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))
-
-#define PT64_INDEX(address, level)\
-	(((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))
-
-
-#define PT32_LEVEL_BITS 10
-
-#define PT32_LEVEL_SHIFT(level) \
-		( PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS )
-
-#define PT32_LEVEL_MASK(level) \
-		(((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))
-
-#define PT32_INDEX(address, level)\
-	(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
-
-
-#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
-#define PT64_DIR_BASE_ADDR_MASK \
-	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))
-
-#define PT32_BASE_ADDR_MASK PAGE_MASK
-#define PT32_DIR_BASE_ADDR_MASK \
-	(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))
-
-
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_FETCH_MASK (1U << 4)
-
-#define PT64_ROOT_LEVEL 4
-#define PT32_ROOT_LEVEL 2
-#define PT32E_ROOT_LEVEL 3
-
-#define PT_DIRECTORY_LEVEL 2
-#define PT_PAGE_TABLE_LEVEL 1
-
-#define RMAP_EXT 4
-
-struct kvm_rmap_desc {
-	u64 *shadow_ptes[RMAP_EXT];
-	struct kvm_rmap_desc *more;
-};
-
-static struct kmem_cache *pte_chain_cache;
-static struct kmem_cache *rmap_desc_cache;
-static struct kmem_cache *mmu_page_header_cache;
-
-static int is_write_protection(struct kvm_vcpu *vcpu)
-{
-	return vcpu->cr0 & X86_CR0_WP;
-}
-
-static int is_cpuid_PSE36(void)
-{
-	return 1;
-}
-
-static int is_nx(struct kvm_vcpu *vcpu)
-{
-	return vcpu->shadow_efer & EFER_NX;
-}
-
-static int is_present_pte(unsigned long pte)
-{
-	return pte & PT_PRESENT_MASK;
-}
-
-static int is_writeble_pte(unsigned long pte)
-{
-	return pte & PT_WRITABLE_MASK;
-}
-
-static int is_io_pte(unsigned long pte)
-{
-	return pte & PT_SHADOW_IO_MARK;
-}
-
-static int is_rmap_pte(u64 pte)
-{
-	return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
-		== (PT_WRITABLE_MASK | PT_PRESENT_MASK);
-}
-
-static void set_shadow_pte(u64 *sptep, u64 spte)
-{
-#ifdef CONFIG_X86_64
-	set_64bit((unsigned long *)sptep, spte);
-#else
-	set_64bit((unsigned long long *)sptep, spte);
-#endif
-}
-
-static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
-				  struct kmem_cache *base_cache, int min)
-{
-	void *obj;
-
-	if (cache->nobjs >= min)
-		return 0;
-	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-		obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
-		if (!obj)
-			return -ENOMEM;
-		cache->objects[cache->nobjs++] = obj;
-	}
-	return 0;
-}
-
-static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
-{
-	while (mc->nobjs)
-		kfree(mc->objects[--mc->nobjs]);
-}
-
-static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
-				       int min)
-{
-	struct page *page;
-
-	if (cache->nobjs >= min)
-		return 0;
-	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-		page = alloc_page(GFP_KERNEL);
-		if (!page)
-			return -ENOMEM;
-		set_page_private(page, 0);
-		cache->objects[cache->nobjs++] = page_address(page);
-	}
-	return 0;
-}
-
-static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc)
-{
-	while (mc->nobjs)
-		free_page((unsigned long)mc->objects[--mc->nobjs]);
-}
-
-static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
-{
-	int r;
-
-	kvm_mmu_free_some_pages(vcpu);
-	r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
-				   pte_chain_cache, 4);
-	if (r)
-		goto out;
-	r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
-				   rmap_desc_cache, 1);
-	if (r)
-		goto out;
-	r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4);
-	if (r)
-		goto out;
-	r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
-				   mmu_page_header_cache, 4);
-out:
-	return r;
-}
-
-static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
-{
-	mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
-	mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
-	mmu_free_memory_cache_page(&vcpu->mmu_page_cache);
-	mmu_free_memory_cache(&vcpu->mmu_page_header_cache);
-}
-
-static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
-				    size_t size)
-{
-	void *p;
-
-	BUG_ON(!mc->nobjs);
-	p = mc->objects[--mc->nobjs];
-	memset(p, 0, size);
-	return p;
-}
-
-static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
-{
-	return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache,
-				      sizeof(struct kvm_pte_chain));
-}
-
-static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
-{
-	kfree(pc);
-}
-
-static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
-{
-	return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache,
-				      sizeof(struct kvm_rmap_desc));
-}
-
-static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
-{
-	kfree(rd);
-}
-
-/*
- * Reverse mapping data structures:
- *
- * If page->private bit zero is zero, then page->private points to the
- * shadow page table entry that points to page_address(page).
- *
- * If page->private bit zero is one, (then page->private & ~1) points
- * to a struct kvm_rmap_desc containing more mappings.
- */
-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
-{
-	struct page *page;
-	struct kvm_rmap_desc *desc;
-	int i;
-
-	if (!is_rmap_pte(*spte))
-		return;
-	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
-	if (!page_private(page)) {
-		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
-		set_page_private(page,(unsigned long)spte);
-	} else if (!(page_private(page) & 1)) {
-		rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
-		desc = mmu_alloc_rmap_desc(vcpu);
-		desc->shadow_ptes[0] = (u64 *)page_private(page);
-		desc->shadow_ptes[1] = spte;
-		set_page_private(page,(unsigned long)desc | 1);
-	} else {
-		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
-		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
-		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
-			desc = desc->more;
-		if (desc->shadow_ptes[RMAP_EXT-1]) {
-			desc->more = mmu_alloc_rmap_desc(vcpu);
-			desc = desc->more;
-		}
-		for (i = 0; desc->shadow_ptes[i]; ++i)
-			;
-		desc->shadow_ptes[i] = spte;
-	}
-}
-
-static void rmap_desc_remove_entry(struct page *page,
-				   struct kvm_rmap_desc *desc,
-				   int i,
-				   struct kvm_rmap_desc *prev_desc)
-{
-	int j;
-
-	for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
-		;
-	desc->shadow_ptes[i] = desc->shadow_ptes[j];
-	desc->shadow_ptes[j] = NULL;
-	if (j != 0)
-		return;
-	if (!prev_desc && !desc->more)
-		set_page_private(page,(unsigned long)desc->shadow_ptes[0]);
-	else
-		if (prev_desc)
-			prev_desc->more = desc->more;
-		else
-			set_page_private(page,(unsigned long)desc->more | 1);
-	mmu_free_rmap_desc(desc);
-}
-
-static void rmap_remove(u64 *spte)
-{
-	struct page *page;
-	struct kvm_rmap_desc *desc;
-	struct kvm_rmap_desc *prev_desc;
-	int i;
-
-	if (!is_rmap_pte(*spte))
-		return;
-	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
-	if (!page_private(page)) {
-		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
-		BUG();
-	} else if (!(page_private(page) & 1)) {
-		rmap_printk("rmap_remove:  %p %llx 1->0\n", spte, *spte);
-		if ((u64 *)page_private(page) != spte) {
-			printk(KERN_ERR "rmap_remove:  %p %llx 1->BUG\n",
-			       spte, *spte);
-			BUG();
-		}
-		set_page_private(page,0);
-	} else {
-		rmap_printk("rmap_remove:  %p %llx many->many\n", spte, *spte);
-		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
-		prev_desc = NULL;
-		while (desc) {
-			for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
-				if (desc->shadow_ptes[i] == spte) {
-					rmap_desc_remove_entry(page,
-							       desc, i,
-							       prev_desc);
-					return;
-				}
-			prev_desc = desc;
-			desc = desc->more;
-		}
-		BUG();
-	}
-}
-
-static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
-{
-	struct kvm *kvm = vcpu->kvm;
-	struct page *page;
-	struct kvm_rmap_desc *desc;
-	u64 *spte;
-
-	page = gfn_to_page(kvm, gfn);
-	BUG_ON(!page);
-
-	while (page_private(page)) {
-		if (!(page_private(page) & 1))
-			spte = (u64 *)page_private(page);
-		else {
-			desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
-			spte = desc->shadow_ptes[0];
-		}
-		BUG_ON(!spte);
-		BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT
-		       != page_to_pfn(page));
-		BUG_ON(!(*spte & PT_PRESENT_MASK));
-		BUG_ON(!(*spte & PT_WRITABLE_MASK));
-		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-		rmap_remove(spte);
-		set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
-		kvm_flush_remote_tlbs(vcpu->kvm);
-	}
-}
-
-#ifdef MMU_DEBUG
-static int is_empty_shadow_page(u64 *spt)
-{
-	u64 *pos;
-	u64 *end;
-
-	for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
-		if (*pos != 0) {
-			printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
-			       pos, *pos);
-			return 0;
-		}
-	return 1;
-}
-#endif
-
-static void kvm_mmu_free_page(struct kvm *kvm,
-			      struct kvm_mmu_page *page_head)
-{
-	ASSERT(is_empty_shadow_page(page_head->spt));
-	list_del(&page_head->link);
-	__free_page(virt_to_page(page_head->spt));
-	kfree(page_head);
-	++kvm->n_free_mmu_pages;
-}
-
-static unsigned kvm_page_table_hashfn(gfn_t gfn)
-{
-	return gfn;
-}
-
-static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
-					       u64 *parent_pte)
-{
-	struct kvm_mmu_page *page;
-
-	if (!vcpu->kvm->n_free_mmu_pages)
-		return NULL;
-
-	page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache,
-				      sizeof *page);
-	page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE);
-	set_page_private(virt_to_page(page->spt), (unsigned long)page);
-	list_add(&page->link, &vcpu->kvm->active_mmu_pages);
-	ASSERT(is_empty_shadow_page(page->spt));
-	page->slot_bitmap = 0;
-	page->multimapped = 0;
-	page->parent_pte = parent_pte;
-	--vcpu->kvm->n_free_mmu_pages;
-	return page;
-}
-
-static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
-				    struct kvm_mmu_page *page, u64 *parent_pte)
-{
-	struct kvm_pte_chain *pte_chain;
-	struct hlist_node *node;
-	int i;
-
-	if (!parent_pte)
-		return;
-	if (!page->multimapped) {
-		u64 *old = page->parent_pte;
-
-		if (!old) {
-			page->parent_pte = parent_pte;
-			return;
-		}
-		page->multimapped = 1;
-		pte_chain = mmu_alloc_pte_chain(vcpu);
-		INIT_HLIST_HEAD(&page->parent_ptes);
-		hlist_add_head(&pte_chain->link, &page->parent_ptes);
-		pte_chain->parent_ptes[0] = old;
-	}
-	hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) {
-		if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1])
-			continue;
-		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i)
-			if (!pte_chain->parent_ptes[i]) {
-				pte_chain->parent_ptes[i] = parent_pte;
-				return;
-			}
-	}
-	pte_chain = mmu_alloc_pte_chain(vcpu);
-	BUG_ON(!pte_chain);
-	hlist_add_head(&pte_chain->link, &page->parent_ptes);
-	pte_chain->parent_ptes[0] = parent_pte;
-}
-
-static void mmu_page_remove_parent_pte(struct kvm_mmu_page *page,
-				       u64 *parent_pte)
-{
-	struct kvm_pte_chain *pte_chain;
-	struct hlist_node *node;
-	int i;
-
-	if (!page->multimapped) {
-		BUG_ON(page->parent_pte != parent_pte);
-		page->parent_pte = NULL;
-		return;
-	}
-	hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link)
-		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
-			if (!pte_chain->parent_ptes[i])
-				break;
-			if (pte_chain->parent_ptes[i] != parent_pte)
-				continue;
-			while (i + 1 < NR_PTE_CHAIN_ENTRIES
-				&& pte_chain->parent_ptes[i + 1]) {
-				pte_chain->parent_ptes[i]
-					= pte_chain->parent_ptes[i + 1];
-				++i;
-			}
-			pte_chain->parent_ptes[i] = NULL;
-			if (i == 0) {
-				hlist_del(&pte_chain->link);
-				mmu_free_pte_chain(pte_chain);
-				if (hlist_empty(&page->parent_ptes)) {
-					page->multimapped = 0;
-					page->parent_pte = NULL;
-				}
-			}
-			return;
-		}
-	BUG();
-}
-
-static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu,
-						gfn_t gfn)
-{
-	unsigned index;
-	struct hlist_head *bucket;
-	struct kvm_mmu_page *page;
-	struct hlist_node *node;
-
-	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
-	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
-	bucket = &vcpu->kvm->mmu_page_hash[index];
-	hlist_for_each_entry(page, node, bucket, hash_link)
-		if (page->gfn == gfn && !page->role.metaphysical) {
-			pgprintk("%s: found role %x\n",
-				 __FUNCTION__, page->role.word);
-			return page;
-		}
-	return NULL;
-}
-
-static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
-					     gfn_t gfn,
-					     gva_t gaddr,
-					     unsigned level,
-					     int metaphysical,
-					     unsigned hugepage_access,
-					     u64 *parent_pte)
-{
-	union kvm_mmu_page_role role;
-	unsigned index;
-	unsigned quadrant;
-	struct hlist_head *bucket;
-	struct kvm_mmu_page *page;
-	struct hlist_node *node;
-
-	role.word = 0;
-	role.glevels = vcpu->mmu.root_level;
-	role.level = level;
-	role.metaphysical = metaphysical;
-	role.hugepage_access = hugepage_access;
-	if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
-		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
-		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
-		role.quadrant = quadrant;
-	}
-	pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
-		 gfn, role.word);
-	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
-	bucket = &vcpu->kvm->mmu_page_hash[index];
-	hlist_for_each_entry(page, node, bucket, hash_link)
-		if (page->gfn == gfn && page->role.word == role.word) {
-			mmu_page_add_parent_pte(vcpu, page, parent_pte);
-			pgprintk("%s: found\n", __FUNCTION__);
-			return page;
-		}
-	page = kvm_mmu_alloc_page(vcpu, parent_pte);
-	if (!page)
-		return page;
-	pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
-	page->gfn = gfn;
-	page->role = role;
-	hlist_add_head(&page->hash_link, bucket);
-	if (!metaphysical)
-		rmap_write_protect(vcpu, gfn);
-	return page;
-}
-
-static void kvm_mmu_page_unlink_children(struct kvm *kvm,
-					 struct kvm_mmu_page *page)
-{
-	unsigned i;
-	u64 *pt;
-	u64 ent;
-
-	pt = page->spt;
-
-	if (page->role.level == PT_PAGE_TABLE_LEVEL) {
-		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
-			if (pt[i] & PT_PRESENT_MASK)
-				rmap_remove(&pt[i]);
-			pt[i] = 0;
-		}
-		kvm_flush_remote_tlbs(kvm);
-		return;
-	}
-
-	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
-		ent = pt[i];
-
-		pt[i] = 0;
-		if (!(ent & PT_PRESENT_MASK))
-			continue;
-		ent &= PT64_BASE_ADDR_MASK;
-		mmu_page_remove_parent_pte(page_header(ent), &pt[i]);
-	}
-	kvm_flush_remote_tlbs(kvm);
-}
-
-static void kvm_mmu_put_page(struct kvm_mmu_page *page,
-			     u64 *parent_pte)
-{
-	mmu_page_remove_parent_pte(page, parent_pte);
-}
-
-static void kvm_mmu_zap_page(struct kvm *kvm,
-			     struct kvm_mmu_page *page)
-{
-	u64 *parent_pte;
-
-	while (page->multimapped || page->parent_pte) {
-		if (!page->multimapped)
-			parent_pte = page->parent_pte;
-		else {
-			struct kvm_pte_chain *chain;
-
-			chain = container_of(page->parent_ptes.first,
-					     struct kvm_pte_chain, link);
-			parent_pte = chain->parent_ptes[0];
-		}
-		BUG_ON(!parent_pte);
-		kvm_mmu_put_page(page, parent_pte);
-		set_shadow_pte(parent_pte, 0);
-	}
-	kvm_mmu_page_unlink_children(kvm, page);
-	if (!page->root_count) {
-		hlist_del(&page->hash_link);
-		kvm_mmu_free_page(kvm, page);
-	} else
-		list_move(&page->link, &kvm->active_mmu_pages);
-}
-
-static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
-	unsigned index;
-	struct hlist_head *bucket;
-	struct kvm_mmu_page *page;
-	struct hlist_node *node, *n;
-	int r;
-
-	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
-	r = 0;
-	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
-	bucket = &vcpu->kvm->mmu_page_hash[index];
-	hlist_for_each_entry_safe(page, node, n, bucket, hash_link)
-		if (page->gfn == gfn && !page->role.metaphysical) {
-			pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
-				 page->role.word);
-			kvm_mmu_zap_page(vcpu->kvm, page);
-			r = 1;
-		}
-	return r;
-}
-
-static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
-	struct kvm_mmu_page *page;
-
-	while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
-		pgprintk("%s: zap %lx %x\n",
-			 __FUNCTION__, gfn, page->role.word);
-		kvm_mmu_zap_page(vcpu->kvm, page);
-	}
-}
-
-static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
-{
-	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT));
-	struct kvm_mmu_page *page_head = page_header(__pa(pte));
-
-	__set_bit(slot, &page_head->slot_bitmap);
-}
-
-hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
-{
-	hpa_t hpa = gpa_to_hpa(vcpu, gpa);
-
-	return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK): hpa;
-}
-
-hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
-{
-	struct page *page;
-
-	ASSERT((gpa & HPA_ERR_MASK) == 0);
-	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	if (!page)
-		return gpa | HPA_ERR_MASK;
-	return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
-		| (gpa & (PAGE_SIZE-1));
-}
-
-hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
-{
-	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
-
-	if (gpa == UNMAPPED_GVA)
-		return UNMAPPED_GVA;
-	return gpa_to_hpa(vcpu, gpa);
-}
-
-struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
-{
-	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
-
-	if (gpa == UNMAPPED_GVA)
-		return NULL;
-	return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT);
-}
-
-static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
-{
-}
-
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
-{
-	int level = PT32E_ROOT_LEVEL;
-	hpa_t table_addr = vcpu->mmu.root_hpa;
-
-	for (; ; level--) {
-		u32 index = PT64_INDEX(v, level);
-		u64 *table;
-		u64 pte;
-
-		ASSERT(VALID_PAGE(table_addr));
-		table = __va(table_addr);
-
-		if (level == 1) {
-			pte = table[index];
-			if (is_present_pte(pte) && is_writeble_pte(pte))
-				return 0;
-			mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
-			page_header_update_slot(vcpu->kvm, table, v);
-			table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
-								PT_USER_MASK;
-			rmap_add(vcpu, &table[index]);
-			return 0;
-		}
-
-		if (table[index] == 0) {
-			struct kvm_mmu_page *new_table;
-			gfn_t pseudo_gfn;
-
-			pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK)
-				>> PAGE_SHIFT;
-			new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
-						     v, level - 1,
-						     1, 0, &table[index]);
-			if (!new_table) {
-				pgprintk("nonpaging_map: ENOMEM\n");
-				return -ENOMEM;
-			}
-
-			table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
-				| PT_WRITABLE_MASK | PT_USER_MASK;
-		}
-		table_addr = table[index] & PT64_BASE_ADDR_MASK;
-	}
-}
-
-static void mmu_free_roots(struct kvm_vcpu *vcpu)
-{
-	int i;
-	struct kvm_mmu_page *page;
-
-	if (!VALID_PAGE(vcpu->mmu.root_hpa))
-		return;
-#ifdef CONFIG_X86_64
-	if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
-		hpa_t root = vcpu->mmu.root_hpa;
-
-		page = page_header(root);
-		--page->root_count;
-		vcpu->mmu.root_hpa = INVALID_PAGE;
-		return;
-	}
-#endif
-	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->mmu.pae_root[i];
-
-		if (root) {
-			root &= PT64_BASE_ADDR_MASK;
-			page = page_header(root);
-			--page->root_count;
-		}
-		vcpu->mmu.pae_root[i] = INVALID_PAGE;
-	}
-	vcpu->mmu.root_hpa = INVALID_PAGE;
-}
-
-static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
-{
-	int i;
-	gfn_t root_gfn;
-	struct kvm_mmu_page *page;
-
-	root_gfn = vcpu->cr3 >> PAGE_SHIFT;
-
-#ifdef CONFIG_X86_64
-	if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
-		hpa_t root = vcpu->mmu.root_hpa;
-
-		ASSERT(!VALID_PAGE(root));
-		page = kvm_mmu_get_page(vcpu, root_gfn, 0,
-					PT64_ROOT_LEVEL, 0, 0, NULL);
-		root = __pa(page->spt);
-		++page->root_count;
-		vcpu->mmu.root_hpa = root;
-		return;
-	}
-#endif
-	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->mmu.pae_root[i];
-
-		ASSERT(!VALID_PAGE(root));
-		if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) {
-			if (!is_present_pte(vcpu->pdptrs[i])) {
-				vcpu->mmu.pae_root[i] = 0;
-				continue;
-			}
-			root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
-		} else if (vcpu->mmu.root_level == 0)
-			root_gfn = 0;
-		page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
-					PT32_ROOT_LEVEL, !is_paging(vcpu),
-					0, NULL);
-		root = __pa(page->spt);
-		++page->root_count;
-		vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
-	}
-	vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root);
-}
-
-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
-{
-	return vaddr;
-}
-
-static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
-			       u32 error_code)
-{
-	gpa_t addr = gva;
-	hpa_t paddr;
-	int r;
-
-	r = mmu_topup_memory_caches(vcpu);
-	if (r)
-		return r;
-
-	ASSERT(vcpu);
-	ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));
-
-
-	paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
-
-	if (is_error_hpa(paddr))
-		return 1;
-
-	return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
-}
-
-static void nonpaging_free(struct kvm_vcpu *vcpu)
-{
-	mmu_free_roots(vcpu);
-}
-
-static int nonpaging_init_context(struct kvm_vcpu *vcpu)
-{
-	struct kvm_mmu *context = &vcpu->mmu;
-
-	context->new_cr3 = nonpaging_new_cr3;
-	context->page_fault = nonpaging_page_fault;
-	context->gva_to_gpa = nonpaging_gva_to_gpa;
-	context->free = nonpaging_free;
-	context->root_level = 0;
-	context->shadow_root_level = PT32E_ROOT_LEVEL;
-	context->root_hpa = INVALID_PAGE;
-	return 0;
-}
-
-static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
-{
-	++vcpu->stat.tlb_flush;
-	kvm_x86_ops->tlb_flush(vcpu);
-}
-
-static void paging_new_cr3(struct kvm_vcpu *vcpu)
-{
-	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
-	mmu_free_roots(vcpu);
-}
-
-static void inject_page_fault(struct kvm_vcpu *vcpu,
-			      u64 addr,
-			      u32 err_code)
-{
-	kvm_x86_ops->inject_page_fault(vcpu, addr, err_code);
-}
-
-static void paging_free(struct kvm_vcpu *vcpu)
-{
-	nonpaging_free(vcpu);
-}
-
-#define PTTYPE 64
-#include "paging_tmpl.h"
-#undef PTTYPE
-
-#define PTTYPE 32
-#include "paging_tmpl.h"
-#undef PTTYPE
-
-static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
-{
-	struct kvm_mmu *context = &vcpu->mmu;
-
-	ASSERT(is_pae(vcpu));
-	context->new_cr3 = paging_new_cr3;
-	context->page_fault = paging64_page_fault;
-	context->gva_to_gpa = paging64_gva_to_gpa;
-	context->free = paging_free;
-	context->root_level = level;
-	context->shadow_root_level = level;
-	context->root_hpa = INVALID_PAGE;
-	return 0;
-}
-
-static int paging64_init_context(struct kvm_vcpu *vcpu)
-{
-	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
-}
-
-static int paging32_init_context(struct kvm_vcpu *vcpu)
-{
-	struct kvm_mmu *context = &vcpu->mmu;
-
-	context->new_cr3 = paging_new_cr3;
-	context->page_fault = paging32_page_fault;
-	context->gva_to_gpa = paging32_gva_to_gpa;
-	context->free = paging_free;
-	context->root_level = PT32_ROOT_LEVEL;
-	context->shadow_root_level = PT32E_ROOT_LEVEL;
-	context->root_hpa = INVALID_PAGE;
-	return 0;
-}
-
-static int paging32E_init_context(struct kvm_vcpu *vcpu)
-{
-	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
-}
-
-static int init_kvm_mmu(struct kvm_vcpu *vcpu)
-{
-	ASSERT(vcpu);
-	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
-
-	if (!is_paging(vcpu))
-		return nonpaging_init_context(vcpu);
-	else if (is_long_mode(vcpu))
-		return paging64_init_context(vcpu);
-	else if (is_pae(vcpu))
-		return paging32E_init_context(vcpu);
-	else
-		return paging32_init_context(vcpu);
-}
-
-static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
-{
-	ASSERT(vcpu);
-	if (VALID_PAGE(vcpu->mmu.root_hpa)) {
-		vcpu->mmu.free(vcpu);
-		vcpu->mmu.root_hpa = INVALID_PAGE;
-	}
-}
-
-int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
-{
-	destroy_kvm_mmu(vcpu);
-	return init_kvm_mmu(vcpu);
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
-
-int kvm_mmu_load(struct kvm_vcpu *vcpu)
-{
-	int r;
-
-	mutex_lock(&vcpu->kvm->lock);
-	r = mmu_topup_memory_caches(vcpu);
-	if (r)
-		goto out;
-	mmu_alloc_roots(vcpu);
-	kvm_x86_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
-	kvm_mmu_flush_tlb(vcpu);
-out:
-	mutex_unlock(&vcpu->kvm->lock);
-	return r;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_load);
-
-void kvm_mmu_unload(struct kvm_vcpu *vcpu)
-{
-	mmu_free_roots(vcpu);
-}
-
-static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
-				  struct kvm_mmu_page *page,
-				  u64 *spte)
-{
-	u64 pte;
-	struct kvm_mmu_page *child;
-
-	pte = *spte;
-	if (is_present_pte(pte)) {
-		if (page->role.level == PT_PAGE_TABLE_LEVEL)
-			rmap_remove(spte);
-		else {
-			child = page_header(pte & PT64_BASE_ADDR_MASK);
-			mmu_page_remove_parent_pte(child, spte);
-		}
-	}
-	set_shadow_pte(spte, 0);
-	kvm_flush_remote_tlbs(vcpu->kvm);
-}
-
-static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
-				  struct kvm_mmu_page *page,
-				  u64 *spte,
-				  const void *new, int bytes)
-{
-	if (page->role.level != PT_PAGE_TABLE_LEVEL)
-		return;
-
-	if (page->role.glevels == PT32_ROOT_LEVEL)
-		paging32_update_pte(vcpu, page, spte, new, bytes);
-	else
-		paging64_update_pte(vcpu, page, spte, new, bytes);
-}
-
-void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes)
-{
-	gfn_t gfn = gpa >> PAGE_SHIFT;
-	struct kvm_mmu_page *page;
-	struct hlist_node *node, *n;
-	struct hlist_head *bucket;
-	unsigned index;
-	u64 *spte;
-	unsigned offset = offset_in_page(gpa);
-	unsigned pte_size;
-	unsigned page_offset;
-	unsigned misaligned;
-	unsigned quadrant;
-	int level;
-	int flooded = 0;
-	int npte;
-
-	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
-	if (gfn == vcpu->last_pt_write_gfn) {
-		++vcpu->last_pt_write_count;
-		if (vcpu->last_pt_write_count >= 3)
-			flooded = 1;
-	} else {
-		vcpu->last_pt_write_gfn = gfn;
-		vcpu->last_pt_write_count = 1;
-	}
-	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
-	bucket = &vcpu->kvm->mmu_page_hash[index];
-	hlist_for_each_entry_safe(page, node, n, bucket, hash_link) {
-		if (page->gfn != gfn || page->role.metaphysical)
-			continue;
-		pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
-		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
-		misaligned |= bytes < 4;
-		if (misaligned || flooded) {
-			/*
-			 * Misaligned accesses are too much trouble to fix
-			 * up; also, they usually indicate a page is not used
-			 * as a page table.
-			 *
-			 * If we're seeing too many writes to a page,
-			 * it may no longer be a page table, or we may be
-			 * forking, in which case it is better to unmap the
-			 * page.
-			 */
-			pgprintk("misaligned: gpa %llx bytes %d role %x\n",
-				 gpa, bytes, page->role.word);
-			kvm_mmu_zap_page(vcpu->kvm, page);
-			continue;
-		}
-		page_offset = offset;
-		level = page->role.level;
-		npte = 1;
-		if (page->role.glevels == PT32_ROOT_LEVEL) {
-			page_offset <<= 1;	/* 32->64 */
-			/*
-			 * A 32-bit pde maps 4MB while the shadow pdes map
-			 * only 2MB.  So we need to double the offset again
-			 * and zap two pdes instead of one.
-			 */
-			if (level == PT32_ROOT_LEVEL) {
-				page_offset &= ~7; /* kill rounding error */
-				page_offset <<= 1;
-				npte = 2;
-			}
-			quadrant = page_offset >> PAGE_SHIFT;
-			page_offset &= ~PAGE_MASK;
-			if (quadrant != page->role.quadrant)
-				continue;
-		}
-		spte = &page->spt[page_offset / sizeof(*spte)];
-		while (npte--) {
-			mmu_pte_write_zap_pte(vcpu, page, spte);
-			mmu_pte_write_new_pte(vcpu, page, spte, new, bytes);
-			++spte;
-		}
-	}
-}
-
-int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
-{
-	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
-
-	return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT);
-}
-
-void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
-{
-	while (vcpu->kvm->n_free_mmu_pages < KVM_REFILL_PAGES) {
-		struct kvm_mmu_page *page;
-
-		page = container_of(vcpu->kvm->active_mmu_pages.prev,
-				    struct kvm_mmu_page, link);
-		kvm_mmu_zap_page(vcpu->kvm, page);
-	}
-}
-
-static void free_mmu_pages(struct kvm_vcpu *vcpu)
-{
-	struct kvm_mmu_page *page;
-
-	while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
-		page = container_of(vcpu->kvm->active_mmu_pages.next,
-				    struct kvm_mmu_page, link);
-		kvm_mmu_zap_page(vcpu->kvm, page);
-	}
-	free_page((unsigned long)vcpu->mmu.pae_root);
-}
-
-static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
-{
-	struct page *page;
-	int i;
-
-	ASSERT(vcpu);
-
-	vcpu->kvm->n_free_mmu_pages = KVM_NUM_MMU_PAGES;
-
-	/*
-	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
-	 * Therefore we need to allocate shadow page tables in the first
-	 * 4GB of memory, which happens to fit the DMA32 zone.
-	 */
-	page = alloc_page(GFP_KERNEL | __GFP_DMA32);
-	if (!page)
-		goto error_1;
-	vcpu->mmu.pae_root = page_address(page);
-	for (i = 0; i < 4; ++i)
-		vcpu->mmu.pae_root[i] = INVALID_PAGE;
-
-	return 0;
-
-error_1:
-	free_mmu_pages(vcpu);
-	return -ENOMEM;
-}
-
-int kvm_mmu_create(struct kvm_vcpu *vcpu)
-{
-	ASSERT(vcpu);
-	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
-
-	return alloc_mmu_pages(vcpu);
-}
-
-int kvm_mmu_setup(struct kvm_vcpu *vcpu)
-{
-	ASSERT(vcpu);
-	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
-
-	return init_kvm_mmu(vcpu);
-}
-
-void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
-{
-	ASSERT(vcpu);
-
-	destroy_kvm_mmu(vcpu);
-	free_mmu_pages(vcpu);
-	mmu_free_memory_caches(vcpu);
-}
-
-void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
-{
-	struct kvm_mmu_page *page;
-
-	list_for_each_entry(page, &kvm->active_mmu_pages, link) {
-		int i;
-		u64 *pt;
-
-		if (!test_bit(slot, &page->slot_bitmap))
-			continue;
-
-		pt = page->spt;
-		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
-			/* avoid RMW */
-			if (pt[i] & PT_WRITABLE_MASK) {
-				rmap_remove(&pt[i]);
-				pt[i] &= ~PT_WRITABLE_MASK;
-			}
-	}
-}
-
-void kvm_mmu_zap_all(struct kvm *kvm)
-{
-	struct kvm_mmu_page *page, *node;
-
-	list_for_each_entry_safe(page, node, &kvm->active_mmu_pages, link)
-		kvm_mmu_zap_page(kvm, page);
-
-	kvm_flush_remote_tlbs(kvm);
-}
-
-void kvm_mmu_module_exit(void)
-{
-	if (pte_chain_cache)
-		kmem_cache_destroy(pte_chain_cache);
-	if (rmap_desc_cache)
-		kmem_cache_destroy(rmap_desc_cache);
-	if (mmu_page_header_cache)
-		kmem_cache_destroy(mmu_page_header_cache);
-}
-
-int kvm_mmu_module_init(void)
-{
-	pte_chain_cache = kmem_cache_create("kvm_pte_chain",
-					    sizeof(struct kvm_pte_chain),
-					    0, 0, NULL);
-	if (!pte_chain_cache)
-		goto nomem;
-	rmap_desc_cache = kmem_cache_create("kvm_rmap_desc",
-					    sizeof(struct kvm_rmap_desc),
-					    0, 0, NULL);
-	if (!rmap_desc_cache)
-		goto nomem;
-
-	mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
-						  sizeof(struct kvm_mmu_page),
-						  0, 0, NULL);
-	if (!mmu_page_header_cache)
-		goto nomem;
-
-	return 0;
-
-nomem:
-	kvm_mmu_module_exit();
-	return -ENOMEM;
-}
-
-#ifdef AUDIT
-
-static const char *audit_msg;
-
-static gva_t canonicalize(gva_t gva)
-{
-#ifdef CONFIG_X86_64
-	gva = (long long)(gva << 16) >> 16;
-#endif
-	return gva;
-}
-
-static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
-				gva_t va, int level)
-{
-	u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK);
-	int i;
-	gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1));
-
-	for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
-		u64 ent = pt[i];
-
-		if (!(ent & PT_PRESENT_MASK))
-			continue;
-
-		va = canonicalize(va);
-		if (level > 1)
-			audit_mappings_page(vcpu, ent, va, level - 1);
-		else {
-			gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
-			hpa_t hpa = gpa_to_hpa(vcpu, gpa);
-
-			if ((ent & PT_PRESENT_MASK)
-			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
-				printk(KERN_ERR "audit error: (%s) levels %d"
-				       " gva %lx gpa %llx hpa %llx ent %llx\n",
-				       audit_msg, vcpu->mmu.root_level,
-				       va, gpa, hpa, ent);
-		}
-	}
-}
-
-static void audit_mappings(struct kvm_vcpu *vcpu)
-{
-	unsigned i;
-
-	if (vcpu->mmu.root_level == 4)
-		audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
-	else
-		for (i = 0; i < 4; ++i)
-			if (vcpu->mmu.pae_root[i] & PT_PRESENT_MASK)
-				audit_mappings_page(vcpu,
-						    vcpu->mmu.pae_root[i],
-						    i << 30,
-						    2);
-}
-
-static int count_rmaps(struct kvm_vcpu *vcpu)
-{
-	int nmaps = 0;
-	int i, j, k;
-
-	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
-		struct kvm_rmap_desc *d;
-
-		for (j = 0; j < m->npages; ++j) {
-			struct page *page = m->phys_mem[j];
-
-			if (!page->private)
-				continue;
-			if (!(page->private & 1)) {
-				++nmaps;
-				continue;
-			}
-			d = (struct kvm_rmap_desc *)(page->private & ~1ul);
-			while (d) {
-				for (k = 0; k < RMAP_EXT; ++k)
-					if (d->shadow_ptes[k])
-						++nmaps;
-					else
-						break;
-				d = d->more;
-			}
-		}
-	}
-	return nmaps;
-}
-
-static int count_writable_mappings(struct kvm_vcpu *vcpu)
-{
-	int nmaps = 0;
-	struct kvm_mmu_page *page;
-	int i;
-
-	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
-		u64 *pt = page->spt;
-
-		if (page->role.level != PT_PAGE_TABLE_LEVEL)
-			continue;
-
-		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
-			u64 ent = pt[i];
-
-			if (!(ent & PT_PRESENT_MASK))
-				continue;
-			if (!(ent & PT_WRITABLE_MASK))
-				continue;
-			++nmaps;
-		}
-	}
-	return nmaps;
-}
-
-static void audit_rmap(struct kvm_vcpu *vcpu)
-{
-	int n_rmap = count_rmaps(vcpu);
-	int n_actual = count_writable_mappings(vcpu);
-
-	if (n_rmap != n_actual)
-		printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
-		       __FUNCTION__, audit_msg, n_rmap, n_actual);
-}
-
-static void audit_write_protection(struct kvm_vcpu *vcpu)
-{
-	struct kvm_mmu_page *page;
-
-	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
-		hfn_t hfn;
-		struct page *pg;
-
-		if (page->role.metaphysical)
-			continue;
-
-		hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT)
-			>> PAGE_SHIFT;
-		pg = pfn_to_page(hfn);
-		if (pg->private)
-			printk(KERN_ERR "%s: (%s) shadow page has writable"
-			       " mappings: gfn %lx role %x\n",
-			       __FUNCTION__, audit_msg, page->gfn,
-			       page->role.word);
-	}
-}
-
-static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
-{
-	int olddbg = dbg;
-
-	dbg = 0;
-	audit_msg = msg;
-	audit_rmap(vcpu);
-	audit_write_protection(vcpu);
-	audit_mappings(vcpu);
-	dbg = olddbg;
-}
-
-#endif
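One idiom from the file above is worth calling out: rmap_add()/rmap_remove() overload page->private as a tagged word. Low bit clear means the word is a single spte pointer; low bit set means (value & ~1) points to a chain of kvm_rmap_desc blocks. A self-contained sketch of that pointer-tagging trick, with hypothetical names, assuming descriptors are at least 2-byte aligned (true for any kmalloc'd struct):

/* Tagged word: either one pte pointer, or a pointer to a descriptor chain. */
struct rmap_desc_sketch {
	unsigned long *ptes[4];
	struct rmap_desc_sketch *more;
};

static int rmap_is_multi(unsigned long priv)
{
	return priv & 1;                /* bit 0 = "many mappings" flag */
}

static struct rmap_desc_sketch *rmap_desc_of(unsigned long priv)
{
	return (struct rmap_desc_sketch *)(priv & ~1ul);
}

static unsigned long rmap_tag_desc(struct rmap_desc_sketch *d)
{
	return (unsigned long)d | 1;    /* alignment keeps bit 0 free */
}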

+ 0 - 511
drivers/kvm/paging_tmpl.h

@@ -1,511 +0,0 @@
-/*
- * Kernel-based Virtual Machine driver for Linux
- *
- * This module enables machines with Intel VT-x extensions to run virtual
- * machines without emulation or binary translation.
- *
- * MMU support
- *
- * Copyright (C) 2006 Qumranet, Inc.
- *
- * Authors:
- *   Yaniv Kamay  <yaniv@qumranet.com>
- *   Avi Kivity   <avi@qumranet.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- */
-
-/*
- * We need the mmu code to access both 32-bit and 64-bit guest ptes,
- * so the code in this file is compiled twice, once per pte size.
- */
-
-#if PTTYPE == 64
-	#define pt_element_t u64
-	#define guest_walker guest_walker64
-	#define FNAME(name) paging##64_##name
-	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
-	#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
-	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
-	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
-	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
-	#ifdef CONFIG_X86_64
-	#define PT_MAX_FULL_LEVELS 4
-	#else
-	#define PT_MAX_FULL_LEVELS 2
-	#endif
-#elif PTTYPE == 32
-	#define pt_element_t u32
-	#define guest_walker guest_walker32
-	#define FNAME(name) paging##32_##name
-	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
-	#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
-	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
-	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
-	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
-	#define PT_MAX_FULL_LEVELS 2
-#else
-	#error Invalid PTTYPE value
-#endif
-
-/*
- * The guest_walker structure emulates the behavior of the hardware page
- * table walker.
- */
-struct guest_walker {
-	int level;
-	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
-	pt_element_t *table;
-	pt_element_t pte;
-	pt_element_t *ptep;
-	struct page *page;
-	int index;
-	pt_element_t inherited_ar;
-	gfn_t gfn;
-	u32 error_code;
-};
-
-/*
- * Fetch a guest pte for a guest virtual address
- */
-static int FNAME(walk_addr)(struct guest_walker *walker,
-			    struct kvm_vcpu *vcpu, gva_t addr,
-			    int write_fault, int user_fault, int fetch_fault)
-{
-	hpa_t hpa;
-	struct kvm_memory_slot *slot;
-	pt_element_t *ptep;
-	pt_element_t root;
-	gfn_t table_gfn;
-
-	pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
-	walker->level = vcpu->mmu.root_level;
-	walker->table = NULL;
-	walker->page = NULL;
-	walker->ptep = NULL;
-	root = vcpu->cr3;
-#if PTTYPE == 64
-	if (!is_long_mode(vcpu)) {
-		walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
-		root = *walker->ptep;
-		walker->pte = root;
-		if (!(root & PT_PRESENT_MASK))
-			goto not_present;
-		--walker->level;
-	}
-#endif
-	table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
-	walker->table_gfn[walker->level - 1] = table_gfn;
-	pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
-		 walker->level - 1, table_gfn);
-	slot = gfn_to_memslot(vcpu->kvm, table_gfn);
-	hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK);
-	walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
-	walker->table = kmap_atomic(walker->page, KM_USER0);
-
-	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
-	       (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
-
-	walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
-
-	for (;;) {
-		int index = PT_INDEX(addr, walker->level);
-		hpa_t paddr;
-
-		ptep = &walker->table[index];
-		walker->index = index;
-		ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
-		       ((unsigned long)ptep & PAGE_MASK));
-
-		if (!is_present_pte(*ptep))
-			goto not_present;
-
-		if (write_fault && !is_writeble_pte(*ptep))
-			if (user_fault || is_write_protection(vcpu))
-				goto access_error;
-
-		if (user_fault && !(*ptep & PT_USER_MASK))
-			goto access_error;
-
-#if PTTYPE == 64
-		if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK))
-			goto access_error;
-#endif
-
-		if (!(*ptep & PT_ACCESSED_MASK)) {
-			mark_page_dirty(vcpu->kvm, table_gfn);
-			*ptep |= PT_ACCESSED_MASK;
-		}
-
-		if (walker->level == PT_PAGE_TABLE_LEVEL) {
-			walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
-				>> PAGE_SHIFT;
-			break;
-		}
-
-		if (walker->level == PT_DIRECTORY_LEVEL
-		    && (*ptep & PT_PAGE_SIZE_MASK)
-		    && (PTTYPE == 64 || is_pse(vcpu))) {
-			walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
-				>> PAGE_SHIFT;
-			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
-			break;
-		}
-
-		walker->inherited_ar &= walker->table[index];
-		table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
-		kunmap_atomic(walker->table, KM_USER0);
-		paddr = safe_gpa_to_hpa(vcpu, table_gfn << PAGE_SHIFT);
-		walker->page = pfn_to_page(paddr >> PAGE_SHIFT);
-		walker->table = kmap_atomic(walker->page, KM_USER0);
-		--walker->level;
-		walker->table_gfn[walker->level - 1 ] = table_gfn;
-		pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
-			 walker->level - 1, table_gfn);
-	}
-	walker->pte = *ptep;
-	if (walker->page)
-		walker->ptep = NULL;
-	if (walker->table)
-		kunmap_atomic(walker->table, KM_USER0);
-	pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
-	return 1;
-
-not_present:
-	walker->error_code = 0;
-	goto err;
-
-access_error:
-	walker->error_code = PFERR_PRESENT_MASK;
-
-err:
-	if (write_fault)
-		walker->error_code |= PFERR_WRITE_MASK;
-	if (user_fault)
-		walker->error_code |= PFERR_USER_MASK;
-	if (fetch_fault)
-		walker->error_code |= PFERR_FETCH_MASK;
-	if (walker->table)
-		kunmap_atomic(walker->table, KM_USER0);
-	return 0;
-}
-
-static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
-					struct guest_walker *walker)
-{
-	mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]);
-}
-
-static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
-				  u64 *shadow_pte,
-				  gpa_t gaddr,
-				  pt_element_t gpte,
-				  u64 access_bits,
-				  int user_fault,
-				  int write_fault,
-				  int *ptwrite,
-				  struct guest_walker *walker,
-				  gfn_t gfn)
-{
-	hpa_t paddr;
-	int dirty = gpte & PT_DIRTY_MASK;
-	u64 spte = *shadow_pte;
-	int was_rmapped = is_rmap_pte(spte);
-
-	pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d"
-		 " user_fault %d gfn %lx\n",
-		 __FUNCTION__, spte, (u64)gpte, access_bits,
-		 write_fault, user_fault, gfn);
-
-	if (write_fault && !dirty) {
-		pt_element_t *guest_ent, *tmp = NULL;
-
-		if (walker->ptep)
-			guest_ent = walker->ptep;
-		else {
-			tmp = kmap_atomic(walker->page, KM_USER0);
-			guest_ent = &tmp[walker->index];
-		}
-
-		*guest_ent |= PT_DIRTY_MASK;
-		if (!walker->ptep)
-			kunmap_atomic(tmp, KM_USER0);
-		dirty = 1;
-		FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
-	}
-
-	spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK;
-	spte |= gpte & PT64_NX_MASK;
-	if (!dirty)
-		access_bits &= ~PT_WRITABLE_MASK;
-
-	paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
-
-	spte |= PT_PRESENT_MASK;
-	if (access_bits & PT_USER_MASK)
-		spte |= PT_USER_MASK;
-
-	if (is_error_hpa(paddr)) {
-		spte |= gaddr;
-		spte |= PT_SHADOW_IO_MARK;
-		spte &= ~PT_PRESENT_MASK;
-		set_shadow_pte(shadow_pte, spte);
-		return;
-	}
-
-	spte |= paddr;
-
-	if ((access_bits & PT_WRITABLE_MASK)
-	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
-		struct kvm_mmu_page *shadow;
-
-		spte |= PT_WRITABLE_MASK;
-		if (user_fault) {
-			mmu_unshadow(vcpu, gfn);
-			goto unshadowed;
-		}
-
-		shadow = kvm_mmu_lookup_page(vcpu, gfn);
-		if (shadow) {
-			pgprintk("%s: found shadow page for %lx, marking ro\n",
-				 __FUNCTION__, gfn);
-			access_bits &= ~PT_WRITABLE_MASK;
-			if (is_writeble_pte(spte)) {
-				spte &= ~PT_WRITABLE_MASK;
-				kvm_x86_ops->tlb_flush(vcpu);
-			}
-			if (write_fault)
-				*ptwrite = 1;
-		}
-	}
-
-unshadowed:
-
-	if (access_bits & PT_WRITABLE_MASK)
-		mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
-
-	set_shadow_pte(shadow_pte, spte);
-	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
-	if (!was_rmapped)
-		rmap_add(vcpu, shadow_pte);
-}
-
-static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t gpte,
-			   u64 *shadow_pte, u64 access_bits,
-			   int user_fault, int write_fault, int *ptwrite,
-			   struct guest_walker *walker, gfn_t gfn)
-{
-	access_bits &= gpte;
-	FNAME(set_pte_common)(vcpu, shadow_pte, gpte & PT_BASE_ADDR_MASK,
-			      gpte, access_bits, user_fault, write_fault,
-			      ptwrite, walker, gfn);
-}
-
-static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
-			      u64 *spte, const void *pte, int bytes)
-{
-	pt_element_t gpte;
-
-	if (bytes < sizeof(pt_element_t))
-		return;
-	gpte = *(const pt_element_t *)pte;
-	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK))
-		return;
-	pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
-	FNAME(set_pte)(vcpu, gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0,
-		       0, NULL, NULL,
-		       (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT);
-}
-
-static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t gpde,
-			   u64 *shadow_pte, u64 access_bits,
-			   int user_fault, int write_fault, int *ptwrite,
-			   struct guest_walker *walker, gfn_t gfn)
-{
-	gpa_t gaddr;
-
-	access_bits &= gpde;
-	gaddr = (gpa_t)gfn << PAGE_SHIFT;
-	if (PTTYPE == 32 && is_cpuid_PSE36())
-		gaddr |= (gpde & PT32_DIR_PSE36_MASK) <<
-			(32 - PT32_DIR_PSE36_SHIFT);
-	FNAME(set_pte_common)(vcpu, shadow_pte, gaddr,
-			      gpde, access_bits, user_fault, write_fault,
-			      ptwrite, walker, gfn);
-}
-
-/*
- * Fetch a shadow pte for a specific level in the paging hierarchy.
- */
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
-			 struct guest_walker *walker,
-			 int user_fault, int write_fault, int *ptwrite)
-{
-	hpa_t shadow_addr;
-	int level;
-	u64 *shadow_ent;
-	u64 *prev_shadow_ent = NULL;
-
-	if (!is_present_pte(walker->pte))
-		return NULL;
-
-	shadow_addr = vcpu->mmu.root_hpa;
-	level = vcpu->mmu.shadow_root_level;
-	if (level == PT32E_ROOT_LEVEL) {
-		shadow_addr = vcpu->mmu.pae_root[(addr >> 30) & 3];
-		shadow_addr &= PT64_BASE_ADDR_MASK;
-		--level;
-	}
-
-	for (; ; level--) {
-		u32 index = SHADOW_PT_INDEX(addr, level);
-		struct kvm_mmu_page *shadow_page;
-		u64 shadow_pte;
-		int metaphysical;
-		gfn_t table_gfn;
-		unsigned hugepage_access = 0;
-
-		shadow_ent = ((u64 *)__va(shadow_addr)) + index;
-		if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
-			if (level == PT_PAGE_TABLE_LEVEL)
-				break;
-			shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
-			prev_shadow_ent = shadow_ent;
-			continue;
-		}
-
-		if (level == PT_PAGE_TABLE_LEVEL)
-			break;
-
-		if (level - 1 == PT_PAGE_TABLE_LEVEL
-		    && walker->level == PT_DIRECTORY_LEVEL) {
-			metaphysical = 1;
-			hugepage_access = walker->pte;
-			hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK;
-			if (walker->pte & PT64_NX_MASK)
-				hugepage_access |= (1 << 2);
-			hugepage_access >>= PT_WRITABLE_SHIFT;
-			table_gfn = (walker->pte & PT_BASE_ADDR_MASK)
-				>> PAGE_SHIFT;
-		} else {
-			metaphysical = 0;
-			table_gfn = walker->table_gfn[level - 2];
-		}
-		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
-					       metaphysical, hugepage_access,
-					       shadow_ent);
-		shadow_addr = __pa(shadow_page->spt);
-		shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
-			| PT_WRITABLE_MASK | PT_USER_MASK;
-		*shadow_ent = shadow_pte;
-		prev_shadow_ent = shadow_ent;
-	}
-
-	if (walker->level == PT_DIRECTORY_LEVEL) {
-		FNAME(set_pde)(vcpu, walker->pte, shadow_ent,
-			       walker->inherited_ar, user_fault, write_fault,
-			       ptwrite, walker, walker->gfn);
-	} else {
-		ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
-		FNAME(set_pte)(vcpu, walker->pte, shadow_ent,
-			       walker->inherited_ar, user_fault, write_fault,
-			       ptwrite, walker, walker->gfn);
-	}
-	return shadow_ent;
-}
-
-/*
- * Page fault handler.  There are several causes for a page fault:
- *   - there is no shadow pte for the guest pte
- *   - write access through a shadow pte marked read only so that we can set
- *     the dirty bit
- *   - write access to a shadow pte marked read only so we can update the page
- *     dirty bitmap, when userspace requests it
- *   - mmio access; in this case we will never install a present shadow pte
- *   - normal guest page fault due to the guest pte marked not present, not
- *     writable, or not executable
- *
- *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
- *           a negative value on error.
- */
-static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
-			       u32 error_code)
-{
-	int write_fault = error_code & PFERR_WRITE_MASK;
-	int user_fault = error_code & PFERR_USER_MASK;
-	int fetch_fault = error_code & PFERR_FETCH_MASK;
-	struct guest_walker walker;
-	u64 *shadow_pte;
-	int write_pt = 0;
-	int r;
-
-	pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
-	kvm_mmu_audit(vcpu, "pre page fault");
-
-	r = mmu_topup_memory_caches(vcpu);
-	if (r)
-		return r;
-
-	/*
-	 * Look up the shadow pte for the faulting address.
-	 */
-	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
-			     fetch_fault);
-
-	/*
-	 * The page is not mapped by the guest.  Let the guest handle it.
-	 */
-	if (!r) {
-		pgprintk("%s: guest page fault\n", __FUNCTION__);
-		inject_page_fault(vcpu, addr, walker.error_code);
-		vcpu->last_pt_write_count = 0; /* reset fork detector */
-		return 0;
-	}
-
-	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-				  &write_pt);
-	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
-		 shadow_pte, *shadow_pte, write_pt);
-
-	if (!write_pt)
-		vcpu->last_pt_write_count = 0; /* reset fork detector */
-
-	/*
-	 * mmio: emulate if accessible, otherwise it's a guest fault.
-	 */
-	if (is_io_pte(*shadow_pte))
-		return 1;
-
-	++vcpu->stat.pf_fixed;
-	kvm_mmu_audit(vcpu, "post page fault (fixed)");
-
-	return write_pt;
-}
-
-static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
-{
-	struct guest_walker walker;
-	gpa_t gpa = UNMAPPED_GVA;
-	int r;
-
-	r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
-
-	if (r) {
-		gpa = (gpa_t)walker.gfn << PAGE_SHIFT;
-		gpa |= vaddr & ~PAGE_MASK;
-	}
-
-	return gpa;
-}
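
FNAME(gva_to_gpa) simply splices the walker's frame number together with the page offset of the original address. A standalone illustration of that composition, assuming the usual x86 PAGE_SHIFT of 12; the values are hypothetical:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_MASK  (~((1UL << PAGE_SHIFT) - 1))

int main(void)
{
	uint64_t gfn   = 0x1234;	/* frame number from the guest walker */
	uint64_t vaddr = 0xdeadbeef;	/* guest virtual address being translated */
	uint64_t gpa   = (gfn << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);

	printf("gpa = 0x%llx\n", (unsigned long long)gpa);	/* 0x1234eef */
	return 0;
}
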
-
-#undef pt_element_t
-#undef guest_walker
-#undef FNAME
-#undef PT_BASE_ADDR_MASK
-#undef PT_INDEX
-#undef SHADOW_PT_INDEX
-#undef PT_LEVEL_MASK
-#undef PT_DIR_BASE_ADDR_MASK
-#undef PT_MAX_FULL_LEVELS

+ 0 - 1662
drivers/kvm/x86_emulate.c

@@ -1,1662 +0,0 @@
-/******************************************************************************
- * x86_emulate.c
- *
- * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
- *
- * Copyright (c) 2005 Keir Fraser
- *
- * Linux coding style, mod r/m decoder, segment base fixes, real-mode
- * privileged instructions:
- *
- * Copyright (C) 2006 Qumranet
- *
- *   Avi Kivity <avi@qumranet.com>
- *   Yaniv Kamay <yaniv@qumranet.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
- */
-
-#ifndef __KERNEL__
-#include <stdio.h>
-#include <stdint.h>
-#include <public/xen.h>
-#define DPRINTF(_f, _a ...) printf( _f , ## _a )
-#else
-#include "kvm.h"
-#define DPRINTF(x...) do {} while (0)
-#endif
-#include "x86_emulate.h"
-#include <linux/module.h>
-
-/*
- * Opcode effective-address decode tables.
- * Note that we only emulate instructions that have at least one memory
- * operand (excluding implicit stack references). We assume that stack
- * references and instruction fetches will never occur in special memory
- * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
- * not be handled.
- */
-
-/* Operand sizes: 8-bit operands or specified/overridden size. */
-#define ByteOp      (1<<0)	/* 8-bit operands. */
-/* Destination operand type. */
-#define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
-#define DstReg      (2<<1)	/* Register operand. */
-#define DstMem      (3<<1)	/* Memory operand. */
-#define DstMask     (3<<1)
-/* Source operand type. */
-#define SrcNone     (0<<3)	/* No source operand. */
-#define SrcImplicit (0<<3)	/* Source operand is implicit in the opcode. */
-#define SrcReg      (1<<3)	/* Register operand. */
-#define SrcMem      (2<<3)	/* Memory operand. */
-#define SrcMem16    (3<<3)	/* Memory operand (16-bit). */
-#define SrcMem32    (4<<3)	/* Memory operand (32-bit). */
-#define SrcImm      (5<<3)	/* Immediate operand. */
-#define SrcImmByte  (6<<3)	/* 8-bit sign-extended immediate operand. */
-#define SrcMask     (7<<3)
-/* Generic ModRM decode. */
-#define ModRM       (1<<6)
-/* Destination is only written; never read. */
-#define Mov         (1<<7)
-#define BitOp       (1<<8)
-
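
These flags pack several independent facts into one table entry, so a single word per opcode records the operand width, the source and destination kinds, and whether a ModRM byte follows. A sketch of unpacking the entry for opcode 0x00 (add r/m8, r8), with the mask values copied from the definitions above:

#include <stdio.h>

#define ByteOp  (1<<0)
#define DstMem  (3<<1)
#define DstMask (3<<1)
#define SrcReg  (1<<3)
#define SrcMask (7<<3)
#define ModRM   (1<<6)

int main(void)
{
	unsigned d = ByteOp | DstMem | SrcReg | ModRM;	/* opcode 0x00 entry */

	printf("byte op   : %d\n", !!(d & ByteOp));		/* 1: 8-bit operands */
	printf("dst is mem: %d\n", (d & DstMask) == DstMem);	/* 1 */
	printf("src is reg: %d\n", (d & SrcMask) == SrcReg);	/* 1 */
	printf("has modrm : %d\n", !!(d & ModRM));		/* 1 */
	return 0;
}
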
-static u8 opcode_table[256] = {
-	/* 0x00 - 0x07 */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
-	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
-	/* 0x08 - 0x0F */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
-	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
-	/* 0x10 - 0x17 */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
-	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
-	/* 0x18 - 0x1F */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
-	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
-	/* 0x20 - 0x27 */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
-	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	SrcImmByte, SrcImm, 0, 0,
-	/* 0x28 - 0x2F */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
-	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
-	/* 0x30 - 0x37 */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
-	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
-	/* 0x38 - 0x3F */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
-	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
-	/* 0x40 - 0x4F */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0x50 - 0x57 */
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	/* 0x58 - 0x5F */
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	/* 0x60 - 0x67 */
-	0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
-	0, 0, 0, 0,
-	/* 0x68 - 0x6F */
-	0, 0, ImplicitOps|Mov, 0,
-	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* insb, insw/insd */
-	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* outsb, outsw/outsd */
-	/* 0x70 - 0x77 */
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	/* 0x78 - 0x7F */
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	/* 0x80 - 0x87 */
-	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
-	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
-	/* 0x88 - 0x8F */
-	ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
-	ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
-	0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov,
-	/* 0x90 - 0x9F */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps, ImplicitOps, 0, 0,
-	/* 0xA0 - 0xA7 */
-	ByteOp | DstReg | SrcMem | Mov, DstReg | SrcMem | Mov,
-	ByteOp | DstMem | SrcReg | Mov, DstMem | SrcReg | Mov,
-	ByteOp | ImplicitOps | Mov, ImplicitOps | Mov,
-	ByteOp | ImplicitOps, ImplicitOps,
-	/* 0xA8 - 0xAF */
-	0, 0, ByteOp | ImplicitOps | Mov, ImplicitOps | Mov,
-	ByteOp | ImplicitOps | Mov, ImplicitOps | Mov,
-	ByteOp | ImplicitOps, ImplicitOps,
-	/* 0xB0 - 0xBF */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0xC0 - 0xC7 */
-	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
-	0, ImplicitOps, 0, 0,
-	ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
-	/* 0xC8 - 0xCF */
-	0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0xD0 - 0xD7 */
-	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
-	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
-	0, 0, 0, 0,
-	/* 0xD8 - 0xDF */
-	0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0xE0 - 0xE7 */
-	0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0xE8 - 0xEF */
-	ImplicitOps, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, 0, 0, 0, 0,
-	/* 0xF0 - 0xF7 */
-	0, 0, 0, 0,
-	ImplicitOps, 0,
-	ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
-	/* 0xF8 - 0xFF */
-	0, 0, 0, 0,
-	0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM
-};
-
-static u16 twobyte_table[256] = {
-	/* 0x00 - 0x0F */
-	0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0,
-	ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
-	/* 0x10 - 0x1F */
-	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
-	/* 0x20 - 0x2F */
-	ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0x30 - 0x3F */
-	ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0x40 - 0x47 */
-	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
-	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
-	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
-	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
-	/* 0x48 - 0x4F */
-	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
-	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
-	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
-	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
-	/* 0x50 - 0x5F */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0x60 - 0x6F */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0x70 - 0x7F */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0x80 - 0x8F */
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	/* 0x90 - 0x9F */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0xA0 - 0xA7 */
-	0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
-	/* 0xA8 - 0xAF */
-	0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
-	/* 0xB0 - 0xB7 */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
-	    DstMem | SrcReg | ModRM | BitOp,
-	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
-	    DstReg | SrcMem16 | ModRM | Mov,
-	/* 0xB8 - 0xBF */
-	0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp,
-	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
-	    DstReg | SrcMem16 | ModRM | Mov,
-	/* 0xC0 - 0xCF */
-	0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
-	0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0xD0 - 0xDF */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0xE0 - 0xEF */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	/* 0xF0 - 0xFF */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-/* Type, address-of, and value of an instruction's operand. */
-struct operand {
-	enum { OP_REG, OP_MEM, OP_IMM } type;
-	unsigned int bytes;
-	unsigned long val, orig_val, *ptr;
-};
-
-/* EFLAGS bit definitions. */
-#define EFLG_OF (1<<11)
-#define EFLG_DF (1<<10)
-#define EFLG_SF (1<<7)
-#define EFLG_ZF (1<<6)
-#define EFLG_AF (1<<4)
-#define EFLG_PF (1<<2)
-#define EFLG_CF (1<<0)
-
-/*
- * Instruction emulation:
- * Most instructions are emulated directly via a fragment of inline assembly
- * code. This allows us to save/restore EFLAGS and thus very easily pick up
- * any modified flags.
- */
-
-#if defined(CONFIG_X86_64)
-#define _LO32 "k"		/* force 32-bit operand */
-#define _STK  "%%rsp"		/* stack pointer */
-#elif defined(__i386__)
-#define _LO32 ""		/* force 32-bit operand */
-#define _STK  "%%esp"		/* stack pointer */
-#endif
-
-/*
- * These EFLAGS bits are restored from saved value during emulation, and
- * any changes are written back to the saved value after emulation.
- */
-#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
-
-/* Before executing instruction: restore necessary bits in EFLAGS. */
-#define _PRE_EFLAGS(_sav, _msk, _tmp) \
-	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); */	\
-	"push %"_sav"; "					\
-	"movl %"_msk",%"_LO32 _tmp"; "				\
-	"andl %"_LO32 _tmp",("_STK"); "				\
-	"pushf; "						\
-	"notl %"_LO32 _tmp"; "					\
-	"andl %"_LO32 _tmp",("_STK"); "				\
-	"pop  %"_tmp"; "					\
-	"orl  %"_LO32 _tmp",("_STK"); "				\
-	"popf; "						\
-	/* _sav &= ~msk; */					\
-	"movl %"_msk",%"_LO32 _tmp"; "				\
-	"notl %"_LO32 _tmp"; "					\
-	"andl %"_LO32 _tmp",%"_sav"; "
-
-/* After executing instruction: write-back necessary bits in EFLAGS. */
-#define _POST_EFLAGS(_sav, _msk, _tmp) \
-	/* _sav |= EFLAGS & _msk; */		\
-	"pushf; "				\
-	"pop  %"_tmp"; "			\
-	"andl %"_msk",%"_LO32 _tmp"; "		\
-	"orl  %"_LO32 _tmp",%"_sav"; "
-
-/* Raw emulation: instruction has two explicit operands. */
-#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
-	do { 								    \
-		unsigned long _tmp;					    \
-									    \
-		switch ((_dst).bytes) {					    \
-		case 2:							    \
-			__asm__ __volatile__ (				    \
-				_PRE_EFLAGS("0","4","2")		    \
-				_op"w %"_wx"3,%1; "			    \
-				_POST_EFLAGS("0","4","2")		    \
-				: "=m" (_eflags), "=m" ((_dst).val),        \
-				  "=&r" (_tmp)				    \
-				: _wy ((_src).val), "i" (EFLAGS_MASK) );    \
-			break;						    \
-		case 4:							    \
-			__asm__ __volatile__ (				    \
-				_PRE_EFLAGS("0","4","2")		    \
-				_op"l %"_lx"3,%1; "			    \
-				_POST_EFLAGS("0","4","2")		    \
-				: "=m" (_eflags), "=m" ((_dst).val),	    \
-				  "=&r" (_tmp)				    \
-				: _ly ((_src).val), "i" (EFLAGS_MASK) );    \
-			break;						    \
-		case 8:							    \
-			__emulate_2op_8byte(_op, _src, _dst,		    \
-					    _eflags, _qx, _qy);		    \
-			break;						    \
-		}							    \
-	} while (0)
-
-#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
-	do {								     \
-		unsigned long _tmp;					     \
-		switch ( (_dst).bytes )					     \
-		{							     \
-		case 1:							     \
-			__asm__ __volatile__ (				     \
-				_PRE_EFLAGS("0","4","2")		     \
-				_op"b %"_bx"3,%1; "			     \
-				_POST_EFLAGS("0","4","2")		     \
-				: "=m" (_eflags), "=m" ((_dst).val),	     \
-				  "=&r" (_tmp)				     \
-				: _by ((_src).val), "i" (EFLAGS_MASK) );     \
-			break;						     \
-		default:						     \
-			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
-					     _wx, _wy, _lx, _ly, _qx, _qy);  \
-			break;						     \
-		}							     \
-	} while (0)
-
-/* Source operand is byte-sized and may be restricted to just %cl. */
-#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                      \
-	__emulate_2op(_op, _src, _dst, _eflags,				\
-		      "b", "c", "b", "c", "b", "c", "b", "c")
-
-/* Source operand is byte, word, long or quad sized. */
-#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                      \
-	__emulate_2op(_op, _src, _dst, _eflags,				\
-		      "b", "q", "w", "r", _LO32, "r", "", "r")
-
-/* Source operand is word, long or quad sized. */
-#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)               \
-	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
-			     "w", "r", _LO32, "r", "", "r")
-
-/* Instruction has only one explicit operand (no source operand). */
-#define emulate_1op(_op, _dst, _eflags)                                    \
-	do {								\
-		unsigned long _tmp;					\
-									\
-		switch ( (_dst).bytes )					\
-		{							\
-		case 1:							\
-			__asm__ __volatile__ (				\
-				_PRE_EFLAGS("0","3","2")		\
-				_op"b %1; "				\
-				_POST_EFLAGS("0","3","2")		\
-				: "=m" (_eflags), "=m" ((_dst).val),	\
-				  "=&r" (_tmp)				\
-				: "i" (EFLAGS_MASK) );			\
-			break;						\
-		case 2:							\
-			__asm__ __volatile__ (				\
-				_PRE_EFLAGS("0","3","2")		\
-				_op"w %1; "				\
-				_POST_EFLAGS("0","3","2")		\
-				: "=m" (_eflags), "=m" ((_dst).val),	\
-				  "=&r" (_tmp)				\
-				: "i" (EFLAGS_MASK) );			\
-			break;						\
-		case 4:							\
-			__asm__ __volatile__ (				\
-				_PRE_EFLAGS("0","3","2")		\
-				_op"l %1; "				\
-				_POST_EFLAGS("0","3","2")		\
-				: "=m" (_eflags), "=m" ((_dst).val),	\
-				  "=&r" (_tmp)				\
-				: "i" (EFLAGS_MASK) );			\
-			break;						\
-		case 8:							\
-			__emulate_1op_8byte(_op, _dst, _eflags);	\
-			break;						\
-		}							\
-	} while (0)
-
-/* Emulate an instruction with quadword operands (x86/64 only). */
-#if defined(CONFIG_X86_64)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)           \
-	do {								  \
-		__asm__ __volatile__ (					  \
-			_PRE_EFLAGS("0","4","2")			  \
-			_op"q %"_qx"3,%1; "				  \
-			_POST_EFLAGS("0","4","2")			  \
-			: "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
-			: _qy ((_src).val), "i" (EFLAGS_MASK) );	  \
-	} while (0)
-
-#define __emulate_1op_8byte(_op, _dst, _eflags)                           \
-	do {								  \
-		__asm__ __volatile__ (					  \
-			_PRE_EFLAGS("0","3","2")			  \
-			_op"q %1; "					  \
-			_POST_EFLAGS("0","3","2")			  \
-			: "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
-			: "i" (EFLAGS_MASK) );				  \
-	} while (0)
-
-#elif defined(__i386__)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
-#define __emulate_1op_8byte(_op, _dst, _eflags)
-#endif				/* __i386__ */
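
The macro family above swaps the guest's saved EFLAGS in around a single real host instruction and captures the result flags afterwards. A much smaller standalone illustration of the same idea, reading ZF/CF back with setz/setc instead of the pushf/popf dance (x86 with GCC or Clang inline asm; the values are illustrative):

#include <stdio.h>

int main(void)
{
	unsigned char dst = 0xff, src = 1, zf, cf;

	/* Run a real 8-bit add on the host and capture ZF and CF. */
	__asm__ __volatile__("addb %3,%0; setz %1; setc %2"
			     : "+q"(dst), "=q"(zf), "=q"(cf)
			     : "q"(src));
	printf("dst=%u zf=%u cf=%u\n", dst, zf, cf);	/* dst=0 zf=1 cf=1 */
	return 0;
}
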
-
-/* Fetch next part of the instruction being emulated. */
-#define insn_fetch(_type, _size, _eip)                                  \
-({	unsigned long _x;						\
-	rc = ops->read_std((unsigned long)(_eip) + ctxt->cs_base, &_x,	\
-                                                  (_size), ctxt->vcpu); \
-	if ( rc != 0 )							\
-		goto done;						\
-	(_eip) += (_size);						\
-	(_type)_x;							\
-})
-
-/* Access/update address held in a register, based on addressing mode. */
-#define address_mask(reg)						\
-	((ad_bytes == sizeof(unsigned long)) ? 				\
-		(reg) :	((reg) & ((1UL << (ad_bytes << 3)) - 1)))
-#define register_address(base, reg)                                     \
-	((base) + address_mask(reg))
-#define register_address_increment(reg, inc)                            \
-	do {								\
-		/* signed type ensures sign extension to long */        \
-		int _inc = (inc);					\
-		if ( ad_bytes == sizeof(unsigned long) )		\
-			(reg) += _inc;					\
-		else							\
-			(reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) | \
-			   (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1)); \
-	} while (0)
-
-#define JMP_REL(rel) 							\
-	do {								\
-		register_address_increment(_eip, rel);			\
-	} while (0)
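
register_address_increment() only moves the low ad_bytes*8 bits of a register, which is what preserves 16-bit wraparound semantics inside a wider host register. A standalone check of that wraparound with ad_bytes == 2 and hypothetical register contents:

#include <stdio.h>

int main(void)
{
	unsigned long reg = 0x1234ffff;
	int ad_bytes = 2, inc = 1;

	if (ad_bytes == sizeof(unsigned long))
		reg += inc;
	else
		reg = (reg & ~((1UL << (ad_bytes << 3)) - 1)) |
		      ((reg + inc) & ((1UL << (ad_bytes << 3)) - 1));

	printf("reg = 0x%lx\n", reg);	/* 0x12340000: upper half untouched */
	return 0;
}
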
-
-/*
- * Given the 'reg' portion of a ModRM byte, and a register block, return a
- * pointer into the block that addresses the relevant register.
- * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
- */
-static void *decode_register(u8 modrm_reg, unsigned long *regs,
-			     int highbyte_regs)
-{
-	void *p;
-
-	p = &regs[modrm_reg];
-	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
-		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
-	return p;
-}
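
With highbyte_regs set, register numbers 4..7 select AH/CH/DH/BH, i.e. byte 1 of rAX/rCX/rDX/rBX, rather than the low bytes of rSP/rBP/rSI/rDI. A standalone copy of the helper demonstrating the AH case (assumes a little-endian host, as on x86):

#include <stdio.h>

static void *decode_register(unsigned char modrm_reg, unsigned long *regs,
			     int highbyte_regs)
{
	void *p = &regs[modrm_reg];

	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
	return p;
}

int main(void)
{
	unsigned long regs[8] = { 0x1122 };		/* regs[0] plays rAX */
	unsigned char *ah = decode_register(4, regs, 1); /* 4 + high bytes = AH */

	printf("AH = 0x%02x\n", *ah);	/* 0x11 on little-endian hosts */
	return 0;
}
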
-
-static int read_descriptor(struct x86_emulate_ctxt *ctxt,
-			   struct x86_emulate_ops *ops,
-			   void *ptr,
-			   u16 *size, unsigned long *address, int op_bytes)
-{
-	int rc;
-
-	if (op_bytes == 2)
-		op_bytes = 3;
-	*address = 0;
-	rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
-			   ctxt->vcpu);
-	if (rc)
-		return rc;
-	rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
-			   ctxt->vcpu);
-	return rc;
-}
-
-static int test_cc(unsigned int condition, unsigned int flags)
-{
-	int rc = 0;
-
-	switch ((condition & 15) >> 1) {
-	case 0: /* o */
-		rc |= (flags & EFLG_OF);
-		break;
-	case 1: /* b/c/nae */
-		rc |= (flags & EFLG_CF);
-		break;
-	case 2: /* z/e */
-		rc |= (flags & EFLG_ZF);
-		break;
-	case 3: /* be/na */
-		rc |= (flags & (EFLG_CF|EFLG_ZF));
-		break;
-	case 4: /* s */
-		rc |= (flags & EFLG_SF);
-		break;
-	case 5: /* p/pe */
-		rc |= (flags & EFLG_PF);
-		break;
-	case 7: /* le/ng */
-		rc |= (flags & EFLG_ZF);
-		/* fall through */
-	case 6: /* l/nge */
-		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
-		break;
-	}
-
-	/* Odd condition identifiers (lsb == 1) have inverted sense. */
-	return (!!rc ^ (condition & 1));
-}
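
One worked case: condition nibble 0x4 is jz/je (taken when ZF is set) and 0x5 is its lsb-inverted twin jnz/jne. A standalone copy trimmed to just the z/e arm to show the inversion:

#include <assert.h>

#define EFLG_ZF (1<<6)

static int test_cc(unsigned condition, unsigned flags)
{
	int rc = 0;

	if (((condition & 15) >> 1) == 2)	/* z/e arm only, for brevity */
		rc |= flags & EFLG_ZF;
	return !!rc ^ (condition & 1);
}

int main(void)
{
	assert(test_cc(0x4, EFLG_ZF) == 1);	/* jz: taken */
	assert(test_cc(0x5, EFLG_ZF) == 0);	/* jnz: not taken */
	return 0;
}
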
-
-int
-x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
-{
-	unsigned d;
-	u8 b, sib, twobyte = 0, rex_prefix = 0;
-	u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
-	unsigned long *override_base = NULL;
-	unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
-	int rc = 0;
-	struct operand src, dst;
-	unsigned long cr2 = ctxt->cr2;
-	int mode = ctxt->mode;
-	unsigned long modrm_ea;
-	int use_modrm_ea, index_reg = 0, base_reg = 0, scale, rip_relative = 0;
-	int no_wb = 0;
-	u64 msr_data;
-
-	/* Shadow copy of register state. Committed on successful emulation. */
-	unsigned long _regs[NR_VCPU_REGS];
-	unsigned long _eip = ctxt->vcpu->rip, _eflags = ctxt->eflags;
-	unsigned long modrm_val = 0;
-
-	memcpy(_regs, ctxt->vcpu->regs, sizeof _regs);
-
-	switch (mode) {
-	case X86EMUL_MODE_REAL:
-	case X86EMUL_MODE_PROT16:
-		op_bytes = ad_bytes = 2;
-		break;
-	case X86EMUL_MODE_PROT32:
-		op_bytes = ad_bytes = 4;
-		break;
-#ifdef CONFIG_X86_64
-	case X86EMUL_MODE_PROT64:
-		op_bytes = 4;
-		ad_bytes = 8;
-		break;
-#endif
-	default:
-		return -1;
-	}
-
-	/* Legacy prefixes. */
-	for (i = 0; i < 8; i++) {
-		switch (b = insn_fetch(u8, 1, _eip)) {
-		case 0x66:	/* operand-size override */
-			op_bytes ^= 6;	/* switch between 2/4 bytes */
-			break;
-		case 0x67:	/* address-size override */
-			if (mode == X86EMUL_MODE_PROT64)
-				ad_bytes ^= 12;	/* switch between 4/8 bytes */
-			else
-				ad_bytes ^= 6;	/* switch between 2/4 bytes */
-			break;
-		case 0x2e:	/* CS override */
-			override_base = &ctxt->cs_base;
-			break;
-		case 0x3e:	/* DS override */
-			override_base = &ctxt->ds_base;
-			break;
-		case 0x26:	/* ES override */
-			override_base = &ctxt->es_base;
-			break;
-		case 0x64:	/* FS override */
-			override_base = &ctxt->fs_base;
-			break;
-		case 0x65:	/* GS override */
-			override_base = &ctxt->gs_base;
-			break;
-		case 0x36:	/* SS override */
-			override_base = &ctxt->ss_base;
-			break;
-		case 0xf0:	/* LOCK */
-			lock_prefix = 1;
-			break;
-		case 0xf2:	/* REPNE/REPNZ */
-		case 0xf3:	/* REP/REPE/REPZ */
-			rep_prefix = 1;
-			break;
-		default:
-			goto done_prefixes;
-		}
-	}
-
-done_prefixes:
-
-	/* REX prefix. */
-	if ((mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40)) {
-		rex_prefix = b;
-		if (b & 8)
-			op_bytes = 8;	/* REX.W */
-		modrm_reg = (b & 4) << 1;	/* REX.R */
-		index_reg = (b & 2) << 2; /* REX.X */
-		modrm_rm = base_reg = (b & 1) << 3; /* REX.B */
-		b = insn_fetch(u8, 1, _eip);
-	}
-
-	/* Opcode byte(s). */
-	d = opcode_table[b];
-	if (d == 0) {
-		/* Two-byte opcode? */
-		if (b == 0x0f) {
-			twobyte = 1;
-			b = insn_fetch(u8, 1, _eip);
-			d = twobyte_table[b];
-		}
-
-		/* Unrecognised? */
-		if (d == 0)
-			goto cannot_emulate;
-	}
-
-	/* ModRM and SIB bytes. */
-	if (d & ModRM) {
-		modrm = insn_fetch(u8, 1, _eip);
-		modrm_mod |= (modrm & 0xc0) >> 6;
-		modrm_reg |= (modrm & 0x38) >> 3;
-		modrm_rm |= (modrm & 0x07);
-		modrm_ea = 0;
-		use_modrm_ea = 1;
-
-		if (modrm_mod == 3) {
-			modrm_val = *(unsigned long *)
-				decode_register(modrm_rm, _regs, d & ByteOp);
-			goto modrm_done;
-		}
-
-		if (ad_bytes == 2) {
-			unsigned bx = _regs[VCPU_REGS_RBX];
-			unsigned bp = _regs[VCPU_REGS_RBP];
-			unsigned si = _regs[VCPU_REGS_RSI];
-			unsigned di = _regs[VCPU_REGS_RDI];
-
-			/* 16-bit ModR/M decode. */
-			switch (modrm_mod) {
-			case 0:
-				if (modrm_rm == 6)
-					modrm_ea += insn_fetch(u16, 2, _eip);
-				break;
-			case 1:
-				modrm_ea += insn_fetch(s8, 1, _eip);
-				break;
-			case 2:
-				modrm_ea += insn_fetch(u16, 2, _eip);
-				break;
-			}
-			switch (modrm_rm) {
-			case 0:
-				modrm_ea += bx + si;
-				break;
-			case 1:
-				modrm_ea += bx + di;
-				break;
-			case 2:
-				modrm_ea += bp + si;
-				break;
-			case 3:
-				modrm_ea += bp + di;
-				break;
-			case 4:
-				modrm_ea += si;
-				break;
-			case 5:
-				modrm_ea += di;
-				break;
-			case 6:
-				if (modrm_mod != 0)
-					modrm_ea += bp;
-				break;
-			case 7:
-				modrm_ea += bx;
-				break;
-			}
-			if (modrm_rm == 2 || modrm_rm == 3 ||
-			    (modrm_rm == 6 && modrm_mod != 0))
-				if (!override_base)
-					override_base = &ctxt->ss_base;
-			modrm_ea = (u16)modrm_ea;
-		} else {
-			/* 32/64-bit ModR/M decode. */
-			switch (modrm_rm) {
-			case 4:
-			case 12:
-				sib = insn_fetch(u8, 1, _eip);
-				index_reg |= (sib >> 3) & 7;
-				base_reg |= sib & 7;
-				scale = sib >> 6;
-
-				switch (base_reg) {
-				case 5:
-					if (modrm_mod != 0)
-						modrm_ea += _regs[base_reg];
-					else
-						modrm_ea += insn_fetch(s32, 4, _eip);
-					break;
-				default:
-					modrm_ea += _regs[base_reg];
-				}
-				switch (index_reg) {
-				case 4:
-					break;
-				default:
-					modrm_ea += _regs[index_reg] << scale;
-
-				}
-				break;
-			case 5:
-				if (modrm_mod != 0)
-					modrm_ea += _regs[modrm_rm];
-				else if (mode == X86EMUL_MODE_PROT64)
-					rip_relative = 1;
-				break;
-			default:
-				modrm_ea += _regs[modrm_rm];
-				break;
-			}
-			switch (modrm_mod) {
-			case 0:
-				if (modrm_rm == 5)
-					modrm_ea += insn_fetch(s32, 4, _eip);
-				break;
-			case 1:
-				modrm_ea += insn_fetch(s8, 1, _eip);
-				break;
-			case 2:
-				modrm_ea += insn_fetch(s32, 4, _eip);
-				break;
-			}
-		}
-		if (!override_base)
-			override_base = &ctxt->ds_base;
-		if (mode == X86EMUL_MODE_PROT64 &&
-		    override_base != &ctxt->fs_base &&
-		    override_base != &ctxt->gs_base)
-			override_base = NULL;
-
-		if (override_base)
-			modrm_ea += *override_base;
-
-		if (rip_relative) {
-			modrm_ea += _eip;
-			switch (d & SrcMask) {
-			case SrcImmByte:
-				modrm_ea += 1;
-				break;
-			case SrcImm:
-				if (d & ByteOp)
-					modrm_ea += 1;
-				else
-					if (op_bytes == 8)
-						modrm_ea += 4;
-					else
-						modrm_ea += op_bytes;
-			}
-		}
-		if (ad_bytes != 8)
-			modrm_ea = (u32)modrm_ea;
-		cr2 = modrm_ea;
-	modrm_done:
-		;
-	}
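
The mod/reg/rm split at the top of this block is plain bit slicing: bits 7:6 are mod, 5:3 are reg, 2:0 are rm. For example, ModRM byte 0x46 decodes to mod=1 (8-bit displacement), reg=0, rm=6, which per the 16-bit table above yields an effective address of BP plus disp8. A quick standalone check of the extraction:

#include <stdio.h>

int main(void)
{
	unsigned char modrm = 0x46;	/* illustrative ModRM byte */

	printf("mod=%d reg=%d rm=%d\n",
	       (modrm & 0xc0) >> 6, (modrm & 0x38) >> 3, modrm & 0x07);
	/* prints: mod=1 reg=0 rm=6 */
	return 0;
}
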
-
-	/*
-	 * Decode and fetch the source operand: register, memory
-	 * or immediate.
-	 */
-	switch (d & SrcMask) {
-	case SrcNone:
-		break;
-	case SrcReg:
-		src.type = OP_REG;
-		if (d & ByteOp) {
-			src.ptr = decode_register(modrm_reg, _regs,
-						  (rex_prefix == 0));
-			src.val = src.orig_val = *(u8 *) src.ptr;
-			src.bytes = 1;
-		} else {
-			src.ptr = decode_register(modrm_reg, _regs, 0);
-			switch ((src.bytes = op_bytes)) {
-			case 2:
-				src.val = src.orig_val = *(u16 *) src.ptr;
-				break;
-			case 4:
-				src.val = src.orig_val = *(u32 *) src.ptr;
-				break;
-			case 8:
-				src.val = src.orig_val = *(u64 *) src.ptr;
-				break;
-			}
-		}
-		break;
-	case SrcMem16:
-		src.bytes = 2;
-		goto srcmem_common;
-	case SrcMem32:
-		src.bytes = 4;
-		goto srcmem_common;
-	case SrcMem:
-		src.bytes = (d & ByteOp) ? 1 : op_bytes;
-		/* Don't fetch the address for invlpg: it could be unmapped. */
-		if (twobyte && b == 0x01 && modrm_reg == 7)
-			break;
-	      srcmem_common:
-		/*
-		 * For instructions with a ModR/M byte, switch to register
-		 * access if Mod = 3.
-		 */
-		if ((d & ModRM) && modrm_mod == 3) {
-			src.type = OP_REG;
-			break;
-		}
-		src.type = OP_MEM;
-		src.ptr = (unsigned long *)cr2;
-		src.val = 0;
-		if ((rc = ops->read_emulated((unsigned long)src.ptr,
-					     &src.val, src.bytes, ctxt->vcpu)) != 0)
-			goto done;
-		src.orig_val = src.val;
-		break;
-	case SrcImm:
-		src.type = OP_IMM;
-		src.ptr = (unsigned long *)_eip;
-		src.bytes = (d & ByteOp) ? 1 : op_bytes;
-		if (src.bytes == 8)
-			src.bytes = 4;
-		/* NB. Immediates are sign-extended as necessary. */
-		switch (src.bytes) {
-		case 1:
-			src.val = insn_fetch(s8, 1, _eip);
-			break;
-		case 2:
-			src.val = insn_fetch(s16, 2, _eip);
-			break;
-		case 4:
-			src.val = insn_fetch(s32, 4, _eip);
-			break;
-		}
-		break;
-	case SrcImmByte:
-		src.type = OP_IMM;
-		src.ptr = (unsigned long *)_eip;
-		src.bytes = 1;
-		src.val = insn_fetch(s8, 1, _eip);
-		break;
-	}
-
-	/* Decode and fetch the destination operand: register or memory. */
-	switch (d & DstMask) {
-	case ImplicitOps:
-		/* Special instructions do their own operand decoding. */
-		goto special_insn;
-	case DstReg:
-		dst.type = OP_REG;
-		if ((d & ByteOp)
-		    && !(twobyte && (b == 0xb6 || b == 0xb7))) {
-			dst.ptr = decode_register(modrm_reg, _regs,
-						  (rex_prefix == 0));
-			dst.val = *(u8 *) dst.ptr;
-			dst.bytes = 1;
-		} else {
-			dst.ptr = decode_register(modrm_reg, _regs, 0);
-			switch ((dst.bytes = op_bytes)) {
-			case 2:
-				dst.val = *(u16 *)dst.ptr;
-				break;
-			case 4:
-				dst.val = *(u32 *)dst.ptr;
-				break;
-			case 8:
-				dst.val = *(u64 *)dst.ptr;
-				break;
-			}
-		}
-		break;
-	case DstMem:
-		dst.type = OP_MEM;
-		dst.ptr = (unsigned long *)cr2;
-		dst.bytes = (d & ByteOp) ? 1 : op_bytes;
-		dst.val = 0;
-		/*
-		 * For instructions with a ModR/M byte, switch to register
-		 * access if Mod = 3.
-		 */
-		if ((d & ModRM) && modrm_mod == 3) {
-			dst.type = OP_REG;
-			break;
-		}
-		if (d & BitOp) {
-			unsigned long mask = ~(dst.bytes * 8 - 1);
-
-			dst.ptr = (void *)dst.ptr + (src.val & mask) / 8;
-		}
-		if (!(d & Mov) && /* optimisation - avoid slow emulated read */
-		    ((rc = ops->read_emulated((unsigned long)dst.ptr,
-					      &dst.val, dst.bytes, ctxt->vcpu)) != 0))
-			goto done;
-		break;
-	}
-	dst.orig_val = dst.val;
-
-	if (twobyte)
-		goto twobyte_insn;
-
-	switch (b) {
-	case 0x00 ... 0x05:
-	      add:		/* add */
-		emulate_2op_SrcV("add", src, dst, _eflags);
-		break;
-	case 0x08 ... 0x0d:
-	      or:		/* or */
-		emulate_2op_SrcV("or", src, dst, _eflags);
-		break;
-	case 0x10 ... 0x15:
-	      adc:		/* adc */
-		emulate_2op_SrcV("adc", src, dst, _eflags);
-		break;
-	case 0x18 ... 0x1d:
-	      sbb:		/* sbb */
-		emulate_2op_SrcV("sbb", src, dst, _eflags);
-		break;
-	case 0x20 ... 0x23:
-	      and:		/* and */
-		emulate_2op_SrcV("and", src, dst, _eflags);
-		break;
-	case 0x24:              /* and al imm8 */
-		dst.type = OP_REG;
-		dst.ptr = &_regs[VCPU_REGS_RAX];
-		dst.val = *(u8 *)dst.ptr;
-		dst.bytes = 1;
-		dst.orig_val = dst.val;
-		goto and;
-	case 0x25:              /* and ax imm16, or eax imm32 */
-		dst.type = OP_REG;
-		dst.bytes = op_bytes;
-		dst.ptr = &_regs[VCPU_REGS_RAX];
-		if (op_bytes == 2)
-			dst.val = *(u16 *)dst.ptr;
-		else
-			dst.val = *(u32 *)dst.ptr;
-		dst.orig_val = dst.val;
-		goto and;
-	case 0x28 ... 0x2d:
-	      sub:		/* sub */
-		emulate_2op_SrcV("sub", src, dst, _eflags);
-		break;
-	case 0x30 ... 0x35:
-	      xor:		/* xor */
-		emulate_2op_SrcV("xor", src, dst, _eflags);
-		break;
-	case 0x38 ... 0x3d:
-	      cmp:		/* cmp */
-		emulate_2op_SrcV("cmp", src, dst, _eflags);
-		break;
-	case 0x63:		/* movsxd */
-		if (mode != X86EMUL_MODE_PROT64)
-			goto cannot_emulate;
-		dst.val = (s32) src.val;
-		break;
-	case 0x80 ... 0x83:	/* Grp1 */
-		switch (modrm_reg) {
-		case 0:
-			goto add;
-		case 1:
-			goto or;
-		case 2:
-			goto adc;
-		case 3:
-			goto sbb;
-		case 4:
-			goto and;
-		case 5:
-			goto sub;
-		case 6:
-			goto xor;
-		case 7:
-			goto cmp;
-		}
-		break;
-	case 0x84 ... 0x85:
-	      test:		/* test */
-		emulate_2op_SrcV("test", src, dst, _eflags);
-		break;
-	case 0x86 ... 0x87:	/* xchg */
-		/* Write back the register source. */
-		switch (dst.bytes) {
-		case 1:
-			*(u8 *) src.ptr = (u8) dst.val;
-			break;
-		case 2:
-			*(u16 *) src.ptr = (u16) dst.val;
-			break;
-		case 4:
-			*src.ptr = (u32) dst.val;
-			break;	/* 64b reg: zero-extend */
-		case 8:
-			*src.ptr = dst.val;
-			break;
-		}
-		/*
-		 * Write back the memory destination with implicit LOCK
-		 * prefix.
-		 */
-		dst.val = src.val;
-		lock_prefix = 1;
-		break;
-	case 0x88 ... 0x8b:	/* mov */
-		goto mov;
-	case 0x8d: /* lea r16/r32, m */
-		dst.val = modrm_val;
-		break;
-	case 0x8f:		/* pop (sole member of Grp1a) */
-		/* 64-bit mode: POP always pops a 64-bit operand. */
-		if (mode == X86EMUL_MODE_PROT64)
-			dst.bytes = 8;
-		if ((rc = ops->read_std(register_address(ctxt->ss_base,
-							 _regs[VCPU_REGS_RSP]),
-					&dst.val, dst.bytes, ctxt->vcpu)) != 0)
-			goto done;
-		register_address_increment(_regs[VCPU_REGS_RSP], dst.bytes);
-		break;
-	case 0xa0 ... 0xa1:	/* mov */
-		dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX];
-		dst.val = src.val;
-		_eip += ad_bytes;	/* skip src displacement */
-		break;
-	case 0xa2 ... 0xa3:	/* mov */
-		dst.val = (unsigned long)_regs[VCPU_REGS_RAX];
-		_eip += ad_bytes;	/* skip dst displacement */
-		break;
-	case 0xc0 ... 0xc1:
-	      grp2:		/* Grp2 */
-		switch (modrm_reg) {
-		case 0:	/* rol */
-			emulate_2op_SrcB("rol", src, dst, _eflags);
-			break;
-		case 1:	/* ror */
-			emulate_2op_SrcB("ror", src, dst, _eflags);
-			break;
-		case 2:	/* rcl */
-			emulate_2op_SrcB("rcl", src, dst, _eflags);
-			break;
-		case 3:	/* rcr */
-			emulate_2op_SrcB("rcr", src, dst, _eflags);
-			break;
-		case 4:	/* sal/shl */
-		case 6:	/* sal/shl */
-			emulate_2op_SrcB("sal", src, dst, _eflags);
-			break;
-		case 5:	/* shr */
-			emulate_2op_SrcB("shr", src, dst, _eflags);
-			break;
-		case 7:	/* sar */
-			emulate_2op_SrcB("sar", src, dst, _eflags);
-			break;
-		}
-		break;
-	case 0xc6 ... 0xc7:	/* mov (sole member of Grp11) */
-	mov:
-		dst.val = src.val;
-		break;
-	case 0xd0 ... 0xd1:	/* Grp2 */
-		src.val = 1;
-		goto grp2;
-	case 0xd2 ... 0xd3:	/* Grp2 */
-		src.val = _regs[VCPU_REGS_RCX];
-		goto grp2;
-	case 0xf6 ... 0xf7:	/* Grp3 */
-		switch (modrm_reg) {
-		case 0 ... 1:	/* test */
-			/*
-			 * Special case in Grp3: test has an immediate
-			 * source operand.
-			 */
-			src.type = OP_IMM;
-			src.ptr = (unsigned long *)_eip;
-			src.bytes = (d & ByteOp) ? 1 : op_bytes;
-			if (src.bytes == 8)
-				src.bytes = 4;
-			switch (src.bytes) {
-			case 1:
-				src.val = insn_fetch(s8, 1, _eip);
-				break;
-			case 2:
-				src.val = insn_fetch(s16, 2, _eip);
-				break;
-			case 4:
-				src.val = insn_fetch(s32, 4, _eip);
-				break;
-			}
-			goto test;
-		case 2:	/* not */
-			dst.val = ~dst.val;
-			break;
-		case 3:	/* neg */
-			emulate_1op("neg", dst, _eflags);
-			break;
-		default:
-			goto cannot_emulate;
-		}
-		break;
-	case 0xfe ... 0xff:	/* Grp4/Grp5 */
-		switch (modrm_reg) {
-		case 0:	/* inc */
-			emulate_1op("inc", dst, _eflags);
-			break;
-		case 1:	/* dec */
-			emulate_1op("dec", dst, _eflags);
-			break;
-		case 4: /* jmp abs */
-			if (b == 0xff)
-				_eip = dst.val;
-			else
-				goto cannot_emulate;
-			break;
-		case 6:	/* push */
-			/* 64-bit mode: PUSH always pushes a 64-bit operand. */
-			if (mode == X86EMUL_MODE_PROT64) {
-				dst.bytes = 8;
-				if ((rc = ops->read_std((unsigned long)dst.ptr,
-							&dst.val, 8,
-							ctxt->vcpu)) != 0)
-					goto done;
-			}
-			register_address_increment(_regs[VCPU_REGS_RSP],
-						   -dst.bytes);
-			if ((rc = ops->write_emulated(
-				     register_address(ctxt->ss_base,
-						      _regs[VCPU_REGS_RSP]),
-				     &dst.val, dst.bytes, ctxt->vcpu)) != 0)
-				goto done;
-			no_wb = 1;
-			break;
-		default:
-			goto cannot_emulate;
-		}
-		break;
-	}
-
-writeback:
-	if (!no_wb) {
-		switch (dst.type) {
-		case OP_REG:
-			/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
-			switch (dst.bytes) {
-			case 1:
-				*(u8 *)dst.ptr = (u8)dst.val;
-				break;
-			case 2:
-				*(u16 *)dst.ptr = (u16)dst.val;
-				break;
-			case 4:
-				*dst.ptr = (u32)dst.val;
-				break;	/* 64b: zero-ext */
-			case 8:
-				*dst.ptr = dst.val;
-				break;
-			}
-			break;
-		case OP_MEM:
-			if (lock_prefix)
-				rc = ops->cmpxchg_emulated((unsigned long)dst.
-							   ptr, &dst.orig_val,
-							   &dst.val, dst.bytes,
-							   ctxt->vcpu);
-			else
-				rc = ops->write_emulated((unsigned long)dst.ptr,
-							 &dst.val, dst.bytes,
-							 ctxt->vcpu);
-			if (rc != 0)
-				goto done;
-		default:
-			break;
-		}
-	}
-
-	/* Commit shadow register state. */
-	memcpy(ctxt->vcpu->regs, _regs, sizeof _regs);
-	ctxt->eflags = _eflags;
-	ctxt->vcpu->rip = _eip;
-
-done:
-	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
-
-special_insn:
-	if (twobyte)
-		goto twobyte_special_insn;
-	switch(b) {
-	case 0x50 ... 0x57:  /* push reg */
-		if (op_bytes == 2)
-			src.val = (u16) _regs[b & 0x7];
-		else
-			src.val = (u32) _regs[b & 0x7];
-		dst.type  = OP_MEM;
-		dst.bytes = op_bytes;
-		dst.val = src.val;
-		register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes);
-		dst.ptr = (void *) register_address(
-			ctxt->ss_base, _regs[VCPU_REGS_RSP]);
-		break;
-	case 0x58 ... 0x5f: /* pop reg */
-		dst.ptr = (unsigned long *)&_regs[b & 0x7];
-	pop_instruction:
-		if ((rc = ops->read_std(register_address(ctxt->ss_base,
-			_regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt->vcpu))
-			!= 0)
-			goto done;
-
-		register_address_increment(_regs[VCPU_REGS_RSP], op_bytes);
-		no_wb = 1; /* Disable writeback. */
-		break;
-	case 0x6a: /* push imm8 */
-		src.val = 0L;
-		src.val = insn_fetch(s8, 1, _eip);
-	push:
-		dst.type  = OP_MEM;
-		dst.bytes = op_bytes;
-		dst.val = src.val;
-		register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes);
-		dst.ptr = (void *) register_address(ctxt->ss_base,
-							_regs[VCPU_REGS_RSP]);
-		break;
-	case 0x6c:		/* insb */
-	case 0x6d:		/* insw/insd */
-		 if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
-				1, 					/* in */
-				(d & ByteOp) ? 1 : op_bytes, 		/* size */
-				rep_prefix ?
-				address_mask(_regs[VCPU_REGS_RCX]) : 1,	/* count */
-				(_eflags & EFLG_DF),			/* down */
-				register_address(ctxt->es_base,
-						 _regs[VCPU_REGS_RDI]),	/* address */
-				rep_prefix,
-				_regs[VCPU_REGS_RDX]			/* port */
-				) == 0)
-			return -1;
-		return 0;
-	case 0x6e:		/* outsb */
-	case 0x6f:		/* outsw/outsd */
-		if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
-				0, 					/* in */
-				(d & ByteOp) ? 1 : op_bytes, 		/* size */
-				rep_prefix ?
-				address_mask(_regs[VCPU_REGS_RCX]) : 1,	/* count */
-				(_eflags & EFLG_DF),			/* down */
-				register_address(override_base ?
-						 *override_base : ctxt->ds_base,
-						 _regs[VCPU_REGS_RSI]),	/* address */
-				rep_prefix,
-				_regs[VCPU_REGS_RDX]			/* port */
-				) == 0)
-			return -1;
-		return 0;
-	case 0x70 ... 0x7f: /* jcc (short) */ {
-		int rel = insn_fetch(s8, 1, _eip);
-
-		if (test_cc(b, _eflags))
-		JMP_REL(rel);
-		break;
-	}
-	case 0x9c: /* pushf */
-		src.val =  (unsigned long) _eflags;
-		goto push;
-	case 0x9d: /* popf */
-		dst.ptr = (unsigned long *) &_eflags;
-		goto pop_instruction;
-	case 0xc3: /* ret */
-		dst.ptr = &_eip;
-		goto pop_instruction;
-	case 0xf4:              /* hlt */
-		ctxt->vcpu->halt_request = 1;
-		goto done;
-	}
-	if (rep_prefix) {
-		if (_regs[VCPU_REGS_RCX] == 0) {
-			ctxt->vcpu->rip = _eip;
-			goto done;
-		}
-		_regs[VCPU_REGS_RCX]--;
-		_eip = ctxt->vcpu->rip;
-	}
-	switch (b) {
-	case 0xa4 ... 0xa5:	/* movs */
-		dst.type = OP_MEM;
-		dst.bytes = (d & ByteOp) ? 1 : op_bytes;
-		dst.ptr = (unsigned long *)register_address(ctxt->es_base,
-							_regs[VCPU_REGS_RDI]);
-		if ((rc = ops->read_emulated(register_address(
-		      override_base ? *override_base : ctxt->ds_base,
-		      _regs[VCPU_REGS_RSI]), &dst.val, dst.bytes, ctxt->vcpu)) != 0)
-			goto done;
-		register_address_increment(_regs[VCPU_REGS_RSI],
-			     (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
-		register_address_increment(_regs[VCPU_REGS_RDI],
-			     (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
-		break;
-	case 0xa6 ... 0xa7:	/* cmps */
-		DPRINTF("Urk! I don't handle CMPS.\n");
-		goto cannot_emulate;
-	case 0xaa ... 0xab:	/* stos */
-		dst.type = OP_MEM;
-		dst.bytes = (d & ByteOp) ? 1 : op_bytes;
-		dst.ptr = (unsigned long *)cr2;
-		dst.val = _regs[VCPU_REGS_RAX];
-		register_address_increment(_regs[VCPU_REGS_RDI],
-			     (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
-		break;
-	case 0xac ... 0xad:	/* lods */
-		dst.type = OP_REG;
-		dst.bytes = (d & ByteOp) ? 1 : op_bytes;
-		dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX];
-		if ((rc = ops->read_emulated(cr2, &dst.val, dst.bytes,
-					     ctxt->vcpu)) != 0)
-			goto done;
-		register_address_increment(_regs[VCPU_REGS_RSI],
-			   (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
-		break;
-	case 0xae ... 0xaf:	/* scas */
-		DPRINTF("Urk! I don't handle SCAS.\n");
-		goto cannot_emulate;
-	case 0xe8: /* call (near) */ {
-		long int rel;
-		switch (op_bytes) {
-		case 2:
-			rel = insn_fetch(s16, 2, _eip);
-			break;
-		case 4:
-			rel = insn_fetch(s32, 4, _eip);
-			break;
-		case 8:
-			rel = insn_fetch(s64, 8, _eip);
-			break;
-		default:
-			DPRINTF("Call: Invalid op_bytes\n");
-			goto cannot_emulate;
-		}
-		src.val = (unsigned long) _eip;
-		JMP_REL(rel);
-		op_bytes = ad_bytes;
-		goto push;
-	}
-	case 0xe9: /* jmp rel */
-	case 0xeb: /* jmp rel short */
-		JMP_REL(src.val);
-		no_wb = 1; /* Disable writeback. */
-		break;
-
-
-	}
-	goto writeback;
-
-twobyte_insn:
-	switch (b) {
-	case 0x01: /* lgdt, lidt, lmsw */
-		/* Disable writeback. */
-		no_wb = 1;
-		switch (modrm_reg) {
-			u16 size;
-			unsigned long address;
-
-		case 2: /* lgdt */
-			rc = read_descriptor(ctxt, ops, src.ptr,
-					     &size, &address, op_bytes);
-			if (rc)
-				goto done;
-			realmode_lgdt(ctxt->vcpu, size, address);
-			break;
-		case 3: /* lidt */
-			rc = read_descriptor(ctxt, ops, src.ptr,
-					     &size, &address, op_bytes);
-			if (rc)
-				goto done;
-			realmode_lidt(ctxt->vcpu, size, address);
-			break;
-		case 4: /* smsw */
-			if (modrm_mod != 3)
-				goto cannot_emulate;
-			*(u16 *)&_regs[modrm_rm]
-				= realmode_get_cr(ctxt->vcpu, 0);
-			break;
-		case 6: /* lmsw */
-			if (modrm_mod != 3)
-				goto cannot_emulate;
-			realmode_lmsw(ctxt->vcpu, (u16)modrm_val, &_eflags);
-			break;
-		case 7: /* invlpg */
-			emulate_invlpg(ctxt->vcpu, cr2);
-			break;
-		default:
-			goto cannot_emulate;
-		}
-		break;
-	case 0x21: /* mov from dr to reg */
-		no_wb = 1;
-		if (modrm_mod != 3)
-			goto cannot_emulate;
-		rc = emulator_get_dr(ctxt, modrm_reg, &_regs[modrm_rm]);
-		break;
-	case 0x23: /* mov from reg to dr */
-		no_wb = 1;
-		if (modrm_mod != 3)
-			goto cannot_emulate;
-		rc = emulator_set_dr(ctxt, modrm_reg, _regs[modrm_rm]);
-		break;
-	case 0x40 ... 0x4f:	/* cmov */
-		dst.val = dst.orig_val = src.val;
-		no_wb = 1;
-		/*
-		 * First, assume we're decoding an even cmov opcode
-		 * (lsb == 0).
-		 */
-		switch ((b & 15) >> 1) {
-		case 0:	/* cmovo */
-			no_wb = (_eflags & EFLG_OF) ? 0 : 1;
-			break;
-		case 1:	/* cmovb/cmovc/cmovnae */
-			no_wb = (_eflags & EFLG_CF) ? 0 : 1;
-			break;
-		case 2:	/* cmovz/cmove */
-			no_wb = (_eflags & EFLG_ZF) ? 0 : 1;
-			break;
-		case 3:	/* cmovbe/cmovna */
-			no_wb = (_eflags & (EFLG_CF | EFLG_ZF)) ? 0 : 1;
-			break;
-		case 4:	/* cmovs */
-			no_wb = (_eflags & EFLG_SF) ? 0 : 1;
-			break;
-		case 5:	/* cmovp/cmovpe */
-			no_wb = (_eflags & EFLG_PF) ? 0 : 1;
-			break;
-		case 7:	/* cmovle/cmovng */
-			no_wb = (_eflags & EFLG_ZF) ? 0 : 1;
-			/* fall through */
-		case 6:	/* cmovl/cmovnge */
-			no_wb &= (!(_eflags & EFLG_SF) !=
-			      !(_eflags & EFLG_OF)) ? 0 : 1;
-			break;
-		}
-		/* Odd cmov opcodes (lsb == 1) have inverted sense. */
-		no_wb ^= b & 1;
-		break;
-	case 0xa3:
-	      bt:		/* bt */
-		src.val &= (dst.bytes << 3) - 1; /* only subword offset */
-		emulate_2op_SrcV_nobyte("bt", src, dst, _eflags);
-		break;
-	case 0xab:
-	      bts:		/* bts */
-		src.val &= (dst.bytes << 3) - 1; /* only subword offset */
-		emulate_2op_SrcV_nobyte("bts", src, dst, _eflags);
-		break;
-	case 0xb0 ... 0xb1:	/* cmpxchg */
-		/*
-		 * Save real source value, then compare EAX against
-		 * destination.
-		 */
-		src.orig_val = src.val;
-		src.val = _regs[VCPU_REGS_RAX];
-		emulate_2op_SrcV("cmp", src, dst, _eflags);
-		if (_eflags & EFLG_ZF) {
-			/* Success: write back to memory. */
-			dst.val = src.orig_val;
-		} else {
-			/* Failure: write the value we saw to EAX. */
-			dst.type = OP_REG;
-			dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX];
-		}
-		break;
-	case 0xb3:
-	      btr:		/* btr */
-		src.val &= (dst.bytes << 3) - 1; /* only subword offset */
-		emulate_2op_SrcV_nobyte("btr", src, dst, _eflags);
-		break;
-	case 0xb6 ... 0xb7:	/* movzx */
-		dst.bytes = op_bytes;
-		dst.val = (d & ByteOp) ? (u8) src.val : (u16) src.val;
-		break;
-	case 0xba:		/* Grp8 */
-		switch (modrm_reg & 3) {
-		case 0:
-			goto bt;
-		case 1:
-			goto bts;
-		case 2:
-			goto btr;
-		case 3:
-			goto btc;
-		}
-		break;
-	case 0xbb:
-	      btc:		/* btc */
-		src.val &= (dst.bytes << 3) - 1; /* only subword offset */
-		emulate_2op_SrcV_nobyte("btc", src, dst, _eflags);
-		break;
-	case 0xbe ... 0xbf:	/* movsx */
-		dst.bytes = op_bytes;
-		dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val;
-		break;
-	case 0xc3:		/* movnti */
-		dst.bytes = op_bytes;
-		dst.val = (op_bytes == 4) ? (u32) src.val : (u64) src.val;
-		break;
-	}
-	goto writeback;
-
-twobyte_special_insn:
-	/* Disable writeback. */
-	no_wb = 1;
-	switch (b) {
-	case 0x06:
-		emulate_clts(ctxt->vcpu);
-		break;
-	case 0x08:		/* invd */
-		break;
-	case 0x09:		/* wbinvd */
-		break;
-	case 0x0d:		/* GrpP (prefetch) */
-	case 0x18:		/* Grp16 (prefetch/nop) */
-		break;
-	case 0x20: /* mov cr, reg */
-		if (modrm_mod != 3)
-			goto cannot_emulate;
-		_regs[modrm_rm] = realmode_get_cr(ctxt->vcpu, modrm_reg);
-		break;
-	case 0x22: /* mov reg, cr */
-		if (modrm_mod != 3)
-			goto cannot_emulate;
-		realmode_set_cr(ctxt->vcpu, modrm_reg, modrm_val, &_eflags);
-		break;
-	case 0x30:
-		/* wrmsr */
-		msr_data = (u32)_regs[VCPU_REGS_RAX]
-			| ((u64)_regs[VCPU_REGS_RDX] << 32);
-		rc = kvm_set_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], msr_data);
-		if (rc) {
-			kvm_x86_ops->inject_gp(ctxt->vcpu, 0);
-			_eip = ctxt->vcpu->rip;
-		}
-		rc = X86EMUL_CONTINUE;
-		break;
-	case 0x32:
-		/* rdmsr */
-		rc = kvm_get_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], &msr_data);
-		if (rc) {
-			kvm_x86_ops->inject_gp(ctxt->vcpu, 0);
-			_eip = ctxt->vcpu->rip;
-		} else {
-			_regs[VCPU_REGS_RAX] = (u32)msr_data;
-			_regs[VCPU_REGS_RDX] = msr_data >> 32;
-		}
-		rc = X86EMUL_CONTINUE;
-		break;
-	case 0x80 ... 0x8f: /* jnz rel, etc */ {
-		long int rel;
-
-		switch (op_bytes) {
-		case 2:
-			rel = insn_fetch(s16, 2, _eip);
-			break;
-		case 4:
-			rel = insn_fetch(s32, 4, _eip);
-			break;
-		case 8:
-			rel = insn_fetch(s64, 8, _eip);
-			break;
-		default:
-			DPRINTF("jnz: Invalid op_bytes\n");
-			goto cannot_emulate;
-		}
-		if (test_cc(b, _eflags))
-			JMP_REL(rel);
-		break;
-	}
-	case 0xc7:		/* Grp9 (cmpxchg8b) */
-		{
-			u64 old, new;
-			if ((rc = ops->read_emulated(cr2, &old, 8, ctxt->vcpu))
-									!= 0)
-				goto done;
-			if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) ||
-			    ((u32) (old >> 32) != (u32) _regs[VCPU_REGS_RDX])) {
-				_regs[VCPU_REGS_RAX] = (u32) (old >> 0);
-				_regs[VCPU_REGS_RDX] = (u32) (old >> 32);
-				_eflags &= ~EFLG_ZF;
-			} else {
-				new = ((u64)_regs[VCPU_REGS_RCX] << 32)
-					| (u32) _regs[VCPU_REGS_RBX];
-				if ((rc = ops->cmpxchg_emulated(cr2, &old,
-							  &new, 8, ctxt->vcpu)) != 0)
-					goto done;
-				_eflags |= EFLG_ZF;
-			}
-			break;
-		}
-	}
-	goto writeback;
-
-cannot_emulate:
-	DPRINTF("Cannot emulate %02x\n", b);
-	return -1;
-}
-
-#ifdef __XEN__
-
-#include <asm/mm.h>
-#include <asm/uaccess.h>
-
-int
-x86_emulate_read_std(unsigned long addr,
-		     unsigned long *val,
-		     unsigned int bytes, struct x86_emulate_ctxt *ctxt)
-{
-	unsigned int rc;
-
-	*val = 0;
-
-	if ((rc = copy_from_user((void *)val, (void *)addr, bytes)) != 0) {
-		propagate_page_fault(addr + bytes - rc, 0);	/* read fault */
-		return X86EMUL_PROPAGATE_FAULT;
-	}
-
-	return X86EMUL_CONTINUE;
-}
-
-int
-x86_emulate_write_std(unsigned long addr,
-		      unsigned long val,
-		      unsigned int bytes, struct x86_emulate_ctxt *ctxt)
-{
-	unsigned int rc;
-
-	if ((rc = copy_to_user((void *)addr, (void *)&val, bytes)) != 0) {
-		propagate_page_fault(addr + bytes - rc, PGERR_write_access);
-		return X86EMUL_PROPAGATE_FAULT;
-	}
-
-	return X86EMUL_CONTINUE;
-}
-
-#endif

+ 1 - 0
include/asm-x86/Kbuild

@@ -3,6 +3,7 @@ include include/asm-generic/Kbuild.asm
 header-y += boot.h
 header-y += bootparam.h
 header-y += debugreg.h
+header-y += kvm.h
 header-y += ldt.h
 header-y += msr-index.h
 header-y += prctl.h

+ 191 - 0
include/asm-x86/kvm.h

@@ -0,0 +1,191 @@
+#ifndef __LINUX_KVM_X86_H
+#define __LINUX_KVM_X86_H
+
+/*
+ * KVM x86 specific structures and definitions
+ *
+ */
+
+#include <asm/types.h>
+#include <linux/ioctl.h>
+
+/* Architectural interrupt line count. */
+#define KVM_NR_INTERRUPTS 256
+
+struct kvm_memory_alias {
+	__u32 slot;  /* this has a different namespace than memory slots */
+	__u32 flags;
+	__u64 guest_phys_addr;
+	__u64 memory_size;
+	__u64 target_phys_addr;
+};
+
+/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
+struct kvm_pic_state {
+	__u8 last_irr;	/* edge detection */
+	__u8 irr;		/* interrupt request register */
+	__u8 imr;		/* interrupt mask register */
+	__u8 isr;		/* interrupt service register */
+	__u8 priority_add;	/* highest irq priority */
+	__u8 irq_base;
+	__u8 read_reg_select;
+	__u8 poll;
+	__u8 special_mask;
+	__u8 init_state;
+	__u8 auto_eoi;
+	__u8 rotate_on_auto_eoi;
+	__u8 special_fully_nested_mode;
+	__u8 init4;		/* true if 4 byte init */
+	__u8 elcr;		/* PIIX edge/trigger selection */
+	__u8 elcr_mask;
+};
+
+#define KVM_IOAPIC_NUM_PINS  24
+struct kvm_ioapic_state {
+	__u64 base_address;
+	__u32 ioregsel;
+	__u32 id;
+	__u32 irr;
+	__u32 pad;
+	union {
+		__u64 bits;
+		struct {
+			__u8 vector;
+			__u8 delivery_mode:3;
+			__u8 dest_mode:1;
+			__u8 delivery_status:1;
+			__u8 polarity:1;
+			__u8 remote_irr:1;
+			__u8 trig_mode:1;
+			__u8 mask:1;
+			__u8 reserve:7;
+			__u8 reserved[4];
+			__u8 dest_id;
+		} fields;
+	} redirtbl[KVM_IOAPIC_NUM_PINS];
+};
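
The union lets a single 64-bit load or store move one whole redirection-table entry while the bitfields name its pieces. Bitfield layout is compiler-dependent; a sketch assuming GCC on little-endian x86, where the vector byte lands in the low bits (only the first fields are reproduced here):

#include <stdio.h>
#include <stdint.h>

union redir_entry {
	uint64_t bits;
	struct {
		uint8_t vector;
		uint8_t delivery_mode:3;
		uint8_t dest_mode:1;
		/* ... remaining fields as declared in kvm_ioapic_state ... */
	} fields;
};

int main(void)
{
	union redir_entry e = { .bits = 0 };

	e.fields.vector = 0x31;
	printf("bits = 0x%llx\n", (unsigned long long)e.bits);	/* 0x31 */
	return 0;
}
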
+
+#define KVM_IRQCHIP_PIC_MASTER   0
+#define KVM_IRQCHIP_PIC_SLAVE    1
+#define KVM_IRQCHIP_IOAPIC       2
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
+	__u64 rax, rbx, rcx, rdx;
+	__u64 rsi, rdi, rsp, rbp;
+	__u64 r8,  r9,  r10, r11;
+	__u64 r12, r13, r14, r15;
+	__u64 rip, rflags;
+};
+
+/* for KVM_GET_LAPIC and KVM_SET_LAPIC */
+#define KVM_APIC_REG_SIZE 0x400
+struct kvm_lapic_state {
+	char regs[KVM_APIC_REG_SIZE];
+};
+
+struct kvm_segment {
+	__u64 base;
+	__u32 limit;
+	__u16 selector;
+	__u8  type;
+	__u8  present, dpl, db, s, l, g, avl;
+	__u8  unusable;
+	__u8  padding;
+};
+
+struct kvm_dtable {
+	__u64 base;
+	__u16 limit;
+	__u16 padding[3];
+};
+
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+	/* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
+	struct kvm_segment cs, ds, es, fs, gs, ss;
+	struct kvm_segment tr, ldt;
+	struct kvm_dtable gdt, idt;
+	__u64 cr0, cr2, cr3, cr4, cr8;
+	__u64 efer;
+	__u64 apic_base;
+	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+	__u8  fpr[8][16];
+	__u16 fcw;
+	__u16 fsw;
+	__u8  ftwx;  /* in fxsave format */
+	__u8  pad1;
+	__u16 last_opcode;
+	__u64 last_ip;
+	__u64 last_dp;
+	__u8  xmm[16][16];
+	__u32 mxcsr;
+	__u32 pad2;
+};
+
+struct kvm_msr_entry {
+	__u32 index;
+	__u32 reserved;
+	__u64 data;
+};
+
+/* for KVM_GET_MSRS and KVM_SET_MSRS */
+struct kvm_msrs {
+	__u32 nmsrs; /* number of msrs in entries */
+	__u32 pad;
+
+	struct kvm_msr_entry entries[0];
+};
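
entries[0] is the zero-length-array idiom for a variable-length tail: userspace sizes the buffer as one header plus nmsrs entries in a single allocation. A hedged sketch of that sizing with standalone stand-in types (modern C would spell the tail entries[]):

#include <stdio.h>
#include <stdlib.h>

struct msr_entry { unsigned int index, reserved; unsigned long long data; };
struct msrs { unsigned int nmsrs, pad; struct msr_entry entries[]; };

int main(void)
{
	unsigned int n = 3;
	struct msrs *m = calloc(1, sizeof(*m) + n * sizeof(m->entries[0]));

	if (!m)
		return 1;
	m->nmsrs = n;
	m->entries[2].index = 0x174;	/* e.g. the SYSENTER_CS MSR */
	printf("buffer is %zu bytes\n", sizeof(*m) + n * sizeof(m->entries[0]));
	free(m);
	return 0;
}
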
+
+/* for KVM_GET_MSR_INDEX_LIST */
+struct kvm_msr_list {
+	__u32 nmsrs; /* number of msrs in entries */
+	__u32 indices[0];
+};
+
+
+struct kvm_cpuid_entry {
+	__u32 function;
+	__u32 eax;
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 padding;
+};
+
+/* for KVM_SET_CPUID */
+struct kvm_cpuid {
+	__u32 nent;
+	__u32 padding;
+	struct kvm_cpuid_entry entries[0];
+};
+
+struct kvm_cpuid_entry2 {
+	__u32 function;
+	__u32 index;
+	__u32 flags;
+	__u32 eax;
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 padding[3];
+};
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
+#define KVM_CPUID_FLAG_STATEFUL_FUNC    2
+#define KVM_CPUID_FLAG_STATE_READ_NEXT  4
+
+/* for KVM_SET_CPUID2 */
+struct kvm_cpuid2 {
+	__u32 nent;
+	__u32 padding;
+	struct kvm_cpuid_entry2 entries[0];
+};
+
+#endif

+ 176 - 361
drivers/kvm/kvm.h → include/asm-x86/kvm_host.h

@@ -1,23 +1,24 @@
-#ifndef __KVM_H
-#define __KVM_H
-
-/*
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * This header defines architecture specific interfaces, x86 version
+ *
  * This work is licensed under the terms of the GNU GPL, version 2.  See
  * the COPYING file in the top-level directory.
+ *
  */
 
+#ifndef ASM_KVM_HOST_H
+#define ASM_KVM_HOST_H
+
 #include <linux/types.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
 #include <linux/mm.h>
-#include <linux/preempt.h>
-#include <asm/signal.h>
 
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
+#include <linux/kvm_types.h>
+
+#include <asm/desc.h>
 
 #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
 #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
@@ -37,15 +38,8 @@
 #define INVALID_PAGE (~(hpa_t)0)
 #define UNMAPPED_GVA (~(gpa_t)0)
 
-#define KVM_MAX_VCPUS 4
-#define KVM_ALIAS_SLOTS 4
-#define KVM_MEMORY_SLOTS 8
-#define KVM_NUM_MMU_PAGES 1024
-#define KVM_MIN_FREE_MMU_PAGES 5
-#define KVM_REFILL_PAGES 25
-#define KVM_MAX_CPUID_ENTRIES 40
-
 #define DE_VECTOR 0
+#define UD_VECTOR 6
 #define NM_VECTOR 7
 #define DF_VECTOR 8
 #define TS_VECTOR 10
@@ -59,31 +53,66 @@
 
 #define IOPL_SHIFT 12
 
-#define KVM_PIO_PAGE_OFFSET 1
+#define KVM_ALIAS_SLOTS 4
 
-/*
- * vcpu->requests bit members
- */
-#define KVM_TLB_FLUSH 0
+#define KVM_PERMILLE_MMU_PAGES 20
+#define KVM_MIN_ALLOC_MMU_PAGES 64
+#define KVM_NUM_MMU_PAGES 1024
+#define KVM_MIN_FREE_MMU_PAGES 5
+#define KVM_REFILL_PAGES 25
+#define KVM_MAX_CPUID_ENTRIES 40
 
-/*
- * Address types:
- *
- *  gva - guest virtual address
- *  gpa - guest physical address
- *  gfn - guest frame number
- *  hva - host virtual address
- *  hpa - host physical address
- *  hfn - host frame number
- */
+extern spinlock_t kvm_lock;
+extern struct list_head vm_list;
+
+struct kvm_vcpu;
+struct kvm;
+
+enum {
+	VCPU_REGS_RAX = 0,
+	VCPU_REGS_RCX = 1,
+	VCPU_REGS_RDX = 2,
+	VCPU_REGS_RBX = 3,
+	VCPU_REGS_RSP = 4,
+	VCPU_REGS_RBP = 5,
+	VCPU_REGS_RSI = 6,
+	VCPU_REGS_RDI = 7,
+#ifdef CONFIG_X86_64
+	VCPU_REGS_R8 = 8,
+	VCPU_REGS_R9 = 9,
+	VCPU_REGS_R10 = 10,
+	VCPU_REGS_R11 = 11,
+	VCPU_REGS_R12 = 12,
+	VCPU_REGS_R13 = 13,
+	VCPU_REGS_R14 = 14,
+	VCPU_REGS_R15 = 15,
+#endif
+	NR_VCPU_REGS
+};
+
+enum {
+	VCPU_SREG_CS,
+	VCPU_SREG_DS,
+	VCPU_SREG_ES,
+	VCPU_SREG_FS,
+	VCPU_SREG_GS,
+	VCPU_SREG_SS,
+	VCPU_SREG_TR,
+	VCPU_SREG_LDTR,
+};
 
-typedef unsigned long  gva_t;
-typedef u64            gpa_t;
-typedef unsigned long  gfn_t;
+#include <asm/kvm_x86_emulate.h>
 
-typedef unsigned long  hva_t;
-typedef u64            hpa_t;
-typedef unsigned long  hfn_t;
+#define KVM_NR_MEM_OBJS 40
+
+/*
+ * We don't want allocation failures within the mmu code, so we preallocate
+ * enough memory for a single page fault in a cache.
+ */
+struct kvm_mmu_memory_cache {
+	int nobjs;
+	void *objects[KVM_NR_MEM_OBJS];
+};
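
The pattern this comment implies: top the cache up where sleeping and failure are allowed, then pop objects on the fault path where neither is. A standalone sketch of that shape; the helper names here are illustrative, not the kernel's:

#include <stdlib.h>

#define NR_OBJS 40

struct mem_cache {
	int nobjs;
	void *objects[NR_OBJS];
};

/* Called where allocation may fail: fill the cache up to min objects. */
static int cache_topup(struct mem_cache *c, int min, size_t size)
{
	while (c->nobjs < min) {
		void *obj = malloc(size);

		if (!obj)
			return -1;
		c->objects[c->nobjs++] = obj;
	}
	return 0;
}

/* Called on the fault path: cannot fail after a successful topup. */
static void *cache_alloc(struct mem_cache *c)
{
	return c->objects[--c->nobjs];
}

int main(void)
{
	struct mem_cache c = { 0 };

	if (cache_topup(&c, 4, 64))
		return 1;
	free(cache_alloc(&c));
	while (c.nobjs)
		free(c.objects[--c.nobjs]);
	return 0;
}
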
 
 #define NR_PTE_CHAIN_ENTRIES 5
 
@@ -99,7 +128,7 @@ struct kvm_pte_chain {
  *   bits 4:7 - page table level for this shadow (1-4)
  *   bits 8:9 - page table quadrant for 2-level guests
  *   bit   16 - "metaphysical" - gfn is not a real page (huge page/real mode)
- *   bits 17:19 - "access" - the user, writable, and nx bits of a huge page pde
+ *   bits 17:19 - common access permissions for all ptes in this shadow page
  */
 union kvm_mmu_page_role {
 	unsigned word;
@@ -109,7 +138,7 @@ union kvm_mmu_page_role {
 		unsigned quadrant : 2;
 		unsigned pad_for_nice_hex_output : 6;
 		unsigned metaphysical : 1;
-		unsigned hugepage_access : 3;
+		unsigned access : 3;
 	};
 };
 
@@ -125,6 +154,8 @@ struct kvm_mmu_page {
 	union kvm_mmu_page_role role;
 
 	u64 *spt;
+	/* hold the gfn of each spte inside spt */
+	gfn_t *gfns;
 	unsigned long slot_bitmap; /* One bit set per slot which has memory
 				    * in this shadow page.
 				    */
@@ -136,9 +167,6 @@ struct kvm_mmu_page {
 	};
 };
 
-struct kvm_vcpu;
-extern struct kmem_cache *kvm_vcpu_cache;
-
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
@@ -149,6 +177,8 @@ struct kvm_mmu {
 	int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
 	void (*free)(struct kvm_vcpu *vcpu);
 	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
+	void (*prefetch_page)(struct kvm_vcpu *vcpu,
+			      struct kvm_mmu_page *page);
 	hpa_t root_hpa;
 	int root_level;
 	int shadow_root_level;
@@ -156,159 +186,9 @@ struct kvm_mmu {
 	u64 *pae_root;
 };
 
-#define KVM_NR_MEM_OBJS 20
-
-struct kvm_mmu_memory_cache {
-	int nobjs;
-	void *objects[KVM_NR_MEM_OBJS];
-};
-
-/*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_guest_debug {
-	int enabled;
-	unsigned long bp[4];
-	int singlestep;
-};
-
-enum {
-	VCPU_REGS_RAX = 0,
-	VCPU_REGS_RCX = 1,
-	VCPU_REGS_RDX = 2,
-	VCPU_REGS_RBX = 3,
-	VCPU_REGS_RSP = 4,
-	VCPU_REGS_RBP = 5,
-	VCPU_REGS_RSI = 6,
-	VCPU_REGS_RDI = 7,
-#ifdef CONFIG_X86_64
-	VCPU_REGS_R8 = 8,
-	VCPU_REGS_R9 = 9,
-	VCPU_REGS_R10 = 10,
-	VCPU_REGS_R11 = 11,
-	VCPU_REGS_R12 = 12,
-	VCPU_REGS_R13 = 13,
-	VCPU_REGS_R14 = 14,
-	VCPU_REGS_R15 = 15,
-#endif
-	NR_VCPU_REGS
-};
-
-enum {
-	VCPU_SREG_CS,
-	VCPU_SREG_DS,
-	VCPU_SREG_ES,
-	VCPU_SREG_FS,
-	VCPU_SREG_GS,
-	VCPU_SREG_SS,
-	VCPU_SREG_TR,
-	VCPU_SREG_LDTR,
-};
-
-struct kvm_pio_request {
-	unsigned long count;
-	int cur_count;
-	struct page *guest_pages[2];
-	unsigned guest_page_offset;
-	int in;
-	int port;
-	int size;
-	int string;
-	int down;
-	int rep;
-};
-
-struct kvm_stat {
-	u32 pf_fixed;
-	u32 pf_guest;
-	u32 tlb_flush;
-	u32 invlpg;
-
-	u32 exits;
-	u32 io_exits;
-	u32 mmio_exits;
-	u32 signal_exits;
-	u32 irq_window_exits;
-	u32 halt_exits;
-	u32 halt_wakeup;
-	u32 request_irq_exits;
-	u32 irq_exits;
-	u32 light_exits;
-	u32 efer_reload;
-};
-
-struct kvm_io_device {
-	void (*read)(struct kvm_io_device *this,
-		     gpa_t addr,
-		     int len,
-		     void *val);
-	void (*write)(struct kvm_io_device *this,
-		      gpa_t addr,
-		      int len,
-		      const void *val);
-	int (*in_range)(struct kvm_io_device *this, gpa_t addr);
-	void (*destructor)(struct kvm_io_device *this);
-
-	void             *private;
-};
-
-static inline void kvm_iodevice_read(struct kvm_io_device *dev,
-				     gpa_t addr,
-				     int len,
-				     void *val)
-{
-	dev->read(dev, addr, len, val);
-}
-
-static inline void kvm_iodevice_write(struct kvm_io_device *dev,
-				      gpa_t addr,
-				      int len,
-				      const void *val)
-{
-	dev->write(dev, addr, len, val);
-}
-
-static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr)
-{
-	return dev->in_range(dev, addr);
-}
-
-static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
-{
-	if (dev->destructor)
-		dev->destructor(dev);
-}
-
-/*
- * It would be nice to use something smarter than a linear search, TBD...
- * Thankfully we dont expect many devices to register (famous last words :),
- * so until then it will suffice.  At least its abstracted so we can change
- * in one place.
- */
-struct kvm_io_bus {
-	int                   dev_count;
-#define NR_IOBUS_DEVS 6
-	struct kvm_io_device *devs[NR_IOBUS_DEVS];
-};
-
-void kvm_io_bus_init(struct kvm_io_bus *bus);
-void kvm_io_bus_destroy(struct kvm_io_bus *bus);
-struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
-void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-			     struct kvm_io_device *dev);
-
-struct kvm_vcpu {
-	struct kvm *kvm;
-	struct preempt_notifier preempt_notifier;
-	int vcpu_id;
-	struct mutex mutex;
-	int   cpu;
+struct kvm_vcpu_arch {
 	u64 host_tsc;
-	struct kvm_run *run;
 	int interrupt_window_open;
-	int guest_mode;
-	unsigned long requests;
 	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
 	DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
 	unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
@@ -317,9 +197,6 @@ struct kvm_vcpu {
 	unsigned long cr0;
 	unsigned long cr2;
 	unsigned long cr3;
-	gpa_t para_state_gpa;
-	struct page *para_state_page;
-	gpa_t hypercall_gpa;
 	unsigned long cr4;
 	unsigned long cr8;
 	u64 pdptrs[4]; /* pae */
@@ -334,6 +211,7 @@ struct kvm_vcpu {
 	int mp_state;
 	int sipi_vector;
 	u64 ia32_misc_enable_msr;
+	bool tpr_access_reporting;
 
 	struct kvm_mmu mmu;
 
@@ -344,29 +222,26 @@ struct kvm_vcpu {
 
 	gfn_t last_pt_write_gfn;
 	int   last_pt_write_count;
+	u64  *last_pte_updated;
 
-	struct kvm_guest_debug guest_debug;
+	struct {
+		gfn_t gfn;          /* presumed gfn during guest pte update */
+		struct page *page;  /* page corresponding to that gfn */
+	} update_pte;
 
 	struct i387_fxsave_struct host_fx_image;
 	struct i387_fxsave_struct guest_fx_image;
-	int fpu_active;
-	int guest_fpu_loaded;
-
-	int mmio_needed;
-	int mmio_read_completed;
-	int mmio_is_write;
-	int mmio_size;
-	unsigned char mmio_data[8];
-	gpa_t mmio_phys_addr;
+
 	gva_t mmio_fault_cr2;
 	struct kvm_pio_request pio;
 	void *pio_data;
-	wait_queue_head_t wq;
 
-	int sigset_active;
-	sigset_t sigset;
-
-	struct kvm_stat stat;
+	struct kvm_queued_exception {
+		bool pending;
+		bool has_error_code;
+		u8 nr;
+		u32 error_code;
+	} exception;
 
 	struct {
 		int active;
@@ -381,7 +256,10 @@ struct kvm_vcpu {
 	int halt_request; /* real mode on Intel only */
 
 	int cpuid_nent;
-	struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+	struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+	/* emulate context */
+
+	struct x86_emulate_ctxt emulate_ctxt;
 };
 
 struct kvm_mem_alias {
@@ -390,51 +268,58 @@ struct kvm_mem_alias {
 	gfn_t target_gfn;
 };
 
-struct kvm_memory_slot {
-	gfn_t base_gfn;
-	unsigned long npages;
-	unsigned long flags;
-	struct page **phys_mem;
-	unsigned long *dirty_bitmap;
-};
-
-struct kvm {
-	struct mutex lock; /* protects everything except vcpus */
+struct kvm_arch {
 	int naliases;
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
-	int nmemslots;
-	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS];
+
+	unsigned int n_free_mmu_pages;
+	unsigned int n_requested_mmu_pages;
+	unsigned int n_alloc_mmu_pages;
+	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	/*
 	 * Hash table of struct kvm_mmu_page.
 	 */
 	struct list_head active_mmu_pages;
-	int n_free_mmu_pages;
-	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
-	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
-	unsigned long rmap_overflow;
-	struct list_head vm_list;
-	struct file *filp;
-	struct kvm_io_bus mmio_bus;
-	struct kvm_io_bus pio_bus;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
+
 	int round_robin_prev_vcpu;
+	unsigned int tss_addr;
+	struct page *apic_access_page;
 };
 
-static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
-{
-	return kvm->vpic;
-}
+struct kvm_vm_stat {
+	u32 mmu_shadow_zapped;
+	u32 mmu_pte_write;
+	u32 mmu_pte_updated;
+	u32 mmu_pde_zapped;
+	u32 mmu_flooded;
+	u32 mmu_recycled;
+	u32 mmu_cache_miss;
+	u32 remote_tlb_flush;
+};
 
-static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
-{
-	return kvm->vioapic;
-}
+struct kvm_vcpu_stat {
+	u32 pf_fixed;
+	u32 pf_guest;
+	u32 tlb_flush;
+	u32 invlpg;
 
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-	return pic_irqchip(kvm) != 0;
-}
+	u32 exits;
+	u32 io_exits;
+	u32 mmio_exits;
+	u32 signal_exits;
+	u32 irq_window_exits;
+	u32 halt_exits;
+	u32 halt_wakeup;
+	u32 request_irq_exits;
+	u32 irq_exits;
+	u32 host_state_reload;
+	u32 efer_reload;
+	u32 fpu_reload;
+	u32 insn_emulation;
+	u32 insn_emulation_fail;
+};
 
 struct descriptor_table {
 	u16 limit;
@@ -449,11 +334,12 @@ struct kvm_x86_ops {
 	void (*check_processor_compatibility)(void *rtn);
 	int (*hardware_setup)(void);               /* __init */
 	void (*hardware_unsetup)(void);            /* __exit */
+	bool (*cpu_has_accelerated_tpr)(void);
 
 	/* Create, but do not attach this VCPU */
 	struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
 	void (*vcpu_free)(struct kvm_vcpu *vcpu);
-	void (*vcpu_reset)(struct kvm_vcpu *vcpu);
+	int (*vcpu_reset)(struct kvm_vcpu *vcpu);
 
 	void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
@@ -489,10 +375,6 @@ struct kvm_x86_ops {
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
 
 	void (*tlb_flush)(struct kvm_vcpu *vcpu);
-	void (*inject_page_fault)(struct kvm_vcpu *vcpu,
-				  unsigned long addr, u32 err_code);
-
-	void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code);
 
 	void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
 	int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
@@ -501,54 +383,31 @@ struct kvm_x86_ops {
 				unsigned char *hypercall_addr);
 	int (*get_irq)(struct kvm_vcpu *vcpu);
 	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
+	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
+				bool has_error_code, u32 error_code);
+	bool (*exception_injected)(struct kvm_vcpu *vcpu);
 	void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
 	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
 				       struct kvm_run *run);
+
+	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
 
-/* The guest did something we don't support. */
-#define pr_unimpl(vcpu, fmt, ...)					\
- do {									\
-	if (printk_ratelimit())						\
-		printk(KERN_ERR "kvm: %i: cpu%i " fmt,			\
-		       current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
- } while(0)
-
-#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
-#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
-
-int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
-void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
-
-int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size,
-		  struct module *module);
-void kvm_exit_x86(void);
-
 int kvm_mmu_module_init(void);
 void kvm_mmu_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
 int kvm_mmu_setup(struct kvm_vcpu *vcpu);
+void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 void kvm_mmu_zap_all(struct kvm *kvm);
-
-hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
-#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
-#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
-static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
-hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
-struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
-
-extern hpa_t bad_page_address;
-
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
-struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
-void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
+unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
 enum emulation_result {
 	EMULATE_DONE,       /* no further processing */
@@ -556,8 +415,10 @@ enum emulation_result {
 	EMULATE_FAIL,         /* can't emulate this instruction */
 };
 
+#define EMULTYPE_NO_DECODE	    (1 << 0)
+#define EMULTYPE_TRAP_UD	    (1 << 1)
 int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
-			unsigned long cr2, u16 error_code);
+			unsigned long cr2, u16 error_code, int emulation_type);
 void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context);
 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
 void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
@@ -572,7 +433,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 
 struct x86_emulate_ctxt;
 
-int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 		     int size, unsigned port);
 int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 			   int size, unsigned long count, int down,
@@ -581,7 +442,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
 int emulate_clts(struct kvm_vcpu *vcpu);
-int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr,
+int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
 		    unsigned long *dest);
 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
 		    unsigned long value);
@@ -597,15 +458,15 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 
-void fx_init(struct kvm_vcpu *vcpu);
+void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
+void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
+void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
+			   u32 error_code);
 
-void kvm_resched(struct kvm_vcpu *vcpu);
-void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
-void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
-void kvm_flush_remote_tlbs(struct kvm *kvm);
+void fx_init(struct kvm_vcpu *vcpu);
 
 int emulator_read_std(unsigned long addr,
-                      void *val,
+		      void *val,
 		      unsigned int bytes,
 		      struct kvm_vcpu *vcpu);
 int emulator_write_emulated(unsigned long addr,
@@ -615,6 +476,7 @@ int emulator_write_emulated(unsigned long addr,
 
 unsigned long segment_base(u16 selector);
 
+void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		       const u8 *new, int bytes);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
@@ -622,66 +484,14 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 
-int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
-static inline void kvm_guest_enter(void)
-{
-	current->flags |= PF_VCPU;
-}
+int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
 
-static inline void kvm_guest_exit(void)
-{
-	current->flags &= ~PF_VCPU;
-}
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code);
 
-static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
-				     u32 error_code)
-{
-	return vcpu->mmu.page_fault(vcpu, gva, error_code);
-}
-
-static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
-{
-	if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
-		__kvm_mmu_free_some_pages(vcpu);
-}
-
-static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
-{
-	if (likely(vcpu->mmu.root_hpa != INVALID_PAGE))
-		return 0;
-
-	return kvm_mmu_load(vcpu);
-}
-
-static inline int is_long_mode(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_X86_64
-	return vcpu->shadow_efer & EFER_LME;
-#else
-	return 0;
-#endif
-}
-
-static inline int is_pae(struct kvm_vcpu *vcpu)
-{
-	return vcpu->cr4 & X86_CR4_PAE;
-}
-
-static inline int is_pse(struct kvm_vcpu *vcpu)
-{
-	return vcpu->cr4 & X86_CR4_PSE;
-}
-
-static inline int is_paging(struct kvm_vcpu *vcpu)
-{
-	return vcpu->cr0 & X86_CR0_PG;
-}
-
-static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
-	return slot - kvm->memslots;
-}
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
+int complete_pio(struct kvm_vcpu *vcpu);
 
 static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 {
@@ -693,55 +503,55 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 static inline u16 read_fs(void)
 {
 	u16 seg;
-	asm ("mov %%fs, %0" : "=g"(seg));
+	asm("mov %%fs, %0" : "=g"(seg));
 	return seg;
 }
 
 static inline u16 read_gs(void)
 {
 	u16 seg;
-	asm ("mov %%gs, %0" : "=g"(seg));
+	asm("mov %%gs, %0" : "=g"(seg));
 	return seg;
 }
 
 static inline u16 read_ldt(void)
 {
 	u16 ldt;
-	asm ("sldt %0" : "=g"(ldt));
+	asm("sldt %0" : "=g"(ldt));
 	return ldt;
 }
 
 static inline void load_fs(u16 sel)
 {
-	asm ("mov %0, %%fs" : : "rm"(sel));
+	asm("mov %0, %%fs" : : "rm"(sel));
 }
 
 static inline void load_gs(u16 sel)
 {
-	asm ("mov %0, %%gs" : : "rm"(sel));
+	asm("mov %0, %%gs" : : "rm"(sel));
 }
 
 #ifndef load_ldt
 static inline void load_ldt(u16 sel)
 {
-	asm ("lldt %0" : : "rm"(sel));
+	asm("lldt %0" : : "rm"(sel));
 }
 #endif
 
 static inline void get_idt(struct descriptor_table *table)
 {
-	asm ("sidt %0" : "=m"(*table));
+	asm("sidt %0" : "=m"(*table));
 }
 
 static inline void get_gdt(struct descriptor_table *table)
 {
-	asm ("sgdt %0" : "=m"(*table));
+	asm("sgdt %0" : "=m"(*table));
 }
 
 static inline unsigned long read_tr_base(void)
 {
 	u16 tr;
-	asm ("str %0" : "=g"(tr));
+	asm("str %0" : "=g"(tr));
 	return segment_base(tr);
 }
 
@@ -757,17 +567,17 @@ static inline unsigned long read_msr(unsigned long msr)
 
 static inline void fx_save(struct i387_fxsave_struct *image)
 {
-	asm ("fxsave (%0)":: "r" (image));
+	asm("fxsave (%0)":: "r" (image));
 }
 
 static inline void fx_restore(struct i387_fxsave_struct *image)
 {
-	asm ("fxrstor (%0)":: "r" (image));
+	asm("fxrstor (%0)":: "r" (image));
 }
 
 static inline void fpu_init(void)
 {
-	asm ("finit");
+	asm("finit");
 }
 
 static inline u32 get_rdx_init_val(void)
@@ -775,6 +585,11 @@ static inline u32 get_rdx_init_val(void)
 	return 0x600; /* P6 family */
 }
 
+static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
+{
+	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+}
+
 #define ASM_VMX_VMCLEAR_RAX       ".byte 0x66, 0x0f, 0xc7, 0x30"
 #define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
 #define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"

+ 105 - 0
include/asm-x86/kvm_para.h

@@ -0,0 +1,105 @@
+#ifndef __X86_KVM_PARA_H
+#define __X86_KVM_PARA_H
+
+/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
+ * should be used to determine whether the guest is running under KVM.
+ */
+#define KVM_CPUID_SIGNATURE	0x40000000
+
+/* This CPUID returns a feature bitmap in eax.  Before enabling a particular
+ * paravirtual feature, the corresponding feature bit should be checked.
+ */
+#define KVM_CPUID_FEATURES	0x40000001
+
+#ifdef __KERNEL__
+#include <asm/processor.h>
+
+/* This instruction is vmcall.  On non-VT architectures, it will generate a
+ * trap that we will then rewrite to the appropriate instruction.
+ */
+#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
+
+/* For KVM hypercalls, a three-byte sequence of either the vmcall or the
+ * vmmcall instruction.  The hypervisor may replace it with something else,
+ * but only these two instructions are guaranteed to be supported.
+ *
+ * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
+ * The hypercall number should be placed in rax and the return value will be
+ * placed in rax.  No other registers will be clobbered unless explicitly
+ * noted by the particular hypercall.
+ */
+
+static inline long kvm_hypercall0(unsigned int nr)
+{
+	long ret;
+	asm volatile(KVM_HYPERCALL
+		     : "=a"(ret)
+		     : "a"(nr));
+	return ret;
+}
+
+static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
+{
+	long ret;
+	asm volatile(KVM_HYPERCALL
+		     : "=a"(ret)
+		     : "a"(nr), "b"(p1));
+	return ret;
+}
+
+static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
+				  unsigned long p2)
+{
+	long ret;
+	asm volatile(KVM_HYPERCALL
+		     : "=a"(ret)
+		     : "a"(nr), "b"(p1), "c"(p2));
+	return ret;
+}
+
+static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
+				  unsigned long p2, unsigned long p3)
+{
+	long ret;
+	asm volatile(KVM_HYPERCALL
+		     : "=a"(ret)
+		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3));
+	return ret;
+}
+
+static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
+				  unsigned long p2, unsigned long p3,
+				  unsigned long p4)
+{
+	long ret;
+	asm volatile(KVM_HYPERCALL
+		     : "=a"(ret)
+		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4));
+	return ret;
+}
+
+static inline int kvm_para_available(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	char signature[13];
+
+	cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
+	memcpy(signature + 0, &ebx, 4);
+	memcpy(signature + 4, &ecx, 4);
+	memcpy(signature + 8, &edx, 4);
+	signature[12] = 0;
+
+	if (strcmp(signature, "KVMKVMKVM") == 0)
+		return 1;
+
+	return 0;
+}
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+	return cpuid_eax(KVM_CPUID_FEATURES);
+}
+
+#endif
+
+#endif

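Taken together, the definitions above are everything a guest needs to detect KVM and issue a hypercall. Below is a minimal guest-side sketch; the function name is hypothetical, KVM_HC_VAPIC_POLL_IRQ and KVM_ENOSYS come from linux/kvm_para.h further down, and the host is assumed to return -KVM_ENOSYS for a hypercall it does not implement.

#include <linux/errno.h>
#include <linux/kvm_para.h>	/* pulls in asm/kvm_para.h */

static int kvm_guest_probe(void)
{
	long ret;

	if (!kvm_para_available())
		return -ENODEV;	/* CPUID 0x40000000 signature mismatch */

	/* The hypercall number goes in rax; the return value comes back in rax. */
	ret = kvm_hypercall0(KVM_HC_VAPIC_POLL_IRQ);
	if (ret == -KVM_ENOSYS)
		return -ENOSYS;	/* host kernel does not handle this call */

	return 0;
}
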
+ 50 - 19
drivers/kvm/x86_emulate.h → include/asm-x86/kvm_x86_emulate.h

@@ -62,17 +62,6 @@ struct x86_emulate_ops {
 	int (*read_std)(unsigned long addr, void *val,
 			unsigned int bytes, struct kvm_vcpu *vcpu);
 
-	/*
-	 * write_std: Write bytes of standard (non-emulated/special) memory.
-	 *            Used for stack operations, and others.
-	 *  @addr:  [IN ] Linear address to which to write.
-	 *  @val:   [IN ] Value to write to memory (low-order bytes used as
-	 *                required).
-	 *  @bytes: [IN ] Number of bytes to write to memory.
-	 */
-	int (*write_std)(unsigned long addr, const void *val,
-			 unsigned int bytes, struct kvm_vcpu *vcpu);
-
 	/*
 	 * read_emulated: Read bytes from emulated/special memory area.
 	 *  @addr:  [IN ] Linear address from which to read.
@@ -112,13 +101,50 @@ struct x86_emulate_ops {
 
 };
 
+/* Type, address-of, and value of an instruction's operand. */
+struct operand {
+	enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
+	unsigned int bytes;
+	unsigned long val, orig_val, *ptr;
+};
+
+struct fetch_cache {
+	u8 data[15];
+	unsigned long start;
+	unsigned long end;
+};
+
+struct decode_cache {
+	u8 twobyte;
+	u8 b;
+	u8 lock_prefix;
+	u8 rep_prefix;
+	u8 op_bytes;
+	u8 ad_bytes;
+	u8 rex_prefix;
+	struct operand src;
+	struct operand dst;
+	unsigned long *override_base;
+	unsigned int d;
+	unsigned long regs[NR_VCPU_REGS];
+	unsigned long eip;
+	/* modrm */
+	u8 modrm;
+	u8 modrm_mod;
+	u8 modrm_reg;
+	u8 modrm_rm;
+	u8 use_modrm_ea;
+	unsigned long modrm_ea;
+	unsigned long modrm_val;
+	struct fetch_cache fetch;
+};
+
 struct x86_emulate_ctxt {
 	/* Register state before/after emulation. */
 	struct kvm_vcpu *vcpu;
 
-	/* Linear faulting address (if emulating a page-faulting instruction). */
 	unsigned long eflags;
-	unsigned long cr2;
 
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
 	int mode;
@@ -129,8 +155,16 @@ struct x86_emulate_ctxt {
 	unsigned long ss_base;
 	unsigned long gs_base;
 	unsigned long fs_base;
+
+	/* decode cache */
+
+	struct decode_cache decode;
 };
 
+/* Repeat String Operation Prefix */
+#define REPE_PREFIX  1
+#define REPNE_PREFIX    2
+
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0	/* Real mode.             */
 #define X86EMUL_MODE_PROT16   2	/* 16-bit protected mode. */
@@ -144,12 +178,9 @@ struct x86_emulate_ctxt {
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
 #endif
 
-/*
- * x86_emulate_memop: Emulate an instruction that faulted attempting to
- *                    read/write a 'special' memory area.
- * Returns -1 on failure, 0 on success.
- */
-int x86_emulate_memop(struct x86_emulate_ctxt *ctxt,
-		      struct x86_emulate_ops *ops);
+int x86_decode_insn(struct x86_emulate_ctxt *ctxt,
+		    struct x86_emulate_ops *ops);
+int x86_emulate_insn(struct x86_emulate_ctxt *ctxt,
+		     struct x86_emulate_ops *ops);
 
 #endif				/* __X86_EMULATE_H__ */

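The removal of x86_emulate_memop() splits emulation into an explicit decode phase and an execute phase. Below is a sketch of the intended calling sequence, modeled on the emulate_instruction() caller in x86.c; the wrapper name and the emulate_ops table are illustrative, and the caller is responsible for initializing the remaining ctxt fields (mode, segment bases) first.

#include <linux/kvm_host.h>
#include <asm/kvm_x86_emulate.h>

extern struct x86_emulate_ops emulate_ops;	/* caller-supplied ops table */

static int emulate_once(struct kvm_vcpu *vcpu)
{
	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
	int r;

	ctxt->vcpu = vcpu;
	r = x86_decode_insn(ctxt, &emulate_ops);	/* phase 1: fill ctxt->decode */
	if (r)
		return r;				/* could not decode */
	return x86_emulate_insn(ctxt, &emulate_ops);	/* phase 2: execute */
}

The EMULTYPE_NO_DECODE flag declared next to emulate_instruction() in the arch header above exists precisely so a caller can rerun phase 2 against a still-valid decode cache.
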
+ 1 - 1
include/linux/Kbuild

@@ -100,7 +100,6 @@ header-y += iso_fs.h
 header-y += ixjuser.h
 header-y += jffs2.h
 header-y += keyctl.h
-header-y += kvm.h
 header-y += limits.h
 header-y += lock_dlm_plock.h
 header-y += magic.h
@@ -256,6 +255,7 @@ unifdef-y += kd.h
 unifdef-y += kernelcapi.h
 unifdef-y += kernel.h
 unifdef-y += keyboard.h
+unifdef-$(CONFIG_HAVE_KVM) += kvm.h
 unifdef-y += llc.h
 unifdef-y += loop.h
 unifdef-y += lp.h

+ 50 - 153
include/linux/kvm.h

@@ -9,12 +9,10 @@
 
 #include <asm/types.h>
 #include <linux/ioctl.h>
+#include <asm/kvm.h>
 
 #define KVM_API_VERSION 12
 
-/* Architectural interrupt line count. */
-#define KVM_NR_INTERRUPTS 256
-
 /* for KVM_CREATE_MEMORY_REGION */
 struct kvm_memory_region {
 	__u32 slot;
@@ -23,17 +21,19 @@ struct kvm_memory_region {
 	__u64 memory_size; /* bytes */
 };
 
-/* for kvm_memory_region::flags */
-#define KVM_MEM_LOG_DIRTY_PAGES  1UL
-
-struct kvm_memory_alias {
-	__u32 slot;  /* this has a different namespace than memory slots */
+/* for KVM_SET_USER_MEMORY_REGION */
+struct kvm_userspace_memory_region {
+	__u32 slot;
 	__u32 flags;
 	__u64 guest_phys_addr;
-	__u64 memory_size;
-	__u64 target_phys_addr;
+	__u64 memory_size; /* bytes */
+	__u64 userspace_addr; /* start of the userspace allocated memory */
 };
 
+/* for kvm_memory_region::flags */
+#define KVM_MEM_LOG_DIRTY_PAGES  1UL
+
+
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
 	/*
@@ -45,62 +45,18 @@ struct kvm_irq_level {
 	__u32 level;
 };
 
-/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
-struct kvm_pic_state {
-	__u8 last_irr;	/* edge detection */
-	__u8 irr;		/* interrupt request register */
-	__u8 imr;		/* interrupt mask register */
-	__u8 isr;		/* interrupt service register */
-	__u8 priority_add;	/* highest irq priority */
-	__u8 irq_base;
-	__u8 read_reg_select;
-	__u8 poll;
-	__u8 special_mask;
-	__u8 init_state;
-	__u8 auto_eoi;
-	__u8 rotate_on_auto_eoi;
-	__u8 special_fully_nested_mode;
-	__u8 init4;		/* true if 4 byte init */
-	__u8 elcr;		/* PIIX edge/trigger selection */
-	__u8 elcr_mask;
-};
-
-#define KVM_IOAPIC_NUM_PINS  24
-struct kvm_ioapic_state {
-	__u64 base_address;
-	__u32 ioregsel;
-	__u32 id;
-	__u32 irr;
-	__u32 pad;
-	union {
-		__u64 bits;
-		struct {
-			__u8 vector;
-			__u8 delivery_mode:3;
-			__u8 dest_mode:1;
-			__u8 delivery_status:1;
-			__u8 polarity:1;
-			__u8 remote_irr:1;
-			__u8 trig_mode:1;
-			__u8 mask:1;
-			__u8 reserve:7;
-			__u8 reserved[4];
-			__u8 dest_id;
-		} fields;
-	} redirtbl[KVM_IOAPIC_NUM_PINS];
-};
-
-#define KVM_IRQCHIP_PIC_MASTER   0
-#define KVM_IRQCHIP_PIC_SLAVE    1
-#define KVM_IRQCHIP_IOAPIC       2
 
 struct kvm_irqchip {
 	__u32 chip_id;
 	__u32 pad;
         union {
 		char dummy[512];  /* reserving space */
+#ifdef CONFIG_X86
 		struct kvm_pic_state pic;
+#endif
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
 		struct kvm_ioapic_state ioapic;
+#endif
 	} chip;
 };
 
@@ -116,6 +72,7 @@ struct kvm_irqchip {
 #define KVM_EXIT_FAIL_ENTRY       9
 #define KVM_EXIT_INTR             10
 #define KVM_EXIT_SET_TPR          11
+#define KVM_EXIT_TPR_ACCESS       12
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
@@ -174,90 +131,17 @@ struct kvm_run {
 			__u32 longmode;
 			__u32 pad;
 		} hypercall;
+		/* KVM_EXIT_TPR_ACCESS */
+		struct {
+			__u64 rip;
+			__u32 is_write;
+			__u32 pad;
+		} tpr_access;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
 };
 
-/* for KVM_GET_REGS and KVM_SET_REGS */
-struct kvm_regs {
-	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
-	__u64 rax, rbx, rcx, rdx;
-	__u64 rsi, rdi, rsp, rbp;
-	__u64 r8,  r9,  r10, r11;
-	__u64 r12, r13, r14, r15;
-	__u64 rip, rflags;
-};
-
-/* for KVM_GET_FPU and KVM_SET_FPU */
-struct kvm_fpu {
-	__u8  fpr[8][16];
-	__u16 fcw;
-	__u16 fsw;
-	__u8  ftwx;  /* in fxsave format */
-	__u8  pad1;
-	__u16 last_opcode;
-	__u64 last_ip;
-	__u64 last_dp;
-	__u8  xmm[16][16];
-	__u32 mxcsr;
-	__u32 pad2;
-};
-
-/* for KVM_GET_LAPIC and KVM_SET_LAPIC */
-#define KVM_APIC_REG_SIZE 0x400
-struct kvm_lapic_state {
-	char regs[KVM_APIC_REG_SIZE];
-};
-
-struct kvm_segment {
-	__u64 base;
-	__u32 limit;
-	__u16 selector;
-	__u8  type;
-	__u8  present, dpl, db, s, l, g, avl;
-	__u8  unusable;
-	__u8  padding;
-};
-
-struct kvm_dtable {
-	__u64 base;
-	__u16 limit;
-	__u16 padding[3];
-};
-
-/* for KVM_GET_SREGS and KVM_SET_SREGS */
-struct kvm_sregs {
-	/* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
-	struct kvm_segment cs, ds, es, fs, gs, ss;
-	struct kvm_segment tr, ldt;
-	struct kvm_dtable gdt, idt;
-	__u64 cr0, cr2, cr3, cr4, cr8;
-	__u64 efer;
-	__u64 apic_base;
-	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
-};
-
-struct kvm_msr_entry {
-	__u32 index;
-	__u32 reserved;
-	__u64 data;
-};
-
-/* for KVM_GET_MSRS and KVM_SET_MSRS */
-struct kvm_msrs {
-	__u32 nmsrs; /* number of msrs in entries */
-	__u32 pad;
-
-	struct kvm_msr_entry entries[0];
-};
-
-/* for KVM_GET_MSR_INDEX_LIST */
-struct kvm_msr_list {
-	__u32 nmsrs; /* number of msrs in entries */
-	__u32 indices[0];
-};
-
 /* for KVM_TRANSLATE */
 struct kvm_translation {
 	/* in */
@@ -302,28 +186,24 @@ struct kvm_dirty_log {
 	};
 };
 
-struct kvm_cpuid_entry {
-	__u32 function;
-	__u32 eax;
-	__u32 ebx;
-	__u32 ecx;
-	__u32 edx;
-	__u32 padding;
-};
-
-/* for KVM_SET_CPUID */
-struct kvm_cpuid {
-	__u32 nent;
-	__u32 padding;
-	struct kvm_cpuid_entry entries[0];
-};
-
 /* for KVM_SET_SIGNAL_MASK */
 struct kvm_signal_mask {
 	__u32 len;
 	__u8  sigset[0];
 };
 
+/* for KVM_TPR_ACCESS_REPORTING */
+struct kvm_tpr_access_ctl {
+	__u32 enabled;
+	__u32 flags;
+	__u32 reserved[8];
+};
+
+/* for KVM_SET_VAPIC_ADDR */
+struct kvm_vapic_addr {
+	__u64 vapic_addr;
+};
+
 #define KVMIO 0xAE
 
 /*
@@ -347,11 +227,21 @@ struct kvm_signal_mask {
  */
 #define KVM_CAP_IRQCHIP	  0
 #define KVM_CAP_HLT	  1
+#define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2
+#define KVM_CAP_USER_MEMORY 3
+#define KVM_CAP_SET_TSS_ADDR 4
+#define KVM_CAP_EXT_CPUID 5
+#define KVM_CAP_VAPIC 6
 
 /*
  * ioctls for VM fds
  */
 #define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 0x40, struct kvm_memory_region)
+#define KVM_SET_NR_MMU_PAGES      _IO(KVMIO, 0x44)
+#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO, 0x45)
+#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
+					struct kvm_userspace_memory_region)
+#define KVM_SET_TSS_ADDR          _IO(KVMIO, 0x47)
 /*
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
  * a vcpu fd.
@@ -359,6 +249,7 @@ struct kvm_signal_mask {
 #define KVM_CREATE_VCPU           _IO(KVMIO,  0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
 #define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
+#define KVM_GET_SUPPORTED_CPUID   _IOWR(KVMIO, 0x48, struct kvm_cpuid2)
 /* Device model IOC */
 #define KVM_CREATE_IRQCHIP	  _IO(KVMIO,  0x60)
 #define KVM_IRQ_LINE		  _IOW(KVMIO, 0x61, struct kvm_irq_level)
@@ -384,5 +275,11 @@ struct kvm_signal_mask {
 #define KVM_SET_FPU               _IOW(KVMIO,  0x8d, struct kvm_fpu)
 #define KVM_GET_LAPIC             _IOR(KVMIO,  0x8e, struct kvm_lapic_state)
 #define KVM_SET_LAPIC             _IOW(KVMIO,  0x8f, struct kvm_lapic_state)
+#define KVM_SET_CPUID2            _IOW(KVMIO,  0x90, struct kvm_cpuid2)
+#define KVM_GET_CPUID2            _IOWR(KVMIO, 0x91, struct kvm_cpuid2)
+/* Available with KVM_CAP_VAPIC */
+#define KVM_TPR_ACCESS_REPORTING  _IOWR(KVMIO,  0x92, struct kvm_tpr_access_ctl)
+/* Available with KVM_CAP_VAPIC */
+#define KVM_SET_VAPIC_ADDR        _IOW(KVMIO,  0x93, struct kvm_vapic_addr)
 
 #endif

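The kvm_userspace_memory_region/KVM_SET_USER_MEMORY_REGION pair added above replaces kernel-allocated slots with guest memory backed by ordinary userspace mappings. A minimal userspace sketch follows; error handling is elided, and KVM_CREATE_VM is a pre-existing ioctl in this header that does not appear in the hunks above.

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);
	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);

	/* Back 1 MiB of guest physical memory, starting at gpa 0,
	 * with anonymous userspace memory. */
	void *mem = mmap(NULL, 0x100000, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct kvm_userspace_memory_region region = {
		.slot = 0,
		.flags = 0,
		.guest_phys_addr = 0,
		.memory_size = 0x100000,
		.userspace_addr = (unsigned long)mem,
	};

	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
	return 0;
}

Userspace keeps ownership of the pages; the kernel builds the slot's rmap and dirty bitmap (see kvm_memory_slot in kvm_host.h below) on top of that mapping.
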
+ 299 - 0
include/linux/kvm_host.h

@@ -0,0 +1,299 @@
+#ifndef __KVM_HOST_H
+#define __KVM_HOST_H
+
+/*
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <linux/hardirq.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/preempt.h>
+#include <asm/signal.h>
+
+#include <linux/kvm.h>
+#include <linux/kvm_para.h>
+
+#include <linux/kvm_types.h>
+
+#include <asm/kvm_host.h>
+
+#define KVM_MAX_VCPUS 4
+#define KVM_MEMORY_SLOTS 8
+/* memory slots that are not exposed to userspace */
+#define KVM_PRIVATE_MEM_SLOTS 4
+
+#define KVM_PIO_PAGE_OFFSET 1
+
+/*
+ * vcpu->requests bit members
+ */
+#define KVM_REQ_TLB_FLUSH          0
+#define KVM_REQ_MIGRATE_TIMER      1
+#define KVM_REQ_REPORT_TPR_ACCESS  2
+
+struct kvm_vcpu;
+extern struct kmem_cache *kvm_vcpu_cache;
+
+struct kvm_guest_debug {
+	int enabled;
+	unsigned long bp[4];
+	int singlestep;
+};
+
+/*
+ * It would be nice to use something smarter than a linear search, TBD...
+ * Thankfully we dont expect many devices to register (famous last words :),
+ * so until then it will suffice.  At least its abstracted so we can change
+ * in one place.
+ */
+struct kvm_io_bus {
+	int                   dev_count;
+#define NR_IOBUS_DEVS 6
+	struct kvm_io_device *devs[NR_IOBUS_DEVS];
+};
+
+void kvm_io_bus_init(struct kvm_io_bus *bus);
+void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
+void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
+			     struct kvm_io_device *dev);
+
+struct kvm_vcpu {
+	struct kvm *kvm;
+	struct preempt_notifier preempt_notifier;
+	int vcpu_id;
+	struct mutex mutex;
+	int   cpu;
+	struct kvm_run *run;
+	int guest_mode;
+	unsigned long requests;
+	struct kvm_guest_debug guest_debug;
+	int fpu_active;
+	int guest_fpu_loaded;
+	wait_queue_head_t wq;
+	int sigset_active;
+	sigset_t sigset;
+	struct kvm_vcpu_stat stat;
+
+#ifdef CONFIG_HAS_IOMEM
+	int mmio_needed;
+	int mmio_read_completed;
+	int mmio_is_write;
+	int mmio_size;
+	unsigned char mmio_data[8];
+	gpa_t mmio_phys_addr;
+#endif
+
+	struct kvm_vcpu_arch arch;
+};
+
+struct kvm_memory_slot {
+	gfn_t base_gfn;
+	unsigned long npages;
+	unsigned long flags;
+	unsigned long *rmap;
+	unsigned long *dirty_bitmap;
+	unsigned long userspace_addr;
+	int user_alloc;
+};
+
+struct kvm {
+	struct mutex lock; /* protects the vcpus array and APIC accesses */
+	spinlock_t mmu_lock;
+	struct mm_struct *mm; /* userspace tied to this vm */
+	int nmemslots;
+	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
+					KVM_PRIVATE_MEM_SLOTS];
+	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+	struct list_head vm_list;
+	struct file *filp;
+	struct kvm_io_bus mmio_bus;
+	struct kvm_io_bus pio_bus;
+	struct kvm_vm_stat stat;
+	struct kvm_arch arch;
+};
+
+/* The guest did something we don't support. */
+#define pr_unimpl(vcpu, fmt, ...)					\
+ do {									\
+	if (printk_ratelimit())						\
+		printk(KERN_ERR "kvm: %i: cpu%i " fmt,			\
+		       current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
+ } while (0)
+
+#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
+#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
+
+int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
+void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
+
+void vcpu_load(struct kvm_vcpu *vcpu);
+void vcpu_put(struct kvm_vcpu *vcpu);
+
+void decache_vcpus_on_cpu(int cpu);
+
+
+int kvm_init(void *opaque, unsigned int vcpu_size,
+		  struct module *module);
+void kvm_exit(void);
+
+#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
+#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
+static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
+struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
+
+extern struct page *bad_page;
+
+int is_error_page(struct page *page);
+int kvm_is_error_hva(unsigned long addr);
+int kvm_set_memory_region(struct kvm *kvm,
+			  struct kvm_userspace_memory_region *mem,
+			  int user_alloc);
+int __kvm_set_memory_region(struct kvm *kvm,
+			    struct kvm_userspace_memory_region *mem,
+			    int user_alloc);
+int kvm_arch_set_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old,
+				int user_alloc);
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
+void kvm_release_page_clean(struct page *page);
+void kvm_release_page_dirty(struct page *page);
+int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
+			int len);
+int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
+			  unsigned long len);
+int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
+int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
+			 int offset, int len);
+int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
+		    unsigned long len);
+int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
+int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
+int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
+void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
+
+void kvm_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_resched(struct kvm_vcpu *vcpu);
+void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
+void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
+void kvm_flush_remote_tlbs(struct kvm *kvm);
+
+long kvm_arch_dev_ioctl(struct file *filp,
+			unsigned int ioctl, unsigned long arg);
+long kvm_arch_vcpu_ioctl(struct file *filp,
+			 unsigned int ioctl, unsigned long arg);
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
+
+int kvm_dev_ioctl_check_extension(long ext);
+
+int kvm_get_dirty_log(struct kvm *kvm,
+			struct kvm_dirty_log *log, int *is_dirty);
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+				struct kvm_dirty_log *log);
+
+int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
+				   struct kvm_userspace_memory_region *mem,
+				   int user_alloc);
+long kvm_arch_vm_ioctl(struct file *filp,
+		       unsigned int ioctl, unsigned long arg);
+void kvm_arch_destroy_vm(struct kvm *kvm);
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+				    struct kvm_translation *tr);
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs);
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs);
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+				    struct kvm_debug_guest *dbg);
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
+
+int kvm_arch_init(void *opaque);
+void kvm_arch_exit(void);
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
+
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
+
+int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
+void kvm_arch_hardware_enable(void *garbage);
+void kvm_arch_hardware_disable(void *garbage);
+int kvm_arch_hardware_setup(void);
+void kvm_arch_hardware_unsetup(void);
+void kvm_arch_check_processor_compat(void *rtn);
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
+
+void kvm_free_physmem(struct kvm *kvm);
+
+struct  kvm *kvm_arch_create_vm(void);
+void kvm_arch_destroy_vm(struct kvm *kvm);
+
+int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
+int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+
+static inline void kvm_guest_enter(void)
+{
+	account_system_vtime(current);
+	current->flags |= PF_VCPU;
+}
+
+static inline void kvm_guest_exit(void)
+{
+	account_system_vtime(current);
+	current->flags &= ~PF_VCPU;
+}
+
+static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	return slot - kvm->memslots;
+}
+
+static inline gpa_t gfn_to_gpa(gfn_t gfn)
+{
+	return (gpa_t)gfn << PAGE_SHIFT;
+}
+
+static inline void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
+{
+	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
+}
+
+enum kvm_stat_kind {
+	KVM_STAT_VM,
+	KVM_STAT_VCPU,
+};
+
+struct kvm_stats_debugfs_item {
+	const char *name;
+	int offset;
+	enum kvm_stat_kind kind;
+	struct dentry *dentry;
+};
+extern struct kvm_stats_debugfs_item debugfs_entries[];
+
+#endif

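The vcpu->requests bit vector introduced above is the cross-thread signalling channel: a producer sets a request bit and kicks the vcpu, and the vcpu thread consumes the bits before re-entering the guest. A sketch of both halves; the consumer is modeled on the x86 run loop, and both function names are hypothetical.

#include <linux/kvm_host.h>

static void request_tlb_flush(struct kvm_vcpu *vcpu)	/* producer side */
{
	if (!test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
		kvm_vcpu_kick(vcpu);	/* force an exit so the bit is seen */
}

static void service_requests(struct kvm_vcpu *vcpu)	/* consumer side */
{
	if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
		kvm_mmu_flush_tlb(vcpu);	/* arch-provided flush */
}

kvm_flush_remote_tlbs() in virt/kvm/kvm_main.c below is the real producer: it sets the bit on every vcpu and IPIs the ones currently in guest mode.
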
+ 20 - 62
include/linux/kvm_para.h

@@ -2,72 +2,30 @@
 #define __LINUX_KVM_PARA_H
 
 /*
- * Guest OS interface for KVM paravirtualization
- *
- * Note: this interface is totally experimental, and is certain to change
- *       as we make progress.
+ * This header file provides a method for making a hypercall to the host.
+ * Architectures should define:
+ * - kvm_hypercall0, kvm_hypercall1...
+ * - kvm_arch_para_features
+ * - kvm_para_available
  */
 
-/*
- * Per-VCPU descriptor area shared between guest and host. Writable to
- * both guest and host. Registered with the host by the guest when
- * a guest acknowledges paravirtual mode.
- *
- * NOTE: all addresses are guest-physical addresses (gpa), to make it
- * easier for the hypervisor to map between the various addresses.
- */
-struct kvm_vcpu_para_state {
-	/*
-	 * API version information for compatibility. If there's any support
-	 * mismatch (too old host trying to execute too new guest) then
-	 * the host will deny entry into paravirtual mode. Any other
-	 * combination (new host + old guest and new host + new guest)
-	 * is supposed to work - new host versions will support all old
-	 * guest API versions.
-	 */
-	u32 guest_version;
-	u32 host_version;
-	u32 size;
-	u32 ret;
-
-	/*
-	 * The address of the vm exit instruction (VMCALL or VMMCALL),
-	 * which the host will patch according to the CPU model the
-	 * VM runs on:
-	 */
-	u64 hypercall_gpa;
-
-} __attribute__ ((aligned(PAGE_SIZE)));
-
-#define KVM_PARA_API_VERSION 1
-
-/*
- * This is used for an RDMSR's ECX parameter to probe for a KVM host.
- * Hopefully no CPU vendor will use up this number. This is placed well
- * out of way of the typical space occupied by CPU vendors' MSR indices,
- * and we think (or at least hope) it wont be occupied in the future
- * either.
- */
-#define MSR_KVM_API_MAGIC 0x87655678
+/* Return values for hypercalls */
+#define KVM_ENOSYS		1000
 
-#define KVM_EINVAL 1
+#define KVM_HC_VAPIC_POLL_IRQ            1
 
 /*
- * Hypercall calling convention:
- *
- * Each hypercall may have 0-6 parameters.
- *
- * 64-bit hypercall index is in RAX, goes from 0 to __NR_hypercalls-1
- *
- * 64-bit parameters 1-6 are in the standard gcc x86_64 calling convention
- * order: RDI, RSI, RDX, RCX, R8, R9.
- *
- * 32-bit index is EBX, parameters are: EAX, ECX, EDX, ESI, EDI, EBP.
- * (the first 3 are according to the gcc regparm calling convention)
- *
- * No registers are clobbered by the hypercall, except that the
- * return value is in RAX.
+ * hypercall implementations are architecture specific
  */
-#define __NR_hypercalls			0
+#include <asm/kvm_para.h>
+
+#ifdef __KERNEL__
+static inline int kvm_para_has_feature(unsigned int feature)
+{
+	if (kvm_arch_para_features() & (1UL << feature))
+		return 1;
+	return 0;
+}
+#endif /* __KERNEL__ */
+#endif /* __LINUX_KVM_PARA_H */
 
-#endif

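A guest is expected to gate each paravirtual path on its feature bit. A short sketch with a hypothetical bit follows; this merge defines the mechanism, but no KVM_FEATURE_* bits are defined yet.

#include <linux/kvm_para.h>

#define KVM_FEATURE_EXAMPLE	0	/* hypothetical: bit 0 of KVM_CPUID_FEATURES eax */

static inline int use_paravirt_path(void)
{
	return kvm_para_available() &&
	       kvm_para_has_feature(KVM_FEATURE_EXAMPLE);
}
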
+ 54 - 0
include/linux/kvm_types.h

@@ -0,0 +1,54 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ */
+
+#ifndef __KVM_TYPES_H__
+#define __KVM_TYPES_H__
+
+#include <asm/types.h>
+
+/*
+ * Address types:
+ *
+ *  gva - guest virtual address
+ *  gpa - guest physical address
+ *  gfn - guest frame number
+ *  hva - host virtual address
+ *  hpa - host physical address
+ *  hfn - host frame number
+ */
+
+typedef unsigned long  gva_t;
+typedef u64            gpa_t;
+typedef unsigned long  gfn_t;
+
+typedef unsigned long  hva_t;
+typedef u64            hpa_t;
+typedef unsigned long  hfn_t;
+
+struct kvm_pio_request {
+	unsigned long count;
+	int cur_count;
+	struct page *guest_pages[2];
+	unsigned guest_page_offset;
+	int in;
+	int port;
+	int size;
+	int string;
+	int down;
+	int rep;
+};
+
+#endif /* __KVM_TYPES_H__ */

+ 1 - 0
kernel/fork.c

@@ -393,6 +393,7 @@ void fastcall __mmdrop(struct mm_struct *mm)
 	destroy_context(mm);
 	free_mm(mm);
 }
+EXPORT_SYMBOL_GPL(__mmdrop);
 
 /*
  * Decrement the use count and release all resources for an mm.

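This export is needed because kvm_main.c, which can be built as a module, pins the creating process's mm for the VM's lifetime and drops the reference at destruction; mmdrop() can inline down to __mmdrop(). The pattern, excerpted from kvm_create_vm() and kvm_destroy_vm() below:

	/* kvm_create_vm(): pin the userspace mm for the VM's lifetime. */
	kvm->mm = current->mm;
	atomic_inc(&kvm->mm->mm_count);

	/* kvm_destroy_vm(): drop it; this may call the now-exported __mmdrop(). */
	mmdrop(mm);
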
+ 57 - 42
drivers/kvm/ioapic.c → virt/kvm/ioapic.c

@@ -26,7 +26,7 @@
  *  Based on Xen 3.1 code.
  */
 
-#include "kvm.h"
+#include <linux/kvm_host.h>
 #include <linux/kvm.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
@@ -34,14 +34,17 @@
 #include <linux/hrtimer.h>
 #include <linux/io.h>
 #include <asm/processor.h>
-#include <asm/msr.h>
 #include <asm/page.h>
 #include <asm/current.h>
-#include <asm/apicdef.h>
-#include <asm/io_apic.h>
-#include "irq.h"
-/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
+
+#include "ioapic.h"
+#include "lapic.h"
+
+#if 0
+#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
+#else
 #define ioapic_debug(fmt, arg...)
+#endif
 static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
 
 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@@ -113,7 +116,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 	default:
 		index = (ioapic->ioregsel - 0x10) >> 1;
 
-		ioapic_debug("change redir index %x val %x", index, val);
+		ioapic_debug("change redir index %x val %x\n", index, val);
 		if (index >= IOAPIC_NUM_PINS)
 			return;
 		if (ioapic->ioregsel & 1) {
@@ -131,16 +134,16 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 }
 
 static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
-			   struct kvm_lapic *target,
+			   struct kvm_vcpu *vcpu,
 			   u8 vector, u8 trig_mode, u8 delivery_mode)
 {
-	ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
+	ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode,
 		     delivery_mode);
 
-	ASSERT((delivery_mode == dest_Fixed) ||
-	       (delivery_mode == dest_LowestPrio));
+	ASSERT((delivery_mode == IOAPIC_FIXED) ||
+	       (delivery_mode == IOAPIC_LOWEST_PRIORITY));
 
-	kvm_apic_set_irq(target, vector, trig_mode);
+	kvm_apic_set_irq(vcpu, vector, trig_mode);
 }
 
 static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
@@ -151,12 +154,12 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 	struct kvm *kvm = ioapic->kvm;
 	struct kvm_vcpu *vcpu;
 
-	ioapic_debug("dest %d dest_mode %d", dest, dest_mode);
+	ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode);
 
 	if (dest_mode == 0) {	/* Physical mode. */
 		if (dest == 0xFF) {	/* Broadcast. */
 			for (i = 0; i < KVM_MAX_VCPUS; ++i)
-				if (kvm->vcpus[i] && kvm->vcpus[i]->apic)
+				if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic)
 					mask |= 1 << i;
 			return mask;
 		}
@@ -164,8 +167,8 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 			vcpu = kvm->vcpus[i];
 			if (!vcpu)
 				continue;
-			if (kvm_apic_match_physical_addr(vcpu->apic, dest)) {
-				if (vcpu->apic)
+			if (kvm_apic_match_physical_addr(vcpu->arch.apic, dest)) {
+				if (vcpu->arch.apic)
 					mask = 1 << i;
 				break;
 			}
@@ -175,11 +178,11 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 			vcpu = kvm->vcpus[i];
 			if (!vcpu)
 				continue;
-			if (vcpu->apic &&
-			    kvm_apic_match_logical_addr(vcpu->apic, dest))
+			if (vcpu->arch.apic &&
+			    kvm_apic_match_logical_addr(vcpu->arch.apic, dest))
 				mask |= 1 << vcpu->vcpu_id;
 		}
-	ioapic_debug("mask %x", mask);
+	ioapic_debug("mask %x\n", mask);
 	return mask;
 }
 
@@ -191,41 +194,39 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 	u8 vector = ioapic->redirtbl[irq].fields.vector;
 	u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
 	u32 deliver_bitmask;
-	struct kvm_lapic *target;
 	struct kvm_vcpu *vcpu;
 	int vcpu_id;
 
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
-		     "vector=%x trig_mode=%x",
+		     "vector=%x trig_mode=%x\n",
 		     dest, dest_mode, delivery_mode, vector, trig_mode);
 
 	deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
 	if (!deliver_bitmask) {
-		ioapic_debug("no target on destination");
+		ioapic_debug("no target on destination\n");
 		return;
 	}
 
 	switch (delivery_mode) {
-	case dest_LowestPrio:
-		target =
-		    kvm_apic_round_robin(ioapic->kvm, vector, deliver_bitmask);
-		if (target != NULL)
-			ioapic_inj_irq(ioapic, target, vector,
+	case IOAPIC_LOWEST_PRIORITY:
+		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
+				deliver_bitmask);
+		if (vcpu != NULL)
+			ioapic_inj_irq(ioapic, vcpu, vector,
 				       trig_mode, delivery_mode);
 		else
-			ioapic_debug("null round robin: "
-				     "mask=%x vector=%x delivery_mode=%x",
-				     deliver_bitmask, vector, dest_LowestPrio);
+			ioapic_debug("null lowest prio vcpu: "
+				     "mask=%x vector=%x delivery_mode=%x\n",
+				     deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY);
 		break;
-	case dest_Fixed:
+	case IOAPIC_FIXED:
 		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
 			if (!(deliver_bitmask & (1 << vcpu_id)))
 				continue;
 			deliver_bitmask &= ~(1 << vcpu_id);
 			vcpu = ioapic->kvm->vcpus[vcpu_id];
 			if (vcpu) {
-				target = vcpu->apic;
-				ioapic_inj_irq(ioapic, target, vector,
+				ioapic_inj_irq(ioapic, vcpu, vector,
 					       trig_mode, delivery_mode);
 			}
 		}
@@ -271,7 +272,7 @@ static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
 
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
 {
-	struct kvm_ioapic *ioapic = kvm->vioapic;
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 	union ioapic_redir_entry *ent;
 	int gsi;
 
@@ -304,7 +305,7 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
 	u32 result;
 
-	ioapic_debug("addr %lx", (unsigned long)addr);
+	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
 	addr &= 0xff;
@@ -341,8 +342,8 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
 	u32 data;
 
-	ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n",
-		     addr, len, val);
+	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
+		     (void *)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 	if (len == 4 || len == 8)
 		data = *(u32 *) val;
@@ -360,24 +361,38 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 	case IOAPIC_REG_WINDOW:
 		ioapic_write_indirect(ioapic, data);
 		break;
+#ifdef	CONFIG_IA64
+	case IOAPIC_REG_EOI:
+		kvm_ioapic_update_eoi(ioapic->kvm, data);
+		break;
+#endif
 
 	default:
 		break;
 	}
 }
 
+void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
+{
+	int i;
+
+	for (i = 0; i < IOAPIC_NUM_PINS; i++)
+		ioapic->redirtbl[i].fields.mask = 1;
+	ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+	ioapic->ioregsel = 0;
+	ioapic->irr = 0;
+	ioapic->id = 0;
+}
+
 int kvm_ioapic_init(struct kvm *kvm)
 {
 	struct kvm_ioapic *ioapic;
-	int i;
 
 	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
 	if (!ioapic)
 		return -ENOMEM;
-	kvm->vioapic = ioapic;
-	for (i = 0; i < IOAPIC_NUM_PINS; i++)
-		ioapic->redirtbl[i].fields.mask = 1;
-	ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+	kvm->arch.vioapic = ioapic;
+	kvm_ioapic_reset(ioapic);
 	ioapic->dev.read = ioapic_mmio_read;
 	ioapic->dev.write = ioapic_mmio_write;
 	ioapic->dev.in_range = ioapic_in_range;

+ 95 - 0
virt/kvm/ioapic.h

@@ -0,0 +1,95 @@
+#ifndef __KVM_IO_APIC_H
+#define __KVM_IO_APIC_H
+
+#include <linux/kvm_host.h>
+
+#include "iodev.h"
+
+struct kvm;
+struct kvm_vcpu;
+
+#define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
+#define IOAPIC_EDGE_TRIG  0
+#define IOAPIC_LEVEL_TRIG 1
+
+#define IOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000
+#define IOAPIC_MEM_LENGTH            0x100
+
+/* Direct registers. */
+#define IOAPIC_REG_SELECT  0x00
+#define IOAPIC_REG_WINDOW  0x10
+#define IOAPIC_REG_EOI     0x40	/* IA64 IOSAPIC only */
+
+/* Indirect registers. */
+#define IOAPIC_REG_APIC_ID 0x00	/* x86 IOAPIC only */
+#define IOAPIC_REG_VERSION 0x01
+#define IOAPIC_REG_ARB_ID  0x02	/* x86 IOAPIC only */
+
+/* ioapic delivery modes */
+#define	IOAPIC_FIXED			0x0
+#define	IOAPIC_LOWEST_PRIORITY		0x1
+#define	IOAPIC_PMI			0x2
+#define	IOAPIC_NMI			0x4
+#define	IOAPIC_INIT			0x5
+#define	IOAPIC_EXTINT			0x7
+
+struct kvm_ioapic {
+	u64 base_address;
+	u32 ioregsel;
+	u32 id;
+	u32 irr;
+	u32 pad;
+	union ioapic_redir_entry {
+		u64 bits;
+		struct {
+			u8 vector;
+			u8 delivery_mode:3;
+			u8 dest_mode:1;
+			u8 delivery_status:1;
+			u8 polarity:1;
+			u8 remote_irr:1;
+			u8 trig_mode:1;
+			u8 mask:1;
+			u8 reserve:7;
+			u8 reserved[4];
+			u8 dest_id;
+		} fields;
+	} redirtbl[IOAPIC_NUM_PINS];
+	struct kvm_io_device dev;
+	struct kvm *kvm;
+};
+
+#ifdef DEBUG
+#define ASSERT(x)  							\
+do {									\
+	if (!(x)) {							\
+		printk(KERN_EMERG "assertion failed %s: %d: %s\n",	\
+		       __FILE__, __LINE__, #x);				\
+		BUG();							\
+	}								\
+} while (0)
+#else
+#define ASSERT(x) do { } while (0)
+#endif
+
+static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
+{
+	return kvm->arch.vioapic;
+}
+
+#ifdef CONFIG_IA64
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	return 1;
+}
+#endif
+
+struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
+				       unsigned long bitmap);
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
+int kvm_ioapic_init(struct kvm *kvm);
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
+void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
+
+#endif

+ 63 - 0
virt/kvm/iodev.h

@@ -0,0 +1,63 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __KVM_IODEV_H__
+#define __KVM_IODEV_H__
+
+#include <linux/kvm_types.h>
+
+struct kvm_io_device {
+	void (*read)(struct kvm_io_device *this,
+		     gpa_t addr,
+		     int len,
+		     void *val);
+	void (*write)(struct kvm_io_device *this,
+		      gpa_t addr,
+		      int len,
+		      const void *val);
+	int (*in_range)(struct kvm_io_device *this, gpa_t addr);
+	void (*destructor)(struct kvm_io_device *this);
+
+	void             *private;
+};
+
+static inline void kvm_iodevice_read(struct kvm_io_device *dev,
+				     gpa_t addr,
+				     int len,
+				     void *val)
+{
+	dev->read(dev, addr, len, val);
+}
+
+static inline void kvm_iodevice_write(struct kvm_io_device *dev,
+				      gpa_t addr,
+				      int len,
+				      const void *val)
+{
+	dev->write(dev, addr, len, val);
+}
+
+static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr)
+{
+	return dev->in_range(dev, addr);
+}
+
+static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
+{
+	if (dev->destructor)
+		dev->destructor(dev);
+}
+
+#endif /* __KVM_IODEV_H__ */

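To make the ops table concrete, here is a sketch of a trivial one-byte MMIO device wired up through these helpers. All scratch_* names and the address are hypothetical.

#include <linux/kvm_host.h>
#include "iodev.h"

#define SCRATCH_GPA	0x10000000ULL	/* hypothetical guest-physical address */

static u8 scratch_val;

static void scratch_read(struct kvm_io_device *this, gpa_t addr, int len,
			 void *val)
{
	*(u8 *)val = scratch_val;	/* guest reads back the last write */
}

static void scratch_write(struct kvm_io_device *this, gpa_t addr, int len,
			  const void *val)
{
	scratch_val = *(const u8 *)val;
}

static int scratch_in_range(struct kvm_io_device *this, gpa_t addr)
{
	return addr == SCRATCH_GPA;
}

static struct kvm_io_device scratch_dev = {
	.read     = scratch_read,
	.write    = scratch_write,
	.in_range = scratch_in_range,
	/* .destructor left NULL; kvm_iodevice_destructor() checks for that */
};

Registration is a single call, kvm_io_bus_register_dev(&kvm->mmio_bus, &scratch_dev), after which kvm_io_bus_find_dev() routes matching guest accesses through the wrappers above.
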
+ 1400 - 0
virt/kvm/kvm_main.c

@@ -0,0 +1,1400 @@
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * This module enables machines with Intel VT-x extensions to run virtual
+ * machines without emulation or binary translation.
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ *
+ * Authors:
+ *   Avi Kivity   <avi@qumranet.com>
+ *   Yaniv Kamay  <yaniv@qumranet.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "iodev.h"
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/percpu.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/reboot.h>
+#include <linux/debugfs.h>
+#include <linux/highmem.h>
+#include <linux/file.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/smp.h>
+#include <linux/anon_inodes.h>
+#include <linux/profile.h>
+#include <linux/kvm_para.h>
+#include <linux/pagemap.h>
+#include <linux/mman.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+MODULE_AUTHOR("Qumranet");
+MODULE_LICENSE("GPL");
+
+DEFINE_SPINLOCK(kvm_lock);
+LIST_HEAD(vm_list);
+
+static cpumask_t cpus_hardware_enabled;
+
+struct kmem_cache *kvm_vcpu_cache;
+EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
+
+static __read_mostly struct preempt_ops kvm_preempt_ops;
+
+static struct dentry *debugfs_dir;
+
+static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
+			   unsigned long arg);
+
+static inline int valid_vcpu(int n)
+{
+	return likely(n >= 0 && n < KVM_MAX_VCPUS);
+}
+
+/*
+ * Switches to specified vcpu, until a matching vcpu_put()
+ */
+void vcpu_load(struct kvm_vcpu *vcpu)
+{
+	int cpu;
+
+	mutex_lock(&vcpu->mutex);
+	cpu = get_cpu();
+	preempt_notifier_register(&vcpu->preempt_notifier);
+	kvm_arch_vcpu_load(vcpu, cpu);
+	put_cpu();
+}
+
+void vcpu_put(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();
+	kvm_arch_vcpu_put(vcpu);
+	preempt_notifier_unregister(&vcpu->preempt_notifier);
+	preempt_enable();
+	mutex_unlock(&vcpu->mutex);
+}
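
[Editor's note] Everything that manipulates a vcpu from process context brackets the work with this pair: the mutex serializes against the vcpu thread, and the arch load/put hooks migrate hardware state onto and off the current CPU. A minimal hedged sketch of a caller (`example_with_vcpu` is invented, not from this commit):

#include <linux/kvm_host.h>

static int example_with_vcpu(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);	/* takes vcpu->mutex and loads arch state */
	/* per-vcpu state may be touched here; the guest cannot run */
	vcpu_put(vcpu);		/* unloads arch state and drops the mutex */
	return 0;
}
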
+
+static void ack_flush(void *_completed)
+{
+}
+
+void kvm_flush_remote_tlbs(struct kvm *kvm)
+{
+	int i, cpu;
+	cpumask_t cpus;
+	struct kvm_vcpu *vcpu;
+
+	cpus_clear(cpus);
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		vcpu = kvm->vcpus[i];
+		if (!vcpu)
+			continue;
+		if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+			continue;
+		cpu = vcpu->cpu;
+		if (cpu != -1 && cpu != raw_smp_processor_id())
+			cpu_set(cpu, cpus);
+	}
+	if (cpus_empty(cpus))
+		return;
+	++kvm->stat.remote_tlb_flush;
+	smp_call_function_mask(cpus, ack_flush, NULL, 1);
+}
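
[Editor's note] The request bit both carries the flush and dedups the IPIs: a vcpu whose bit was already set needs no second kick, and smp_call_function_mask() merely bounces the remaining CPUs out of guest mode via the empty ack_flush(). The consumer runs in the arch entry loop before each guest entry; a hedged sketch, mirroring the x86 check (`example_service_requests` is invented and `kvm_x86_ops->tlb_flush` is the x86 flush hook):

static void example_service_requests(struct kvm_vcpu *vcpu)
{
	/* run before each guest entry */
	if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
		kvm_x86_ops->tlb_flush(vcpu);
}
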
+
+int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
+{
+	struct page *page;
+	int r;
+
+	mutex_init(&vcpu->mutex);
+	vcpu->cpu = -1;
+	vcpu->kvm = kvm;
+	vcpu->vcpu_id = id;
+	init_waitqueue_head(&vcpu->wq);
+
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page) {
+		r = -ENOMEM;
+		goto fail;
+	}
+	vcpu->run = page_address(page);
+
+	r = kvm_arch_vcpu_init(vcpu);
+	if (r < 0)
+		goto fail_free_run;
+	return 0;
+
+fail_free_run:
+	free_page((unsigned long)vcpu->run);
+fail:
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_init);
+
+void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+	kvm_arch_vcpu_uninit(vcpu);
+	free_page((unsigned long)vcpu->run);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
+
+static struct kvm *kvm_create_vm(void)
+{
+	struct kvm *kvm = kvm_arch_create_vm();
+
+	if (IS_ERR(kvm))
+		goto out;
+
+	kvm->mm = current->mm;
+	atomic_inc(&kvm->mm->mm_count);
+	spin_lock_init(&kvm->mmu_lock);
+	kvm_io_bus_init(&kvm->pio_bus);
+	mutex_init(&kvm->lock);
+	kvm_io_bus_init(&kvm->mmio_bus);
+	spin_lock(&kvm_lock);
+	list_add(&kvm->vm_list, &vm_list);
+	spin_unlock(&kvm_lock);
+out:
+	return kvm;
+}
+
+/*
+ * Free any memory in @free but not in @dont.
+ */
+static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+				  struct kvm_memory_slot *dont)
+{
+	if (!dont || free->rmap != dont->rmap)
+		vfree(free->rmap);
+
+	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
+		vfree(free->dirty_bitmap);
+
+	free->npages = 0;
+	free->dirty_bitmap = NULL;
+	free->rmap = NULL;
+}
+
+void kvm_free_physmem(struct kvm *kvm)
+{
+	int i;
+
+	for (i = 0; i < kvm->nmemslots; ++i)
+		kvm_free_physmem_slot(&kvm->memslots[i], NULL);
+}
+
+static void kvm_destroy_vm(struct kvm *kvm)
+{
+	struct mm_struct *mm = kvm->mm;
+
+	spin_lock(&kvm_lock);
+	list_del(&kvm->vm_list);
+	spin_unlock(&kvm_lock);
+	kvm_io_bus_destroy(&kvm->pio_bus);
+	kvm_io_bus_destroy(&kvm->mmio_bus);
+	kvm_arch_destroy_vm(kvm);
+	mmdrop(mm);
+}
+
+static int kvm_vm_release(struct inode *inode, struct file *filp)
+{
+	struct kvm *kvm = filp->private_data;
+
+	kvm_destroy_vm(kvm);
+	return 0;
+}
+
+/*
+ * Allocate some memory and give it an address in the guest physical address
+ * space.
+ *
+ * Discontiguous memory is allowed, mostly for framebuffers.
+ *
+ * Must be called holding mmap_sem for write.
+ */
+int __kvm_set_memory_region(struct kvm *kvm,
+			    struct kvm_userspace_memory_region *mem,
+			    int user_alloc)
+{
+	int r;
+	gfn_t base_gfn;
+	unsigned long npages;
+	unsigned long i;
+	struct kvm_memory_slot *memslot;
+	struct kvm_memory_slot old, new;
+
+	r = -EINVAL;
+	/* General sanity checks */
+	if (mem->memory_size & (PAGE_SIZE - 1))
+		goto out;
+	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
+		goto out;
+	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
+		goto out;
+	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
+		goto out;
+
+	memslot = &kvm->memslots[mem->slot];
+	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
+	npages = mem->memory_size >> PAGE_SHIFT;
+
+	if (!npages)
+		mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
+
+	new = old = *memslot;
+
+	new.base_gfn = base_gfn;
+	new.npages = npages;
+	new.flags = mem->flags;
+
+	/* Disallow changing a memory slot's size. */
+	r = -EINVAL;
+	if (npages && old.npages && npages != old.npages)
+		goto out_free;
+
+	/* Check for overlaps */
+	r = -EEXIST;
+	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
+		struct kvm_memory_slot *s = &kvm->memslots[i];
+
+		if (s == memslot)
+			continue;
+		if (!((base_gfn + npages <= s->base_gfn) ||
+		      (base_gfn >= s->base_gfn + s->npages)))
+			goto out_free;
+	}
+
+	/* Free page dirty bitmap if unneeded */
+	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
+		new.dirty_bitmap = NULL;
+
+	r = -ENOMEM;
+
+	/* Allocate if a slot is being created */
+	if (npages && !new.rmap) {
+		new.rmap = vmalloc(npages * sizeof(struct page *));
+
+		if (!new.rmap)
+			goto out_free;
+
+		memset(new.rmap, 0, npages * sizeof(*new.rmap));
+
+		new.user_alloc = user_alloc;
+		new.userspace_addr = mem->userspace_addr;
+	}
+
+	/* Allocate page dirty bitmap if needed */
+	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
+		unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
+
+		new.dirty_bitmap = vmalloc(dirty_bytes);
+		if (!new.dirty_bitmap)
+			goto out_free;
+		memset(new.dirty_bitmap, 0, dirty_bytes);
+	}
+
+	if (mem->slot >= kvm->nmemslots)
+		kvm->nmemslots = mem->slot + 1;
+
+	*memslot = new;
+
+	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
+	if (r) {
+		*memslot = old;
+		goto out_free;
+	}
+
+	kvm_free_physmem_slot(&old, &new);
+	return 0;
+
+out_free:
+	kvm_free_physmem_slot(&new, &old);
+out:
+	return r;
+
+}
+EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
+
+int kvm_set_memory_region(struct kvm *kvm,
+			  struct kvm_userspace_memory_region *mem,
+			  int user_alloc)
+{
+	int r;
+
+	down_write(&current->mm->mmap_sem);
+	r = __kvm_set_memory_region(kvm, mem, user_alloc);
+	up_write(&current->mm->mmap_sem);
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvm_set_memory_region);
+
+int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
+				   struct kvm_userspace_memory_region *mem,
+				   int user_alloc)
+{
+	if (mem->slot >= KVM_MEMORY_SLOTS)
+		return -EINVAL;
+	return kvm_set_memory_region(kvm, mem, user_alloc);
+}
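
[Editor's note] Userspace reaches this path through the KVM_SET_USER_MEMORY_REGION vm ioctl (handled further down in kvm_vm_ioctl()), passing an ordinary mmap()ed range as backing store; user_alloc=1 means the kernel pins user pages rather than allocating its own. A hedged userspace sketch, error handling elided (`map_guest_ram` is invented; vm_fd is a KVM_CREATE_VM fd):

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static int map_guest_ram(int vm_fd, size_t size)
{
	void *ram = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct kvm_userspace_memory_region region = {
		.slot            = 0,
		.guest_phys_addr = 0,		/* base of guest RAM */
		.memory_size     = size,	/* must be page-aligned */
		.userspace_addr  = (unsigned long)ram,
	};

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}
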
+
+int kvm_get_dirty_log(struct kvm *kvm,
+			struct kvm_dirty_log *log, int *is_dirty)
+{
+	struct kvm_memory_slot *memslot;
+	int r, i;
+	int n;
+	unsigned long any = 0;
+
+	r = -EINVAL;
+	if (log->slot >= KVM_MEMORY_SLOTS)
+		goto out;
+
+	memslot = &kvm->memslots[log->slot];
+	r = -ENOENT;
+	if (!memslot->dirty_bitmap)
+		goto out;
+
+	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+
+	for (i = 0; !any && i < n/sizeof(long); ++i)
+		any = memslot->dirty_bitmap[i];
+
+	r = -EFAULT;
+	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
+		goto out;
+
+	if (any)
+		*is_dirty = 1;
+
+	r = 0;
+out:
+	return r;
+}
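
[Editor's note] The bitmap handed back has one bit per page relative to the slot base, and only exists once the slot was created with KVM_MEM_LOG_DIRTY_PAGES set. A hedged userspace sketch of harvesting it (`fetch_dirty_bitmap` is invented; the buffer must cover ALIGN(npages, BITS_PER_LONG) / 8 bytes, matching the n computed above):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int fetch_dirty_bitmap(int vm_fd, int slot, void *bitmap)
{
	struct kvm_dirty_log log = {
		.slot         = slot,
		.dirty_bitmap = bitmap,	/* one bit per page in the slot */
	};

	return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
}
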
+
+int is_error_page(struct page *page)
+{
+	return page == bad_page;
+}
+EXPORT_SYMBOL_GPL(is_error_page);
+
+static inline unsigned long bad_hva(void)
+{
+	return PAGE_OFFSET;
+}
+
+int kvm_is_error_hva(unsigned long addr)
+{
+	return addr == bad_hva();
+}
+EXPORT_SYMBOL_GPL(kvm_is_error_hva);
+
+static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
+{
+	int i;
+
+	for (i = 0; i < kvm->nmemslots; ++i) {
+		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+
+		if (gfn >= memslot->base_gfn
+		    && gfn < memslot->base_gfn + memslot->npages)
+			return memslot;
+	}
+	return NULL;
+}
+
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
+{
+	gfn = unalias_gfn(kvm, gfn);
+	return __gfn_to_memslot(kvm, gfn);
+}
+
+int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
+{
+	int i;
+
+	gfn = unalias_gfn(kvm, gfn);
+	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
+		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+
+		if (gfn >= memslot->base_gfn
+		    && gfn < memslot->base_gfn + memslot->npages)
+			return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
+
+static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot;
+
+	gfn = unalias_gfn(kvm, gfn);
+	slot = __gfn_to_memslot(kvm, gfn);
+	if (!slot)
+		return bad_hva();
+	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+}
+
+/*
+ * Requires current->mm->mmap_sem to be held
+ */
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+{
+	struct page *page[1];
+	unsigned long addr;
+	int npages;
+
+	might_sleep();
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr)) {
+		get_page(bad_page);
+		return bad_page;
+	}
+
+	npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
+				NULL);
+
+	if (npages != 1) {
+		get_page(bad_page);
+		return bad_page;
+	}
+
+	return page[0];
+}
+EXPORT_SYMBOL_GPL(gfn_to_page);
+
+void kvm_release_page_clean(struct page *page)
+{
+	put_page(page);
+}
+EXPORT_SYMBOL_GPL(kvm_release_page_clean);
+
+void kvm_release_page_dirty(struct page *page)
+{
+	if (!PageReserved(page))
+		SetPageDirty(page);
+	put_page(page);
+}
+EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
+
+static int next_segment(unsigned long len, int offset)
+{
+	if (len > PAGE_SIZE - offset)
+		return PAGE_SIZE - offset;
+	else
+		return len;
+}
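
[Editor's note] next_segment() clamps each copy at the current page boundary, so a transfer that straddles pages decays into at most one short leading chunk, whole pages, then a short trailing chunk. With 4 KiB pages, for instance, a kvm_read_guest() of 6000 bytes starting 3000 bytes into a page issues three copies of 1096, 4096 and 808 bytes, bumping gfn after each.
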
+
+int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
+			int len)
+{
+	int r;
+	unsigned long addr;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr))
+		return -EFAULT;
+	r = copy_from_user(data, (void __user *)addr + offset, len);
+	if (r)
+		return -EFAULT;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_read_guest_page);
+
+int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
+{
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	int seg;
+	int offset = offset_in_page(gpa);
+	int ret;
+
+	while ((seg = next_segment(len, offset)) != 0) {
+		ret = kvm_read_guest_page(kvm, gfn, data, offset, seg);
+		if (ret < 0)
+			return ret;
+		offset = 0;
+		len -= seg;
+		data += seg;
+		++gfn;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_read_guest);
+
+int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
+			  unsigned long len)
+{
+	int r;
+	unsigned long addr;
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	int offset = offset_in_page(gpa);
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr))
+		return -EFAULT;
+	r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
+	if (r)
+		return -EFAULT;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_read_guest_atomic);
+
+int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
+			 int offset, int len)
+{
+	int r;
+	unsigned long addr;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr))
+		return -EFAULT;
+	r = copy_to_user((void __user *)addr + offset, data, len);
+	if (r)
+		return -EFAULT;
+	mark_page_dirty(kvm, gfn);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_write_guest_page);
+
+int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
+		    unsigned long len)
+{
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	int seg;
+	int offset = offset_in_page(gpa);
+	int ret;
+
+	while ((seg = next_segment(len, offset)) != 0) {
+		ret = kvm_write_guest_page(kvm, gfn, data, offset, seg);
+		if (ret < 0)
+			return ret;
+		offset = 0;
+		len -= seg;
+		data += seg;
+		++gfn;
+	}
+	return 0;
+}
+
+int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
+{
+	return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len);
+}
+EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
+
+int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
+{
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	int seg;
+	int offset = offset_in_page(gpa);
+	int ret;
+
+	while ((seg = next_segment(len, offset)) != 0) {
+		ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
+		if (ret < 0)
+			return ret;
+		offset = 0;
+		len -= seg;
+		++gfn;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_clear_guest);
+
+void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_memory_slot *memslot;
+
+	gfn = unalias_gfn(kvm, gfn);
+	memslot = __gfn_to_memslot(kvm, gfn);
+	if (memslot && memslot->dirty_bitmap) {
+		unsigned long rel_gfn = gfn - memslot->base_gfn;
+
+		/* avoid RMW */
+		if (!test_bit(rel_gfn, memslot->dirty_bitmap))
+			set_bit(rel_gfn, memslot->dirty_bitmap);
+	}
+}
+
+/*
+ * The vCPU has executed a HLT instruction with in-kernel mode enabled.
+ */
+void kvm_vcpu_block(struct kvm_vcpu *vcpu)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue(&vcpu->wq, &wait);
+
+	/*
+	 * We will block until either an interrupt or a signal wakes us up
+	 */
+	while (!kvm_cpu_has_interrupt(vcpu)
+	       && !signal_pending(current)
+	       && !kvm_arch_vcpu_runnable(vcpu)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		vcpu_put(vcpu);
+		schedule();
+		vcpu_load(vcpu);
+	}
+
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&vcpu->wq, &wait);
+}
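
[Editor's note] Arch code parks a halted vcpu here rather than spinning; the wait loop re-tests interrupts, signals and arch runnability on every wakeup, and the vcpu_put()/vcpu_load() around schedule() release the hardware state while sleeping. A schematic call site, hedged (`example_halt` is invented; the real x86 path routes this through its own run loop):

static int example_halt(struct kvm_vcpu *vcpu)
{
	if (!kvm_cpu_has_interrupt(vcpu))
		kvm_vcpu_block(vcpu);	/* sleep until interrupt or signal */
	if (signal_pending(current))
		return -EINTR;		/* let userspace observe the signal */
	return 0;
}
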
+
+void kvm_resched(struct kvm_vcpu *vcpu)
+{
+	if (!need_resched())
+		return;
+	cond_resched();
+}
+EXPORT_SYMBOL_GPL(kvm_resched);
+
+static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kvm_vcpu *vcpu = vma->vm_file->private_data;
+	struct page *page;
+
+	if (vmf->pgoff == 0)
+		page = virt_to_page(vcpu->run);
+	else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
+		page = virt_to_page(vcpu->arch.pio_data);
+	else
+		return VM_FAULT_SIGBUS;
+	get_page(page);
+	vmf->page = page;
+	return 0;
+}
+
+static struct vm_operations_struct kvm_vcpu_vm_ops = {
+	.fault = kvm_vcpu_fault,
+};
+
+static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	vma->vm_ops = &kvm_vcpu_vm_ops;
+	return 0;
+}
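
[Editor's note] Offset 0 of a vcpu fd maps the shared kvm_run block and page KVM_PIO_PAGE_OFFSET maps the PIO scratch page, which is why KVM_GET_VCPU_MMAP_SIZE further down answers 2 * PAGE_SIZE. A hedged userspace sketch (`map_run` is invented; error handling elided):

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static struct kvm_run *map_run(int kvm_fd, int vcpu_fd)
{
	long size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);

	/* page 0: struct kvm_run; page KVM_PIO_PAGE_OFFSET: PIO data */
	return mmap(NULL, size, PROT_READ | PROT_WRITE,
		    MAP_SHARED, vcpu_fd, 0);
}
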
+
+static int kvm_vcpu_release(struct inode *inode, struct file *filp)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+
+	fput(vcpu->kvm->filp);
+	return 0;
+}
+
+static struct file_operations kvm_vcpu_fops = {
+	.release        = kvm_vcpu_release,
+	.unlocked_ioctl = kvm_vcpu_ioctl,
+	.compat_ioctl   = kvm_vcpu_ioctl,
+	.mmap           = kvm_vcpu_mmap,
+};
+
+/*
+ * Allocates an inode for the vcpu.
+ */
+static int create_vcpu_fd(struct kvm_vcpu *vcpu)
+{
+	int fd, r;
+	struct inode *inode;
+	struct file *file;
+
+	r = anon_inode_getfd(&fd, &inode, &file,
+			     "kvm-vcpu", &kvm_vcpu_fops, vcpu);
+	if (r)
+		return r;
+	atomic_inc(&vcpu->kvm->filp->f_count);
+	return fd;
+}
+
+/*
+ * Creates some virtual cpus.  Good luck creating more than one.
+ */
+static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
+{
+	int r;
+	struct kvm_vcpu *vcpu;
+
+	if (!valid_vcpu(n))
+		return -EINVAL;
+
+	vcpu = kvm_arch_vcpu_create(kvm, n);
+	if (IS_ERR(vcpu))
+		return PTR_ERR(vcpu);
+
+	preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
+
+	r = kvm_arch_vcpu_setup(vcpu);
+	if (r)
+		goto vcpu_destroy;
+
+	mutex_lock(&kvm->lock);
+	if (kvm->vcpus[n]) {
+		r = -EEXIST;
+		mutex_unlock(&kvm->lock);
+		goto vcpu_destroy;
+	}
+	kvm->vcpus[n] = vcpu;
+	mutex_unlock(&kvm->lock);
+
+	/* Now it's all set up, let userspace reach it */
+	r = create_vcpu_fd(vcpu);
+	if (r < 0)
+		goto unlink;
+	return r;
+
+unlink:
+	mutex_lock(&kvm->lock);
+	kvm->vcpus[n] = NULL;
+	mutex_unlock(&kvm->lock);
+vcpu_destroy:
+	kvm_arch_vcpu_destroy(vcpu);
+	return r;
+}
+
+static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
+{
+	if (sigset) {
+		sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		vcpu->sigset_active = 1;
+		vcpu->sigset = *sigset;
+	} else
+		vcpu->sigset_active = 0;
+	return 0;
+}
+
+static long kvm_vcpu_ioctl(struct file *filp,
+			   unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int r;
+
+	if (vcpu->kvm->mm != current->mm)
+		return -EIO;
+	switch (ioctl) {
+	case KVM_RUN:
+		r = -EINVAL;
+		if (arg)
+			goto out;
+		r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
+		break;
+	case KVM_GET_REGS: {
+		struct kvm_regs kvm_regs;
+
+		memset(&kvm_regs, 0, sizeof kvm_regs);
+		r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_SET_REGS: {
+		struct kvm_regs kvm_regs;
+
+		r = -EFAULT;
+		if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
+			goto out;
+		r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_GET_SREGS: {
+		struct kvm_sregs kvm_sregs;
+
+		memset(&kvm_sregs, 0, sizeof kvm_sregs);
+		r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_SET_SREGS: {
+		struct kvm_sregs kvm_sregs;
+
+		r = -EFAULT;
+		if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs))
+			goto out;
+		r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_TRANSLATE: {
+		struct kvm_translation tr;
+
+		r = -EFAULT;
+		if (copy_from_user(&tr, argp, sizeof tr))
+			goto out;
+		r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &tr, sizeof tr))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_DEBUG_GUEST: {
+		struct kvm_debug_guest dbg;
+
+		r = -EFAULT;
+		if (copy_from_user(&dbg, argp, sizeof dbg))
+			goto out;
+		r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_SET_SIGNAL_MASK: {
+		struct kvm_signal_mask __user *sigmask_arg = argp;
+		struct kvm_signal_mask kvm_sigmask;
+		sigset_t sigset, *p;
+
+		p = NULL;
+		if (argp) {
+			r = -EFAULT;
+			if (copy_from_user(&kvm_sigmask, argp,
+					   sizeof kvm_sigmask))
+				goto out;
+			r = -EINVAL;
+			if (kvm_sigmask.len != sizeof sigset)
+				goto out;
+			r = -EFAULT;
+			if (copy_from_user(&sigset, sigmask_arg->sigset,
+					   sizeof sigset))
+				goto out;
+			p = &sigset;
+		}
+		r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
+		break;
+	}
+	case KVM_GET_FPU: {
+		struct kvm_fpu fpu;
+
+		memset(&fpu, 0, sizeof fpu);
+		r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, &fpu);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &fpu, sizeof fpu))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_SET_FPU: {
+		struct kvm_fpu fpu;
+
+		r = -EFAULT;
+		if (copy_from_user(&fpu, argp, sizeof fpu))
+			goto out;
+		r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, &fpu);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
+	default:
+		r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
+	}
+out:
+	return r;
+}
+
+static long kvm_vm_ioctl(struct file *filp,
+			   unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int r;
+
+	if (kvm->mm != current->mm)
+		return -EIO;
+	switch (ioctl) {
+	case KVM_CREATE_VCPU:
+		r = kvm_vm_ioctl_create_vcpu(kvm, arg);
+		if (r < 0)
+			goto out;
+		break;
+	case KVM_SET_USER_MEMORY_REGION: {
+		struct kvm_userspace_memory_region kvm_userspace_mem;
+
+		r = -EFAULT;
+		if (copy_from_user(&kvm_userspace_mem, argp,
+						sizeof kvm_userspace_mem))
+			goto out;
+
+		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_GET_DIRTY_LOG: {
+		struct kvm_dirty_log log;
+
+		r = -EFAULT;
+		if (copy_from_user(&log, argp, sizeof log))
+			goto out;
+		r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
+		if (r)
+			goto out;
+		break;
+	}
+	default:
+		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
+	}
+out:
+	return r;
+}
+
+static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kvm *kvm = vma->vm_file->private_data;
+	struct page *page;
+
+	if (!kvm_is_visible_gfn(kvm, vmf->pgoff))
+		return VM_FAULT_SIGBUS;
+	page = gfn_to_page(kvm, vmf->pgoff);
+	if (is_error_page(page)) {
+		kvm_release_page_clean(page);
+		return VM_FAULT_SIGBUS;
+	}
+	vmf->page = page;
+	return 0;
+}
+
+static struct vm_operations_struct kvm_vm_vm_ops = {
+	.fault = kvm_vm_fault,
+};
+
+static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	vma->vm_ops = &kvm_vm_vm_ops;
+	return 0;
+}
+
+static struct file_operations kvm_vm_fops = {
+	.release        = kvm_vm_release,
+	.unlocked_ioctl = kvm_vm_ioctl,
+	.compat_ioctl   = kvm_vm_ioctl,
+	.mmap           = kvm_vm_mmap,
+};
+
+static int kvm_dev_ioctl_create_vm(void)
+{
+	int fd, r;
+	struct inode *inode;
+	struct file *file;
+	struct kvm *kvm;
+
+	kvm = kvm_create_vm();
+	if (IS_ERR(kvm))
+		return PTR_ERR(kvm);
+	r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
+	if (r) {
+		kvm_destroy_vm(kvm);
+		return r;
+	}
+
+	kvm->filp = file;
+
+	return fd;
+}
+
+static long kvm_dev_ioctl(struct file *filp,
+			  unsigned int ioctl, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	long r = -EINVAL;
+
+	switch (ioctl) {
+	case KVM_GET_API_VERSION:
+		r = -EINVAL;
+		if (arg)
+			goto out;
+		r = KVM_API_VERSION;
+		break;
+	case KVM_CREATE_VM:
+		r = -EINVAL;
+		if (arg)
+			goto out;
+		r = kvm_dev_ioctl_create_vm();
+		break;
+	case KVM_CHECK_EXTENSION:
+		r = kvm_dev_ioctl_check_extension((long)argp);
+		break;
+	case KVM_GET_VCPU_MMAP_SIZE:
+		r = -EINVAL;
+		if (arg)
+			goto out;
+		r = 2 * PAGE_SIZE;
+		break;
+	default:
+		return kvm_arch_dev_ioctl(filp, ioctl, arg);
+	}
+out:
+	return r;
+}
+
+static struct file_operations kvm_chardev_ops = {
+	.unlocked_ioctl = kvm_dev_ioctl,
+	.compat_ioctl   = kvm_dev_ioctl,
+};
+
+static struct miscdevice kvm_dev = {
+	KVM_MINOR,
+	"kvm",
+	&kvm_chardev_ops,
+};
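
[Editor's note] The fd hierarchy fans out from this one char device: /dev/kvm ioctls mint VM fds, VM ioctls mint vcpu fds, each backed by the anon-inode fops above. A hedged sketch of the userspace bootstrap (`bootstrap_vcpu` is invented; error handling elided):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int bootstrap_vcpu(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);

	if (ioctl(kvm_fd, KVM_GET_API_VERSION, 0) != KVM_API_VERSION)
		return -1;				/* ABI mismatch */

	int vm_fd   = ioctl(kvm_fd, KVM_CREATE_VM, 0);	/* anon-inode VM fd */
	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);	/* vcpu id 0 */

	return vcpu_fd;
}
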
+
+static void hardware_enable(void *junk)
+{
+	int cpu = raw_smp_processor_id();
+
+	if (cpu_isset(cpu, cpus_hardware_enabled))
+		return;
+	cpu_set(cpu, cpus_hardware_enabled);
+	kvm_arch_hardware_enable(NULL);
+}
+
+static void hardware_disable(void *junk)
+{
+	int cpu = raw_smp_processor_id();
+
+	if (!cpu_isset(cpu, cpus_hardware_enabled))
+		return;
+	cpu_clear(cpu, cpus_hardware_enabled);
+	decache_vcpus_on_cpu(cpu);
+	kvm_arch_hardware_disable(NULL);
+}
+
+static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
+			   void *v)
+{
+	int cpu = (long)v;
+
+	val &= ~CPU_TASKS_FROZEN;
+	switch (val) {
+	case CPU_DYING:
+		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
+		       cpu);
+		hardware_disable(NULL);
+		break;
+	case CPU_UP_CANCELED:
+		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
+		       cpu);
+		smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
+		break;
+	case CPU_ONLINE:
+		printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
+		       cpu);
+		smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
+		      void *v)
+{
+	if (val == SYS_RESTART) {
+		/*
+		 * Some (well, at least mine) BIOSes hang on reboot if
+		 * in vmx root mode.
+		 */
+		printk(KERN_INFO "kvm: exiting hardware virtualization\n");
+		on_each_cpu(hardware_disable, NULL, 0, 1);
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block kvm_reboot_notifier = {
+	.notifier_call = kvm_reboot,
+	.priority = 0,
+};
+
+void kvm_io_bus_init(struct kvm_io_bus *bus)
+{
+	memset(bus, 0, sizeof(*bus));
+}
+
+void kvm_io_bus_destroy(struct kvm_io_bus *bus)
+{
+	int i;
+
+	for (i = 0; i < bus->dev_count; i++) {
+		struct kvm_io_device *pos = bus->devs[i];
+
+		kvm_iodevice_destructor(pos);
+	}
+}
+
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
+{
+	int i;
+
+	for (i = 0; i < bus->dev_count; i++) {
+		struct kvm_io_device *pos = bus->devs[i];
+
+		if (pos->in_range(pos, addr))
+			return pos;
+	}
+
+	return NULL;
+}
+
+void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
+{
+	BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));
+
+	bus->devs[bus->dev_count++] = dev;
+}
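
[Editor's note] A device model registers by appending its kvm_io_device to the pio or mmio bus; lookup is then a linear in_range() scan, capped at NR_IOBUS_DEVS entries. A hedged dispatch sketch (`example_mmio_read` is invented; this mirrors how the MMIO exit path consults the bus):

static void example_mmio_read(struct kvm *kvm, gpa_t addr,
			      int len, void *val)
{
	struct kvm_io_device *dev = kvm_io_bus_find_dev(&kvm->mmio_bus, addr);

	if (dev)
		kvm_iodevice_read(dev, addr, len, val);	/* device fills val */
}
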
+
+static struct notifier_block kvm_cpu_notifier = {
+	.notifier_call = kvm_cpu_hotplug,
+	.priority = 20, /* must be > scheduler priority */
+};
+
+static u64 vm_stat_get(void *_offset)
+{
+	unsigned offset = (long)_offset;
+	u64 total = 0;
+	struct kvm *kvm;
+
+	spin_lock(&kvm_lock);
+	list_for_each_entry(kvm, &vm_list, vm_list)
+		total += *(u32 *)((void *)kvm + offset);
+	spin_unlock(&kvm_lock);
+	return total;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");
+
+static u64 vcpu_stat_get(void *_offset)
+{
+	unsigned offset = (long)_offset;
+	u64 total = 0;
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	spin_lock(&kvm_lock);
+	list_for_each_entry(kvm, &vm_list, vm_list)
+		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+			vcpu = kvm->vcpus[i];
+			if (vcpu)
+				total += *(u32 *)((void *)vcpu + offset);
+		}
+	spin_unlock(&kvm_lock);
+	return total;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");
+
+static struct file_operations *stat_fops[] = {
+	[KVM_STAT_VCPU] = &vcpu_stat_fops,
+	[KVM_STAT_VM]   = &vm_stat_fops,
+};
+
+static void kvm_init_debug(void)
+{
+	struct kvm_stats_debugfs_item *p;
+
+	debugfs_dir = debugfs_create_dir("kvm", NULL);
+	for (p = debugfs_entries; p->name; ++p)
+		p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir,
+						(void *)(long)p->offset,
+						stat_fops[p->kind]);
+}
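
[Editor's note] Each entry in debugfs_entries thus surfaces as a read-only file under the kvm directory of debugfs (typically /sys/kernel/debug/kvm/), with vm_stat_fops or vcpu_stat_fops summing the chosen field across every VM or vcpu at read time.
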
+
+static void kvm_exit_debug(void)
+{
+	struct kvm_stats_debugfs_item *p;
+
+	for (p = debugfs_entries; p->name; ++p)
+		debugfs_remove(p->dentry);
+	debugfs_remove(debugfs_dir);
+}
+
+static int kvm_suspend(struct sys_device *dev, pm_message_t state)
+{
+	hardware_disable(NULL);
+	return 0;
+}
+
+static int kvm_resume(struct sys_device *dev)
+{
+	hardware_enable(NULL);
+	return 0;
+}
+
+static struct sysdev_class kvm_sysdev_class = {
+	.name = "kvm",
+	.suspend = kvm_suspend,
+	.resume = kvm_resume,
+};
+
+static struct sys_device kvm_sysdev = {
+	.id = 0,
+	.cls = &kvm_sysdev_class,
+};
+
+struct page *bad_page;
+
+static inline
+struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
+{
+	return container_of(pn, struct kvm_vcpu, preempt_notifier);
+}
+
+static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
+{
+	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+
+	kvm_arch_vcpu_load(vcpu, cpu);
+}
+
+static void kvm_sched_out(struct preempt_notifier *pn,
+			  struct task_struct *next)
+{
+	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+
+	kvm_arch_vcpu_put(vcpu);
+}
+
+int kvm_init(void *opaque, unsigned int vcpu_size,
+		  struct module *module)
+{
+	int r;
+	int cpu;
+
+	kvm_init_debug();
+
+	r = kvm_arch_init(opaque);
+	if (r)
+		goto out_fail;
+
+	bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+	if (bad_page == NULL) {
+		r = -ENOMEM;
+		goto out;
+	}
+
+	r = kvm_arch_hardware_setup();
+	if (r < 0)
+		goto out_free_0;
+
+	for_each_online_cpu(cpu) {
+		smp_call_function_single(cpu,
+				kvm_arch_check_processor_compat,
+				&r, 0, 1);
+		if (r < 0)
+			goto out_free_1;
+	}
+
+	on_each_cpu(hardware_enable, NULL, 0, 1);
+	r = register_cpu_notifier(&kvm_cpu_notifier);
+	if (r)
+		goto out_free_2;
+	register_reboot_notifier(&kvm_reboot_notifier);
+
+	r = sysdev_class_register(&kvm_sysdev_class);
+	if (r)
+		goto out_free_3;
+
+	r = sysdev_register(&kvm_sysdev);
+	if (r)
+		goto out_free_4;
+
+	/* A kmem cache lets us meet the alignment requirements of fx_save. */
+	kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
+					   __alignof__(struct kvm_vcpu),
+					   0, NULL);
+	if (!kvm_vcpu_cache) {
+		r = -ENOMEM;
+		goto out_free_5;
+	}
+
+	kvm_chardev_ops.owner = module;
+
+	r = misc_register(&kvm_dev);
+	if (r) {
+		printk(KERN_ERR "kvm: misc device register failed\n");
+		goto out_free;
+	}
+
+	kvm_preempt_ops.sched_in = kvm_sched_in;
+	kvm_preempt_ops.sched_out = kvm_sched_out;
+
+	return 0;
+
+out_free:
+	kmem_cache_destroy(kvm_vcpu_cache);
+out_free_5:
+	sysdev_unregister(&kvm_sysdev);
+out_free_4:
+	sysdev_class_unregister(&kvm_sysdev_class);
+out_free_3:
+	unregister_reboot_notifier(&kvm_reboot_notifier);
+	unregister_cpu_notifier(&kvm_cpu_notifier);
+out_free_2:
+	on_each_cpu(hardware_disable, NULL, 0, 1);
+out_free_1:
+	kvm_arch_hardware_unsetup();
+out_free_0:
+	__free_page(bad_page);
+out:
+	kvm_arch_exit();
+out_fail:
+	kvm_exit_debug();
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvm_init);
+
+void kvm_exit(void)
+{
+	misc_deregister(&kvm_dev);
+	kmem_cache_destroy(kvm_vcpu_cache);
+	sysdev_unregister(&kvm_sysdev);
+	sysdev_class_unregister(&kvm_sysdev_class);
+	unregister_reboot_notifier(&kvm_reboot_notifier);
+	unregister_cpu_notifier(&kvm_cpu_notifier);
+	on_each_cpu(hardware_disable, NULL, 0, 1);
+	kvm_arch_hardware_unsetup();
+	kvm_arch_exit();
+	kvm_exit_debug();
+	__free_page(bad_page);
+}
+EXPORT_SYMBOL_GPL(kvm_exit);

Some files were not shown because too many files changed in this diff