
Merge branch 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (202 commits)
  MAINTAINERS: update KVM entry
  KVM: correct error-handling code
  KVM: fix compile warnings on s390
  KVM: VMX: Check cpl before emulating debug register access
  KVM: fix misreporting of coalesced interrupts by kvm tracer
  KVM: x86: drop duplicate kvm_flush_remote_tlb calls
  KVM: VMX: call vmx_load_host_state() only if msr is cached
  KVM: VMX: Conditionally reload debug register 6
  KVM: Use thread debug register storage instead of kvm specific data
  KVM guest: do not batch pte updates from interrupt context
  KVM: Fix coalesced interrupt reporting in IOAPIC
  KVM guest: fix bogus wallclock physical address calculation
  KVM: VMX: Fix cr8 exiting control clobbering by EPT
  KVM: Optimize kvm_mmu_unprotect_page_virt() for tdp
  KVM: Document KVM_CAP_IRQCHIP
  KVM: Protect update_cr8_intercept() when running without an apic
  KVM: VMX: Fix EPT with WP bit change during paging
  KVM: Use kvm_{read,write}_guest_virt() to read and write segment descriptors
  KVM: x86 emulator: Add adc and sbb missing decoder flags
  KVM: Add missing #include
  ...
Linus Torvalds 15 years ago
parent
commit
69def9f05d
80 changed files with 5265 additions and 1924 deletions
  1. Documentation/ioctl/ioctl-number.txt (+1 -1)
  2. Documentation/kernel-parameters.txt (+39 -0)
  3. Documentation/kvm/api.txt (+759 -0)
  4. MAINTAINERS (+1 -0)
  5. arch/ia64/include/asm/kvm_host.h (+2 -2)
  6. arch/ia64/include/asm/kvm_para.h (+4 -0)
  7. arch/ia64/kvm/Kconfig (+3 -8)
  8. arch/ia64/kvm/kvm-ia64.c (+28 -57)
  9. arch/ia64/kvm/vcpu.c (+2 -2)
  10. arch/powerpc/include/asm/kvm_host.h (+2 -2)
  11. arch/powerpc/kvm/44x.c (+2 -2)
  12. arch/powerpc/kvm/44x_tlb.c (+6 -5)
  13. arch/powerpc/kvm/Kconfig (+1 -13)
  14. arch/powerpc/kvm/Makefile (+3 -1)
  15. arch/powerpc/kvm/booke.c (+1 -1)
  16. arch/powerpc/kvm/e500.c (+2 -5)
  17. arch/powerpc/kvm/e500_emulate.c (+3 -0)
  18. arch/powerpc/kvm/e500_tlb.c (+12 -14)
  19. arch/powerpc/kvm/e500_tlb.h (+3 -3)
  20. arch/powerpc/kvm/emulate.c (+5 -2)
  21. arch/powerpc/kvm/powerpc.c (+14 -18)
  22. arch/powerpc/kvm/trace.h (+104 -0)
  23. arch/s390/include/asm/kvm.h (+0 -9)
  24. arch/s390/include/asm/kvm_host.h (+9 -6)
  25. arch/s390/include/asm/kvm_para.h (+4 -0)
  26. arch/s390/kvm/Kconfig (+1 -8)
  27. arch/s390/kvm/gaccess.h (+12 -11)
  28. arch/s390/kvm/intercept.c (+12 -6)
  29. arch/s390/kvm/interrupt.c (+1 -7)
  30. arch/s390/kvm/kvm-s390.c (+33 -45)
  31. arch/s390/kvm/kvm-s390.h (+31 -1)
  32. arch/s390/kvm/sigp.c (+36 -24)
  33. arch/x86/include/asm/apicdef.h (+2 -0)
  34. arch/x86/include/asm/kvm.h (+10 -0)
  35. arch/x86/include/asm/kvm_emulate.h (+0 -0)
  36. arch/x86/include/asm/kvm_host.h (+32 -28)
  37. arch/x86/include/asm/kvm_para.h (+2 -0)
  38. arch/x86/include/asm/msr-index.h (+1 -0)
  39. arch/x86/include/asm/vmx.h (+8 -0)
  40. arch/x86/kernel/kvm.c (+1 -6)
  41. arch/x86/kernel/kvmclock.c (+2 -2)
  42. arch/x86/kvm/Kconfig (+4 -17)
  43. arch/x86/kvm/Makefile (+16 -19)
  44. arch/x86/kvm/emulate.c (+258 -7)
  45. arch/x86/kvm/i8254.c (+104 -56)
  46. arch/x86/kvm/i8254.h (+3 -2)
  47. arch/x86/kvm/i8259.c (+55 -61)
  48. arch/x86/kvm/irq.h (+0 -1)
  49. arch/x86/kvm/kvm_cache_regs.h (+9 -0)
  50. arch/x86/kvm/kvm_svm.h (+0 -51)
  51. arch/x86/kvm/kvm_timer.h (+1 -1)
  52. arch/x86/kvm/lapic.c (+246 -88)
  53. arch/x86/kvm/lapic.h (+4 -0)
  54. arch/x86/kvm/mmu.c (+247 -171)
  55. arch/x86/kvm/mmu.h (+3 -1)
  56. arch/x86/kvm/mmutrace.h (+220 -0)
  57. arch/x86/kvm/paging_tmpl.h (+74 -67)
  58. arch/x86/kvm/svm.c (+378 -298)
  59. arch/x86/kvm/timer.c (+10 -6)
  60. arch/x86/kvm/trace.h (+355 -0)
  61. arch/x86/kvm/vmx.c (+321 -133)
  62. arch/x86/kvm/x86.c (+471 -106)
  63. arch/x86/kvm/x86.h (+4 -0)
  64. arch/x86/mm/highmem_32.c (+1 -0)
  65. include/asm-generic/Kbuild.asm (+5 -0)
  66. include/linux/Kbuild (+4 -0)
  67. include/linux/kvm.h (+91 -36)
  68. include/linux/kvm_host.h (+71 -43)
  69. include/linux/kvm_para.h (+1 -0)
  70. include/trace/events/kvm.h (+151 -0)
  71. mm/hugetlb.c (+1 -0)
  72. virt/kvm/Kconfig (+14 -0)
  73. virt/kvm/coalesced_mmio.c (+41 -33)
  74. virt/kvm/coalesced_mmio.h (+1 -0)
  75. virt/kvm/eventfd.c (+578 -0)
  76. virt/kvm/ioapic.c (+53 -25)
  77. virt/kvm/iodev.h (+30 -25)
  78. virt/kvm/irq_comm.c (+40 -11)
  79. virt/kvm/kvm_main.c (+206 -92)
  80. virt/kvm/kvm_trace.c (+0 -285)

+ 1 - 1
Documentation/ioctl/ioctl-number.txt

@@ -193,7 +193,7 @@ Code	Seq#	Include File		Comments
 0xAD	00	Netfilter device	in development:
 					<mailto:rusty@rustcorp.com.au>	
 0xAE	all	linux/kvm.h		Kernel-based Virtual Machine
-					<mailto:kvm-devel@lists.sourceforge.net>
+					<mailto:kvm@vger.kernel.org>
 0xB0	all	RATIO devices		in development:
 					<mailto:vgo@ratio.de>
 0xB1	00-1F	PPPoX			<mailto:mostrows@styx.uwaterloo.ca>

+ 39 - 0
Documentation/kernel-parameters.txt

@@ -57,6 +57,7 @@ parameter is applicable:
 	ISAPNP	ISA PnP code is enabled.
 	ISDN	Appropriate ISDN support is enabled.
 	JOY	Appropriate joystick support is enabled.
+	KVM	Kernel Virtual Machine support is enabled.
 	LIBATA  Libata driver is enabled
 	LP	Printer support is enabled.
 	LOOP	Loopback device support is enabled.
@@ -1098,6 +1099,44 @@ and is between 256 and 4096 characters. It is defined in the file
 	kstack=N	[X86] Print N words from the kernel stack
 			in oops dumps.
 
+	kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
+			Default is 0 (don't ignore, but inject #GP)
+
+	kvm.oos_shadow=	[KVM] Disable out-of-sync shadow paging.
+			Default is 1 (enabled)
+
+	kvm-amd.nested=	[KVM,AMD] Allow nested virtualization in KVM/SVM.
+			Default is 0 (off)
+
+	kvm-amd.npt=	[KVM,AMD] Disable nested paging (virtualized MMU)
+			for all guests.
+			Default is 1 (enabled) if in 64bit or 32bit-PAE mode
+
+	kvm-intel.bypass_guest_pf=
+			[KVM,Intel] Disables bypassing of guest page faults
+			on Intel chips. Default is 1 (enabled)
+
+	kvm-intel.ept=	[KVM,Intel] Disable extended page tables
+			(virtualized MMU) support on capable Intel chips.
+			Default is 1 (enabled)
+
+	kvm-intel.emulate_invalid_guest_state=
+			[KVM,Intel] Enable emulation of invalid guest states
+			Default is 0 (disabled)
+
+	kvm-intel.flexpriority=
+			[KVM,Intel] Disable FlexPriority feature (TPR shadow).
+			Default is 1 (enabled)
+
+	kvm-intel.unrestricted_guest=
+			[KVM,Intel] Disable unrestricted guest feature
+			(virtualized real and unpaged mode) on capable
+			Intel chips. Default is 1 (enabled)
+
+	kvm-intel.vpid=	[KVM,Intel] Disable Virtual Processor Identification
+			feature (tagged TLBs) on capable Intel chips.
+			Default is 1 (enabled)
+
 	l2cr=		[PPC]
 
 	l3cr=		[PPC]

+ 759 - 0
Documentation/kvm/api.txt

@@ -0,0 +1,759 @@
+The Definitive KVM (Kernel-based Virtual Machine) API Documentation
+===================================================================
+
+1. General description
+
+The kvm API is a set of ioctls that are issued to control various aspects
+of a virtual machine.  The ioctls belong to three classes
+
+ - System ioctls: These query and set global attributes which affect the
+   whole kvm subsystem.  In addition a system ioctl is used to create
+   virtual machines
+
+ - VM ioctls: These query and set attributes that affect an entire virtual
+   machine, for example memory layout.  In addition a VM ioctl is used to
+   create virtual cpus (vcpus).
+
+   Only run VM ioctls from the same process (address space) that was used
+   to create the VM.
+
+ - vcpu ioctls: These query and set attributes that control the operation
+   of a single virtual cpu.
+
+   Only run vcpu ioctls from the same thread that was used to create the
+   vcpu.
+
+2. File descriptors
+
+The kvm API is centered around file descriptors.  An initial
+open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
+can be used to issue system ioctls.  A KVM_CREATE_VM ioctl on this
+handle will create a VM file descriptor which can be used to issue VM
+ioctls.  A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
+and return a file descriptor pointing to it.  Finally, ioctls on a vcpu
+fd can be used to control the vcpu, including the important task of
+actually running guest code.
+
+In general file descriptors can be migrated among processes by means
+of fork() and the SCM_RIGHTS facility of unix domain sockets.  These
+kinds of tricks are explicitly not supported by kvm.  While they will
+not cause harm to the host, their actual behavior is not guaranteed by
+the API.  The only supported use is one virtual machine per process,
+and one vcpu per thread.
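
As an illustrative sketch (not taken from this patch), the fd chain described
above can be exercised from userspace roughly as follows; vcpu id 0 and the
omission of error handling are simplifying assumptions:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
        int sys_fd  = open("/dev/kvm", O_RDWR);          /* system ioctls */
        int vm_fd   = ioctl(sys_fd, KVM_CREATE_VM, 0);   /* vm ioctls */
        int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);  /* vcpu ioctls, vcpu id 0 */

        printf("kvm fd %d, vm fd %d, vcpu fd %d\n", sys_fd, vm_fd, vcpu_fd);
        return 0;
}
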
+
+3. Extensions
+
+As of Linux 2.6.22, the KVM ABI has been stabilized: no backward
+incompatible changes are allowed.  However, there is an extension
+facility that allows backward-compatible extensions to the API to be
+queried and used.
+
+The extension mechanism is not based on the Linux version number.
+Instead, kvm defines extension identifiers and a facility to query
+whether a particular extension identifier is available.  If it is, a
+set of ioctls is available for application use.
+
+4. API description
+
+This section describes ioctls that can be used to control kvm guests.
+For each ioctl, the following information is provided along with a
+description:
+
+  Capability: which KVM extension provides this ioctl.  Can be 'basic',
+      which means that it will be provided by any kernel that supports
+      API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which
+      means availability needs to be checked with KVM_CHECK_EXTENSION
+      (see section 4.4).
+
+  Architectures: which instruction set architectures provide this ioctl.
+      x86 includes both i386 and x86_64.
+
+  Type: system, vm, or vcpu.
+
+  Parameters: what parameters are accepted by the ioctl.
+
+  Returns: the return value.  General error numbers (EBADF, ENOMEM, EINVAL)
+      are not detailed, but errors with specific meanings are.
+
+4.1 KVM_GET_API_VERSION
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: none
+Returns: the constant KVM_API_VERSION (=12)
+
+This identifies the API version as the stable kvm API. It is not
+expected that this number will change.  However, Linux 2.6.20 and
+2.6.21 report earlier versions; these are not documented and not
+supported.  Applications should refuse to run if KVM_GET_API_VERSION
+returns a value other than 12.  If this check passes, all ioctls
+described as 'basic' will be available.
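
A hedged sketch of the check recommended above (assumes the headers and the
sys_fd variable from the earlier example):

int check_api_version(int sys_fd)
{
        int ver = ioctl(sys_fd, KVM_GET_API_VERSION, 0);

        if (ver != 12) {        /* KVM_API_VERSION */
                fprintf(stderr, "unsupported KVM API version %d\n", ver);
                return -1;      /* refuse to run, as advised above */
        }
        return 0;               /* all 'basic' ioctls are available */
}
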
+
+4.2 KVM_CREATE_VM
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: none
+Returns: a VM fd that can be used to control the new virtual machine.
+
+The new VM has no virtual cpus and no memory.  An mmap() of a VM fd
+will access the virtual machine's physical address space; offset zero
+corresponds to guest physical address zero.  Use of mmap() on a VM fd
+is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is
+available.
+
+4.3 KVM_GET_MSR_INDEX_LIST
+
+Capability: basic
+Architectures: x86
+Type: system
+Parameters: struct kvm_msr_list (in/out)
+Returns: 0 on success; -1 on error
+Errors:
+  E2BIG:     the msr index list is too big to fit in the array specified by
+             the user.
+
+struct kvm_msr_list {
+	__u32 nmsrs; /* number of msrs in entries */
+	__u32 indices[0];
+};
+
+This ioctl returns the guest msrs that are supported.  The list varies
+by kvm version and host processor, but does not change otherwise.  The
+user fills in the size of the indices array in nmsrs, and in return
+kvm adjusts nmsrs to reflect the actual number of msrs and fills in
+the indices array with their numbers.
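
One possible way to drive this is sketched below (illustrative only; the
initial guess of 64 entries is arbitrary, malloc failures are ignored, and the
kernel is assumed to write back the required count before failing with E2BIG):

#include <errno.h>
#include <stdlib.h>

struct kvm_msr_list *get_msr_index_list(int sys_fd)
{
        __u32 n = 64;                           /* arbitrary first guess */

        for (;;) {
                struct kvm_msr_list *list =
                        malloc(sizeof(*list) + n * sizeof(__u32));

                list->nmsrs = n;
                if (ioctl(sys_fd, KVM_GET_MSR_INDEX_LIST, list) == 0)
                        return list;            /* indices[0..nmsrs-1] are valid */
                n = list->nmsrs;                /* count written back by kvm */
                free(list);
                if (errno != E2BIG)
                        return NULL;
        }
}
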
+
+4.4 KVM_CHECK_EXTENSION
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: extension identifier (KVM_CAP_*)
+Returns: 0 if unsupported; 1 (or some other positive integer) if supported
+
+The API allows the application to query about extensions to the core
+kvm API.  Userspace passes an extension identifier (an integer) and
+receives an integer that describes the extension availability.
+Generally 0 means no and 1 means yes, but some extensions may report
+additional information in the integer return value.
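
For example (a sketch, not part of the patch), checking for the in-kernel
irqchip documented later in this file:

int have_in_kernel_irqchip(int sys_fd)
{
        /* 0 means unsupported; any positive value means supported */
        return ioctl(sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP) > 0;
}
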
+
+4.5 KVM_GET_VCPU_MMAP_SIZE
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: none
+Returns: size of vcpu mmap area, in bytes
+
+The KVM_RUN ioctl (cf.) communicates with userspace via a shared
+memory region.  This ioctl returns the size of that region.  See the
+KVM_RUN documentation for details.
+
+4.6 KVM_SET_MEMORY_REGION
+
+Capability: basic
+Architectures: all
+Type: vm ioctl
+Parameters: struct kvm_memory_region (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_memory_region {
+	__u32 slot;
+	__u32 flags;
+	__u64 guest_phys_addr;
+	__u64 memory_size; /* bytes */
+};
+
+/* for kvm_memory_region::flags */
+#define KVM_MEM_LOG_DIRTY_PAGES  1UL
+
+This ioctl allows the user to create or modify a guest physical memory
+slot.  When changing an existing slot, it may be moved in the guest
+physical memory space, or its flags may be modified.  It may not be
+resized.  Slots may not overlap.
+
+The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which
+instructs kvm to keep track of writes to memory within the slot.  See
+the KVM_GET_DIRTY_LOG ioctl.
+
+It is recommended to use the KVM_SET_USER_MEMORY_REGION ioctl instead
+of this API, if available.  This newer API allows placing guest memory
+at specified locations in the host address space, yielding better
+control and easy access.
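
An illustrative call for a single 8 MB slot starting at guest physical address
zero is sketched below; the size and slot number are arbitrary, and the struct
and ioctl definitions are assumed to come from this kernel's <linux/kvm.h>.
The preferred KVM_SET_USER_MEMORY_REGION variant takes a struct
kvm_userspace_memory_region (not reproduced in this document), which adds a
userspace_addr field pointing at host memory.

int add_memory_slot(int vm_fd)
{
        struct kvm_memory_region mem = {
                .slot            = 0,
                .flags           = 0,           /* or KVM_MEM_LOG_DIRTY_PAGES */
                .guest_phys_addr = 0,
                .memory_size     = 8 << 20,     /* 8 MB */
        };

        return ioctl(vm_fd, KVM_SET_MEMORY_REGION, &mem);
}
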
+
+4.7 KVM_CREATE_VCPU
+
+Capability: basic
+Architectures: all
+Type: vm ioctl
+Parameters: vcpu id (apic id on x86)
+Returns: vcpu fd on success, -1 on error
+
+This API adds a vcpu to a virtual machine.  The vcpu id is a small integer
+in the range [0, max_vcpus).
+
+4.8 KVM_GET_DIRTY_LOG (vm ioctl)
+
+Capability: basic
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_dirty_log (in/out)
+Returns: 0 on success, -1 on error
+
+/* for KVM_GET_DIRTY_LOG */
+struct kvm_dirty_log {
+	__u32 slot;
+	__u32 padding;
+	union {
+		void __user *dirty_bitmap; /* one bit per page */
+		__u64 padding;
+	};
+};
+
+Given a memory slot, return a bitmap containing any pages dirtied
+since the last call to this ioctl.  Bit 0 is the first page in the
+memory slot.  Ensure the entire structure is cleared to avoid padding
+issues.
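
A sketch of fetching the bitmap for slot 0 (illustrative; the caller is assumed
to pass a buffer of at least one bit per page in the slot):

#include <string.h>

int fetch_dirty_log(int vm_fd, void *bitmap)
{
        struct kvm_dirty_log log;

        memset(&log, 0, sizeof(log));   /* clears the padding, as advised above */
        log.slot = 0;
        log.dirty_bitmap = bitmap;

        return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
}
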
+
+4.9 KVM_SET_MEMORY_ALIAS
+
+Capability: basic
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_memory_alias (in)
+Returns: 0 (success), -1 (error)
+
+struct kvm_memory_alias {
+	__u32 slot;  /* this has a different namespace than memory slots */
+	__u32 flags;
+	__u64 guest_phys_addr;
+	__u64 memory_size;
+	__u64 target_phys_addr;
+};
+
+Defines a guest physical address space region as an alias to another
+region.  Useful for aliased addresses, for example the VGA low memory
+window. Should not be used with userspace memory.
+
+4.10 KVM_RUN
+
+Capability: basic
+Architectures: all
+Type: vcpu ioctl
+Parameters: none
+Returns: 0 on success, -1 on error
+Errors:
+  EINTR:     an unmasked signal is pending
+
+This ioctl is used to run a guest virtual cpu.  While there are no
+explicit parameters, there is an implicit parameter block that can be
+obtained by mmap()ing the vcpu fd at offset 0, with the size given by
+KVM_GET_VCPU_MMAP_SIZE.  The parameter block is formatted as a 'struct
+kvm_run' (see below).
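
An illustrative run-loop skeleton (not part of the patch), combining
KVM_GET_VCPU_MMAP_SIZE with the exit reasons documented in section 5; only two
exits are sketched and their emulation is left out:

#include <sys/mman.h>

int run_guest(int sys_fd, int vcpu_fd)
{
        int size = ioctl(sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
        struct kvm_run *run = mmap(NULL, size, PROT_READ | PROT_WRITE,
                                   MAP_SHARED, vcpu_fd, 0);

        for (;;) {
                if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
                        return -1;              /* e.g. EINTR on a pending signal */
                switch (run->exit_reason) {
                case KVM_EXIT_IO:
                case KVM_EXIT_MMIO:
                        /* emulate the access here, then re-enter the guest */
                        break;
                default:
                        return run->exit_reason;
                }
        }
}
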
+
+4.11 KVM_GET_REGS
+
+Capability: basic
+Architectures: all
+Type: vcpu ioctl
+Parameters: struct kvm_regs (out)
+Returns: 0 on success, -1 on error
+
+Reads the general purpose registers from the vcpu.
+
+/* x86 */
+struct kvm_regs {
+	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
+	__u64 rax, rbx, rcx, rdx;
+	__u64 rsi, rdi, rsp, rbp;
+	__u64 r8,  r9,  r10, r11;
+	__u64 r12, r13, r14, r15;
+	__u64 rip, rflags;
+};
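
For example (a sketch only, using the x86 layout shown above and the headers
from the first example), dumping the guest instruction pointer:

void print_guest_rip(int vcpu_fd)
{
        struct kvm_regs regs;

        if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) == 0)
                printf("guest rip = 0x%llx\n", (unsigned long long)regs.rip);
}
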
+
+4.12 KVM_SET_REGS
+
+Capability: basic
+Architectures: all
+Type: vcpu ioctl
+Parameters: struct kvm_regs (in)
+Returns: 0 on success, -1 on error
+
+Writes the general purpose registers into the vcpu.
+
+See KVM_GET_REGS for the data structure.
+
+4.13 KVM_GET_SREGS
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_sregs (out)
+Returns: 0 on success, -1 on error
+
+Reads special registers from the vcpu.
+
+/* x86 */
+struct kvm_sregs {
+	struct kvm_segment cs, ds, es, fs, gs, ss;
+	struct kvm_segment tr, ldt;
+	struct kvm_dtable gdt, idt;
+	__u64 cr0, cr2, cr3, cr4, cr8;
+	__u64 efer;
+	__u64 apic_base;
+	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
+};
+
+interrupt_bitmap is a bitmap of pending external interrupts.  At most
+one bit may be set.  This interrupt has been acknowledged by the APIC
+but not yet injected into the cpu core.
+
+4.14 KVM_SET_SREGS
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_sregs (in)
+Returns: 0 on success, -1 on error
+
+Writes special registers into the vcpu.  See KVM_GET_SREGS for the
+data structures.
+
+4.15 KVM_TRANSLATE
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_translation (in/out)
+Returns: 0 on success, -1 on error
+
+Translates a virtual address according to the vcpu's current address
+translation mode.
+
+struct kvm_translation {
+	/* in */
+	__u64 linear_address;
+
+	/* out */
+	__u64 physical_address;
+	__u8  valid;
+	__u8  writeable;
+	__u8  usermode;
+	__u8  pad[5];
+};
+
+4.16 KVM_INTERRUPT
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_interrupt (in)
+Returns: 0 on success, -1 on error
+
+Queues a hardware interrupt vector to be injected.  This is only
+useful if in-kernel local APIC is not used.
+
+/* for KVM_INTERRUPT */
+struct kvm_interrupt {
+	/* in */
+	__u32 irq;
+};
+
+Note 'irq' is an interrupt vector, not an interrupt pin or line.
+
+4.17 KVM_DEBUG_GUEST
+
+Capability: basic
+Architectures: none
+Type: vcpu ioctl
+Parameters: none
+Returns: -1 on error
+
+Support for this has been removed.  Use KVM_SET_GUEST_DEBUG instead.
+
+4.18 KVM_GET_MSRS
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_msrs (in/out)
+Returns: 0 on success, -1 on error
+
+Reads model-specific registers from the vcpu.  Supported msr indices can
+be obtained using KVM_GET_MSR_INDEX_LIST.
+
+struct kvm_msrs {
+	__u32 nmsrs; /* number of msrs in entries */
+	__u32 pad;
+
+	struct kvm_msr_entry entries[0];
+};
+
+struct kvm_msr_entry {
+	__u32 index;
+	__u32 reserved;
+	__u64 data;
+};
+
+Application code should set the 'nmsrs' member (which indicates the
+size of the entries array) and the 'index' member of each array entry.
+kvm will fill in the 'data' member.
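
A sketch of reading one MSR; index 0x10 (the x86 time-stamp counter) is chosen
purely as an example, and the wrapper struct simply lays out the header plus a
single entry:

#include <string.h>

int read_one_msr(int vcpu_fd, __u32 index, __u64 *value)
{
        struct {
                struct kvm_msrs header;
                struct kvm_msr_entry entry;
        } buf;

        memset(&buf, 0, sizeof(buf));
        buf.header.nmsrs = 1;
        buf.entry.index = index;        /* e.g. 0x10, IA32_TIME_STAMP_COUNTER */

        if (ioctl(vcpu_fd, KVM_GET_MSRS, &buf) != 1)    /* returns msrs read */
                return -1;
        *value = buf.entry.data;        /* filled in by kvm */
        return 0;
}
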
+
+4.19 KVM_SET_MSRS
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_msrs (in)
+Returns: 0 on success, -1 on error
+
+Writes model-specific registers to the vcpu.  See KVM_GET_MSRS for the
+data structures.
+
+Application code should set the 'nmsrs' member (which indicates the
+size of the entries array), and the 'index' and 'data' members of each
+array entry.
+
+4.20 KVM_SET_CPUID
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_cpuid (in)
+Returns: 0 on success, -1 on error
+
+Defines the vcpu responses to the cpuid instruction.  Applications
+should use the KVM_SET_CPUID2 ioctl if available.
+
+
+struct kvm_cpuid_entry {
+	__u32 function;
+	__u32 eax;
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 padding;
+};
+
+/* for KVM_SET_CPUID */
+struct kvm_cpuid {
+	__u32 nent;
+	__u32 padding;
+	struct kvm_cpuid_entry entries[0];
+};
+
+4.21 KVM_SET_SIGNAL_MASK
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_signal_mask (in)
+Returns: 0 on success, -1 on error
+
+Defines which signals are blocked during execution of KVM_RUN.  This
+signal mask temporarily overrides the thread's signal mask.  Any
+unblocked signal received (except SIGKILL and SIGSTOP, which retain
+their traditional behaviour) will cause KVM_RUN to return with -EINTR.
+
+Note the signal will only be delivered if not blocked by the original
+signal mask.
+
+/* for KVM_SET_SIGNAL_MASK */
+struct kvm_signal_mask {
+	__u32 len;
+	__u8  sigset[0];
+};
+
+4.22 KVM_GET_FPU
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_fpu (out)
+Returns: 0 on success, -1 on error
+
+Reads the floating point state from the vcpu.
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+	__u8  fpr[8][16];
+	__u16 fcw;
+	__u16 fsw;
+	__u8  ftwx;  /* in fxsave format */
+	__u8  pad1;
+	__u16 last_opcode;
+	__u64 last_ip;
+	__u64 last_dp;
+	__u8  xmm[16][16];
+	__u32 mxcsr;
+	__u32 pad2;
+};
+
+4.23 KVM_SET_FPU
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_fpu (in)
+Returns: 0 on success, -1 on error
+
+Writes the floating point state to the vcpu.
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+	__u8  fpr[8][16];
+	__u16 fcw;
+	__u16 fsw;
+	__u8  ftwx;  /* in fxsave format */
+	__u8  pad1;
+	__u16 last_opcode;
+	__u64 last_ip;
+	__u64 last_dp;
+	__u8  xmm[16][16];
+	__u32 mxcsr;
+	__u32 pad2;
+};
+
+4.24 KVM_CREATE_IRQCHIP
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86, ia64
+Type: vm ioctl
+Parameters: none
+Returns: 0 on success, -1 on error
+
+Creates an interrupt controller model in the kernel.  On x86, creates a virtual
+ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
+local APIC.  IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
+only go to the IOAPIC.  On ia64, an IOSAPIC is created.
+
+4.25 KVM_IRQ_LINE
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86, ia64
+Type: vm ioctl
+Parameters: struct kvm_irq_level
+Returns: 0 on success, -1 on error
+
+Sets the level of a GSI input to the interrupt controller model in the kernel.
+Requires that an interrupt controller model has been previously created with
+KVM_CREATE_IRQCHIP.  Note that edge-triggered interrupts require the level
+to be set to 1 and then back to 0.
+
+struct kvm_irq_level {
+	union {
+		__u32 irq;     /* GSI */
+		__s32 status;  /* not used for KVM_IRQ_LEVEL */
+	};
+	__u32 level;           /* 0 or 1 */
+};
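
An illustrative sketch of pulsing an edge-triggered line as described above
(the GSI number is whatever the caller has routed; nothing here is taken from
the patch):

int pulse_gsi(int vm_fd, __u32 gsi)
{
        struct kvm_irq_level irq;

        irq.irq = gsi;
        irq.level = 1;                          /* edge: raise, then lower */
        if (ioctl(vm_fd, KVM_IRQ_LINE, &irq) < 0)
                return -1;
        irq.level = 0;
        return ioctl(vm_fd, KVM_IRQ_LINE, &irq);
}
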
+
+4.26 KVM_GET_IRQCHIP
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86, ia64
+Type: vm ioctl
+Parameters: struct kvm_irqchip (in/out)
+Returns: 0 on success, -1 on error
+
+Reads the state of a kernel interrupt controller created with
+KVM_CREATE_IRQCHIP into a buffer provided by the caller.
+
+struct kvm_irqchip {
+	__u32 chip_id;  /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
+	__u32 pad;
+        union {
+		char dummy[512];  /* reserving space */
+		struct kvm_pic_state pic;
+		struct kvm_ioapic_state ioapic;
+	} chip;
+};
+
+4.27 KVM_SET_IRQCHIP
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86, ia64
+Type: vm ioctl
+Parameters: struct kvm_irqchip (in)
+Returns: 0 on success, -1 on error
+
+Sets the state of a kernel interrupt controller created with
+KVM_CREATE_IRQCHIP from a buffer provided by the caller.
+
+struct kvm_irqchip {
+	__u32 chip_id;  /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
+	__u32 pad;
+        union {
+		char dummy[512];  /* reserving space */
+		struct kvm_pic_state pic;
+		struct kvm_ioapic_state ioapic;
+	} chip;
+};
+
+5. The kvm_run structure
+
+Application code obtains a pointer to the kvm_run structure by
+mmap()ing a vcpu fd.  From that point, application code can control
+execution by changing fields in kvm_run prior to calling the KVM_RUN
+ioctl, and obtain information about the reason KVM_RUN returned by
+looking up structure members.
+
+struct kvm_run {
+	/* in */
+	__u8 request_interrupt_window;
+
+Request that KVM_RUN return when it becomes possible to inject external
+interrupts into the guest.  Useful in conjunction with KVM_INTERRUPT.
+
+	__u8 padding1[7];
+
+	/* out */
+	__u32 exit_reason;
+
+When KVM_RUN has returned successfully (return value 0), this informs
+application code why KVM_RUN has returned.  Allowable values for this
+field are detailed below.
+
+	__u8 ready_for_interrupt_injection;
+
+If request_interrupt_window has been specified, this field indicates
+an interrupt can be injected now with KVM_INTERRUPT.
+
+	__u8 if_flag;
+
+The value of the current interrupt flag.  Only valid if in-kernel
+local APIC is not used.
+
+	__u8 padding2[2];
+
+	/* in (pre_kvm_run), out (post_kvm_run) */
+	__u64 cr8;
+
+The value of the cr8 register.  Only valid if in-kernel local APIC is
+not used.  Both input and output.
+
+	__u64 apic_base;
+
+The value of the APIC BASE msr.  Only valid if in-kernel local
+APIC is not used.  Both input and output.
+
+	union {
+		/* KVM_EXIT_UNKNOWN */
+		struct {
+			__u64 hardware_exit_reason;
+		} hw;
+
+If exit_reason is KVM_EXIT_UNKNOWN, the vcpu has exited due to unknown
+reasons.  Further architecture-specific information is available in
+hardware_exit_reason.
+
+		/* KVM_EXIT_FAIL_ENTRY */
+		struct {
+			__u64 hardware_entry_failure_reason;
+		} fail_entry;
+
+If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due
+to unknown reasons.  Further architecture-specific information is
+available in hardware_entry_failure_reason.
+
+		/* KVM_EXIT_EXCEPTION */
+		struct {
+			__u32 exception;
+			__u32 error_code;
+		} ex;
+
+Unused.
+
+		/* KVM_EXIT_IO */
+		struct {
+#define KVM_EXIT_IO_IN  0
+#define KVM_EXIT_IO_OUT 1
+			__u8 direction;
+			__u8 size; /* bytes */
+			__u16 port;
+			__u32 count;
+			__u64 data_offset; /* relative to kvm_run start */
+		} io;
+
+If exit_reason is KVM_EXIT_IO, then the vcpu has
+executed a port I/O instruction which could not be satisfied by kvm.
+data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
+where kvm expects application code to place the data for the next
+KVM_RUN invocation (KVM_EXIT_IO_IN).  Data format is a packed array.
+
+		struct {
+			struct kvm_debug_exit_arch arch;
+		} debug;
+
+Unused.
+
+		/* KVM_EXIT_MMIO */
+		struct {
+			__u64 phys_addr;
+			__u8  data[8];
+			__u32 len;
+			__u8  is_write;
+		} mmio;
+
+If exit_reason is KVM_EXIT_MMIO, then the vcpu has
+executed a memory-mapped I/O instruction which could not be satisfied
+by kvm.  The 'data' member contains the written data if 'is_write' is
+true, and should be filled by application code otherwise.
+
+		/* KVM_EXIT_HYPERCALL */
+		struct {
+			__u64 nr;
+			__u64 args[6];
+			__u64 ret;
+			__u32 longmode;
+			__u32 pad;
+		} hypercall;
+
+Unused.
+
+		/* KVM_EXIT_TPR_ACCESS */
+		struct {
+			__u64 rip;
+			__u32 is_write;
+			__u32 pad;
+		} tpr_access;
+
+To be documented (KVM_TPR_ACCESS_REPORTING).
+
+		/* KVM_EXIT_S390_SIEIC */
+		struct {
+			__u8 icptcode;
+			__u64 mask; /* psw upper half */
+			__u64 addr; /* psw lower half */
+			__u16 ipa;
+			__u32 ipb;
+		} s390_sieic;
+
+s390 specific.
+
+		/* KVM_EXIT_S390_RESET */
+#define KVM_S390_RESET_POR       1
+#define KVM_S390_RESET_CLEAR     2
+#define KVM_S390_RESET_SUBSYSTEM 4
+#define KVM_S390_RESET_CPU_INIT  8
+#define KVM_S390_RESET_IPL       16
+		__u64 s390_reset_flags;
+
+s390 specific.
+
+		/* KVM_EXIT_DCR */
+		struct {
+			__u32 dcrn;
+			__u32 data;
+			__u8  is_write;
+		} dcr;
+
+powerpc specific.
+
+		/* Fix the size of the union. */
+		char padding[256];
+	};
+};
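
As a closing illustrative sketch (not from the patch), this is roughly how
userspace locates the packed I/O data for a KVM_EXIT_IO exit via data_offset;
the 0xff fill value stands in for a real device model:

void handle_io_exit(struct kvm_run *run)
{
        __u8 *data = (__u8 *)run + run->io.data_offset; /* io.count packed items */
        __u32 i;

        if (run->io.direction == KVM_EXIT_IO_IN && run->io.size == 1) {
                for (i = 0; i < run->io.count; i++)
                        data[i] = 0xff;         /* value returned to the guest */
        }
        /* for KVM_EXIT_IO_OUT, data[] already holds what the guest wrote */
}
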

+ 1 - 0
MAINTAINERS

@@ -2926,6 +2926,7 @@ F:	include/linux/sunrpc/
 
 KERNEL VIRTUAL MACHINE (KVM)
 M:	Avi Kivity <avi@redhat.com>
+M:	Marcelo Tosatti <mtosatti@redhat.com>
 L:	kvm@vger.kernel.org
 W:	http://kvm.qumranet.com
 S:	Supported

+ 2 - 2
arch/ia64/include/asm/kvm_host.h

@@ -235,7 +235,8 @@ struct kvm_vm_data {
 #define KVM_REQ_PTC_G		32
 #define KVM_REQ_RESUME		33
 
-#define KVM_PAGES_PER_HPAGE	1
+#define KVM_NR_PAGE_SIZES	1
+#define KVM_PAGES_PER_HPAGE(x)	1
 
 struct kvm;
 struct kvm_vcpu;
@@ -465,7 +466,6 @@ struct kvm_arch {
 	unsigned long	metaphysical_rr4;
 	unsigned long	vmm_init_rr;
 
-	int		online_vcpus;
 	int		is_sn2;
 
 	struct kvm_ioapic *vioapic;

+ 4 - 0
arch/ia64/include/asm/kvm_para.h

@@ -19,9 +19,13 @@
  *
  */
 
+#ifdef __KERNEL__
+
 static inline unsigned int kvm_arch_para_features(void)
 {
 	return 0;
 }
 
 #endif
+
+#endif

+ 3 - 8
arch/ia64/kvm/Kconfig

@@ -1,12 +1,8 @@
 #
 # KVM configuration
 #
-config HAVE_KVM
-	bool
 
-config HAVE_KVM_IRQCHIP
-       bool
-       default y
+source "virt/kvm/Kconfig"
 
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
@@ -28,6 +24,8 @@ config KVM
 	depends on PCI
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select HAVE_KVM_IRQCHIP
+	select KVM_APIC_ARCHITECTURE
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
@@ -49,9 +47,6 @@ config KVM_INTEL
 	  Provides support for KVM on Itanium 2 processors equipped with the VT
 	  extensions.
 
-config KVM_TRACE
-       bool
-
 source drivers/virtio/Kconfig
 
 endif # VIRTUALIZATION

+ 28 - 57
arch/ia64/kvm/kvm-ia64.c

@@ -210,16 +210,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 
 }
 
-static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
-					gpa_t addr, int len, int is_write)
-{
-	struct kvm_io_device *dev;
-
-	dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, is_write);
-
-	return dev;
-}
-
 static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -231,6 +221,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct kvm_mmio_req *p;
 	struct kvm_io_device *mmio_dev;
+	int r;
 
 	p = kvm_get_vcpu_ioreq(vcpu);
 
@@ -247,16 +238,13 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	kvm_run->exit_reason = KVM_EXIT_MMIO;
 	return 0;
 mmio:
-	mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr, p->size, !p->dir);
-	if (mmio_dev) {
-		if (!p->dir)
-			kvm_iodevice_write(mmio_dev, p->addr, p->size,
-						&p->data);
-		else
-			kvm_iodevice_read(mmio_dev, p->addr, p->size,
-						&p->data);
-
-	} else
+	if (p->dir)
+		r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr,
+				    p->size, &p->data);
+	else
+		r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr,
+				     p->size, &p->data);
+	if (r)
 		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
 	p->state = STATE_IORESP_READY;
 
@@ -337,13 +325,12 @@ static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
 {
 	union ia64_lid lid;
 	int i;
+	struct kvm_vcpu *vcpu;
 
-	for (i = 0; i < kvm->arch.online_vcpus; i++) {
-		if (kvm->vcpus[i]) {
-			lid.val = VCPU_LID(kvm->vcpus[i]);
-			if (lid.id == id && lid.eid == eid)
-				return kvm->vcpus[i];
-		}
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		lid.val = VCPU_LID(vcpu);
+		if (lid.id == id && lid.eid == eid)
+			return vcpu;
 	}
 
 	return NULL;
@@ -409,21 +396,21 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	struct kvm *kvm = vcpu->kvm;
 	struct call_data call_data;
 	int i;
+	struct kvm_vcpu *vcpui;
 
 	call_data.ptc_g_data = p->u.ptc_g_data;
 
-	for (i = 0; i < kvm->arch.online_vcpus; i++) {
-		if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
-						KVM_MP_STATE_UNINITIALIZED ||
-					vcpu == kvm->vcpus[i])
+	kvm_for_each_vcpu(i, vcpui, kvm) {
+		if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
+				vcpu == vcpui)
 			continue;
 
-		if (waitqueue_active(&kvm->vcpus[i]->wq))
-			wake_up_interruptible(&kvm->vcpus[i]->wq);
+		if (waitqueue_active(&vcpui->wq))
+			wake_up_interruptible(&vcpui->wq);
 
-		if (kvm->vcpus[i]->cpu != -1) {
-			call_data.vcpu = kvm->vcpus[i];
-			smp_call_function_single(kvm->vcpus[i]->cpu,
+		if (vcpui->cpu != -1) {
+			call_data.vcpu = vcpui;
+			smp_call_function_single(vcpui->cpu,
 					vcpu_global_purge, &call_data, 1);
 		} else
 			printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
@@ -852,8 +839,6 @@ struct  kvm *kvm_arch_create_vm(void)
 
 	kvm_init_vm(kvm);
 
-	kvm->arch.online_vcpus = 0;
-
 	return kvm;
 
 }
@@ -1000,10 +985,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			__s32 status;
-			mutex_lock(&kvm->lock);
+			mutex_lock(&kvm->irq_lock);
 			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 				    irq_event.irq, irq_event.level);
-			mutex_unlock(&kvm->lock);
+			mutex_unlock(&kvm->irq_lock);
 			if (ioctl == KVM_IRQ_LINE_STATUS) {
 				irq_event.status = status;
 				if (copy_to_user(argp, &irq_event,
@@ -1216,7 +1201,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	if (IS_ERR(vmm_vcpu))
 		return PTR_ERR(vmm_vcpu);
 
-	if (vcpu->vcpu_id == 0) {
+	if (kvm_vcpu_is_bsp(vcpu)) {
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
 		/*Set entry address for first run.*/
@@ -1224,7 +1209,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 		/*Initialize itc offset for vcpus*/
 		itc_offset = 0UL - kvm_get_itc(vcpu);
-		for (i = 0; i < kvm->arch.online_vcpus; i++) {
+		for (i = 0; i < KVM_MAX_VCPUS; i++) {
 			v = (struct kvm_vcpu *)((char *)vcpu +
 					sizeof(struct kvm_vcpu_data) * i);
 			v->arch.itc_offset = itc_offset;
@@ -1356,8 +1341,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 		goto fail;
 	}
 
-	kvm->arch.online_vcpus++;
-
 	return vcpu;
 fail:
 	return ERR_PTR(r);
@@ -1952,19 +1935,6 @@ int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
     return find_highest_bits((int *)&vpd->irr[0]);
 }
 
-int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
-{
-	if (kvm_highest_pending_irq(vcpu) != -1)
-		return 1;
-	return 0;
-}
-
-int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
-{
-	/* do real check here */
-	return 1;
-}
-
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.timer_fired;
@@ -1977,7 +1947,8 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE;
+	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
+		(kvm_highest_pending_irq(vcpu) != -1);
 }
 
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,

+ 2 - 2
arch/ia64/kvm/vcpu.c

@@ -830,8 +830,8 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
 
 	kvm = (struct kvm *)KVM_VM_BASE;
 
-	if (vcpu->vcpu_id == 0) {
-		for (i = 0; i < kvm->arch.online_vcpus; i++) {
+	if (kvm_vcpu_is_bsp(vcpu)) {
+		for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) {
 			v = (struct kvm_vcpu *)((char *)vcpu +
 					sizeof(struct kvm_vcpu_data) * i);
 			VMX(v, itc_offset) = itc_offset;

+ 2 - 2
arch/powerpc/include/asm/kvm_host.h

@@ -34,7 +34,8 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
 /* We don't currently support large pages. */
-#define KVM_PAGES_PER_HPAGE (1UL << 31)
+#define KVM_NR_PAGE_SIZES	1
+#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
 
 struct kvm;
 struct kvm_run;
@@ -153,7 +154,6 @@ struct kvm_vcpu_arch {
 	u32 pid;
 	u32 swap_pid;
 
-	u32 pvr;
 	u32 ccr0;
 	u32 ccr1;
 	u32 dbcr0;

+ 2 - 2
arch/powerpc/kvm/44x.c

@@ -138,7 +138,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
 }
 
-static int kvmppc_44x_init(void)
+static int __init kvmppc_44x_init(void)
 {
 	int r;
 
@@ -149,7 +149,7 @@ static int kvmppc_44x_init(void)
 	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
 }
 
-static void kvmppc_44x_exit(void)
+static void __exit kvmppc_44x_exit(void)
 {
 	kvmppc_booke_exit();
 }

+ 6 - 5
arch/powerpc/kvm/44x_tlb.c

@@ -30,6 +30,7 @@
 #include "timing.h"
 
 #include "44x_tlb.h"
+#include "trace.h"
 
 #ifndef PPC44x_TLBE_SIZE
 #define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
@@ -263,7 +264,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
 
 	/* XXX set tlb_44x_index to stlb_index? */
 
-	KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
+	trace_kvm_stlb_inval(stlb_index);
 }
 
 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
@@ -365,8 +366,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
 	/* Insert shadow mapping into hardware TLB. */
 	kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
 	kvmppc_44x_tlbwe(victim, &stlbe);
-	KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
-	            stlbe.word2, handler);
+	trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1,
+			     stlbe.word2);
 }
 
 /* For a particular guest TLB entry, invalidate the corresponding host TLB
@@ -485,8 +486,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 		kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
 	}
 
-	KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
-	            tlbe->word1, tlbe->word2, handler);
+	trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1,
+			     tlbe->word2);
 
 	kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
 	return EMULATE_DONE;

+ 1 - 13
arch/powerpc/kvm/Kconfig

@@ -2,8 +2,7 @@
 # KVM configuration
 #
 
-config HAVE_KVM_IRQCHIP
-       bool
+source "virt/kvm/Kconfig"
 
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
@@ -59,17 +58,6 @@ config KVM_E500
 
 	  If unsure, say N.
 
-config KVM_TRACE
-	bool "KVM trace support"
-	depends on KVM && MARKERS && SYSFS
-	select RELAY
-	select DEBUG_FS
-	default n
-	---help---
-	  This option allows reading a trace of kvm-related events through
-	  relayfs.  Note the ABI is not considered stable and will be
-	  modified in future updates.
-
 source drivers/virtio/Kconfig
 
 endif # VIRTUALIZATION

+ 3 - 1
arch/powerpc/kvm/Makefile

@@ -8,7 +8,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm
 
 common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
-common-objs-$(CONFIG_KVM_TRACE)  += $(addprefix ../../../virt/kvm/, kvm_trace.o)
+CFLAGS_44x_tlb.o  := -I.
+CFLAGS_e500_tlb.o := -I.
+CFLAGS_emulate.o  := -I.
 
 kvm-objs := $(common-objs-y) powerpc.o emulate.o
 obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o

+ 1 - 1
arch/powerpc/kvm/booke.c

@@ -520,7 +520,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	return kvmppc_core_vcpu_translate(vcpu, tr);
 }
 
-int kvmppc_booke_init(void)
+int __init kvmppc_booke_init(void)
 {
 	unsigned long ivor[16];
 	unsigned long max_ivor = 0;

+ 2 - 5
arch/powerpc/kvm/e500.c

@@ -60,9 +60,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 
 	kvmppc_e500_tlb_setup(vcpu_e500);
 
-	/* Use the same core vertion as host's */
-	vcpu->arch.pvr = mfspr(SPRN_PVR);
-
 	return 0;
 }
 
@@ -132,7 +129,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
 }
 
-static int kvmppc_e500_init(void)
+static int __init kvmppc_e500_init(void)
 {
 	int r, i;
 	unsigned long ivor[3];
@@ -160,7 +157,7 @@ static int kvmppc_e500_init(void)
 	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE);
 }
 
-static void kvmppc_e500_exit(void)
+static void __init kvmppc_e500_exit(void)
 {
 	kvmppc_booke_exit();
 }

+ 3 - 0
arch/powerpc/kvm/e500_emulate.c

@@ -180,6 +180,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 	case SPRN_MMUCSR0:
 		vcpu->arch.gpr[rt] = 0; break;
 
+	case SPRN_MMUCFG:
+		vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break;
+
 	/* extra exceptions */
 	case SPRN_IVOR32:
 		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];

+ 12 - 14
arch/powerpc/kvm/e500_tlb.c

@@ -22,6 +22,7 @@
 
 #include "../mm/mmu_decl.h"
 #include "e500_tlb.h"
+#include "trace.h"
 
 #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
 
@@ -224,9 +225,8 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 	kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
 	stlbe->mas1 = 0;
-	KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, index_of(tlbsel, esel),
-			stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
-			handler);
+	trace_kvm_stlb_inval(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
+			     stlbe->mas3, stlbe->mas7);
 }
 
 static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -269,7 +269,7 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
 	tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
 	victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
 	pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
-	tsized = (vcpu_e500->mas4 >> 8) & 0xf;
+	tsized = (vcpu_e500->mas4 >> 7) & 0x1f;
 
 	vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
 		| MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
@@ -309,7 +309,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	vcpu_e500->shadow_pages[tlbsel][esel] = new_page;
 
 	/* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
-	stlbe->mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K)
+	stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
 		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
 	stlbe->mas2 = (gvaddr & MAS2_EPN)
 		| e500_shadow_mas2_attrib(gtlbe->mas2,
@@ -319,9 +319,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 				vcpu_e500->vcpu.arch.msr & MSR_PR);
 	stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
 
-	KVMTRACE_5D(STLB_WRITE, &vcpu_e500->vcpu, index_of(tlbsel, esel),
-			stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
-			handler);
+	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
+			     stlbe->mas3, stlbe->mas7);
 }
 
 /* XXX only map the one-one case, for now use TLB0 */
@@ -535,9 +534,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 	gtlbe->mas3 = vcpu_e500->mas3;
 	gtlbe->mas7 = vcpu_e500->mas7;
 
-	KVMTRACE_5D(GTLB_WRITE, vcpu, vcpu_e500->mas0,
-			gtlbe->mas1, gtlbe->mas2, gtlbe->mas3, gtlbe->mas7,
-			handler);
+	trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2,
+			     gtlbe->mas3, gtlbe->mas7);
 
 	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
 	if (tlbe_is_host_safe(vcpu, gtlbe)) {
@@ -545,7 +543,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 		case 0:
 			/* TLB0 */
 			gtlbe->mas1 &= ~MAS1_TSIZE(~0);
-			gtlbe->mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K);
+			gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 
 			stlbsel = 0;
 			sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
@@ -679,14 +677,14 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
 
 	/* Insert large initial mapping for guest. */
 	tlbe = &vcpu_e500->guest_tlb[1][0];
-	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M);
+	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
 	tlbe->mas2 = 0;
 	tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
 	tlbe->mas7 = 0;
 
 	/* 4K map for serial output. Used by kernel wrapper. */
 	tlbe = &vcpu_e500->guest_tlb[1][1];
-	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K);
+	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 	tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
 	tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
 	tlbe->mas7 = 0;

+ 3 - 3
arch/powerpc/kvm/e500_tlb.h

@@ -16,7 +16,7 @@
 #define __KVM_E500_TLB_H__
 
 #include <linux/kvm_host.h>
-#include <asm/mmu-fsl-booke.h>
+#include <asm/mmu-book3e.h>
 #include <asm/tlb.h>
 #include <asm/kvm_e500.h>
 
@@ -59,7 +59,7 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
 /* TLB helper functions */
 static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
 {
-	return (tlbe->mas1 >> 8) & 0xf;
+	return (tlbe->mas1 >> 7) & 0x1f;
 }
 
 static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
@@ -70,7 +70,7 @@ static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
 static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
 {
 	unsigned int pgsize = get_tlb_size(tlbe);
-	return 1ULL << 10 << (pgsize << 1);
+	return 1ULL << 10 << pgsize;
 }
 
 static inline gva_t get_tlb_end(const struct tlbe *tlbe)

+ 5 - 2
arch/powerpc/kvm/emulate.c

@@ -29,6 +29,7 @@
 #include <asm/kvm_ppc.h>
 #include <asm/disassemble.h>
 #include "timing.h"
+#include "trace.h"
 
 #define OP_TRAP 3
 
@@ -187,7 +188,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			case SPRN_SRR1:
 				vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
 			case SPRN_PVR:
-				vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
+				vcpu->arch.gpr[rt] = mfspr(SPRN_PVR); break;
+			case SPRN_PIR:
+				vcpu->arch.gpr[rt] = mfspr(SPRN_PIR); break;
 
 			/* Note: mftb and TBRL/TBWL are user-accessible, so
 			 * the guest can always access the real TB anyways.
@@ -417,7 +420,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		}
 	}
 
-	KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
+	trace_kvm_ppc_instr(inst, vcpu->arch.pc, emulated);
 
 	if (advance)
 		vcpu->arch.pc += 4; /* Advance past emulated instruction. */

+ 14 - 18
arch/powerpc/kvm/powerpc.c

@@ -31,25 +31,17 @@
 #include "timing.h"
 #include "../mm/mmu_decl.h"
 
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	return gfn;
 }
 
-int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
-{
-	return !!(v->arch.pending_exceptions);
-}
-
-int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
-{
-	/* do real check here */
-	return 1;
-}
-
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-	return !(v->arch.msr & MSR_WE);
+	return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions);
 }
 
 
@@ -122,13 +114,17 @@ struct kvm *kvm_arch_create_vm(void)
 static void kvmppc_free_vcpus(struct kvm *kvm)
 {
 	unsigned int i;
+	struct kvm_vcpu *vcpu;
 
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		if (kvm->vcpus[i]) {
-			kvm_arch_vcpu_free(kvm->vcpus[i]);
-			kvm->vcpus[i] = NULL;
-		}
-	}
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_arch_vcpu_free(vcpu);
+
+	mutex_lock(&kvm->lock);
+	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+		kvm->vcpus[i] = NULL;
+
+	atomic_set(&kvm->online_vcpus, 0);
+	mutex_unlock(&kvm->lock);
 }
 
 void kvm_arch_sync_events(struct kvm *kvm)

+ 104 - 0
arch/powerpc/kvm/trace.h

@@ -0,0 +1,104 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+
+/*
+ * Tracepoint for guest mode entry.
+ */
+TRACE_EVENT(kvm_ppc_instr,
+	TP_PROTO(unsigned int inst, unsigned long pc, unsigned int emulate),
+	TP_ARGS(inst, pc, emulate),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	inst		)
+		__field(	unsigned long,	pc		)
+		__field(	unsigned int,	emulate		)
+	),
+
+	TP_fast_assign(
+		__entry->inst		= inst;
+		__entry->pc		= pc;
+		__entry->emulate	= emulate;
+	),
+
+	TP_printk("inst %u pc 0x%lx emulate %u\n",
+		  __entry->inst, __entry->pc, __entry->emulate)
+);
+
+TRACE_EVENT(kvm_stlb_inval,
+	TP_PROTO(unsigned int stlb_index),
+	TP_ARGS(stlb_index),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	stlb_index	)
+	),
+
+	TP_fast_assign(
+		__entry->stlb_index	= stlb_index;
+	),
+
+	TP_printk("stlb_index %u", __entry->stlb_index)
+);
+
+TRACE_EVENT(kvm_stlb_write,
+	TP_PROTO(unsigned int victim, unsigned int tid, unsigned int word0,
+		 unsigned int word1, unsigned int word2),
+	TP_ARGS(victim, tid, word0, word1, word2),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	victim		)
+		__field(	unsigned int,	tid		)
+		__field(	unsigned int,	word0		)
+		__field(	unsigned int,	word1		)
+		__field(	unsigned int,	word2		)
+	),
+
+	TP_fast_assign(
+		__entry->victim		= victim;
+		__entry->tid		= tid;
+		__entry->word0		= word0;
+		__entry->word1		= word1;
+		__entry->word2		= word2;
+	),
+
+	TP_printk("victim %u tid %u w0 %u w1 %u w2 %u",
+		__entry->victim, __entry->tid, __entry->word0,
+		__entry->word1, __entry->word2)
+);
+
+TRACE_EVENT(kvm_gtlb_write,
+	TP_PROTO(unsigned int gtlb_index, unsigned int tid, unsigned int word0,
+		 unsigned int word1, unsigned int word2),
+	TP_ARGS(gtlb_index, tid, word0, word1, word2),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	gtlb_index	)
+		__field(	unsigned int,	tid		)
+		__field(	unsigned int,	word0		)
+		__field(	unsigned int,	word1		)
+		__field(	unsigned int,	word2		)
+	),
+
+	TP_fast_assign(
+		__entry->gtlb_index	= gtlb_index;
+		__entry->tid		= tid;
+		__entry->word0		= word0;
+		__entry->word1		= word1;
+		__entry->word2		= word2;
+	),
+
+	TP_printk("gtlb_index %u tid %u w0 %u w1 %u w2 %u",
+		__entry->gtlb_index, __entry->tid, __entry->word0,
+		__entry->word1, __entry->word2)
+);
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

+ 0 - 9
arch/s390/include/asm/kvm.h

@@ -15,15 +15,6 @@
  */
 #include <linux/types.h>
 
-/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
-struct kvm_pic_state {
-	/* no PIC for s390 */
-};
-
-struct kvm_ioapic_state {
-	/* no IOAPIC for s390 */
-};
-
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 	/* general purpose regs for s390 */

+ 9 - 6
arch/s390/include/asm/kvm_host.h

@@ -1,7 +1,7 @@
 /*
  * asm-s390/kvm_host.h - definition for kernel virtual machines on s390
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -40,7 +40,11 @@ struct sca_block {
 	struct sca_entry cpu[64];
 } __attribute__((packed));
 
-#define KVM_PAGES_PER_HPAGE 256
+#define KVM_NR_PAGE_SIZES 2
+#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + ((x) - 1) * 8)
+#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
+#define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
+#define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
 #define CPUSTAT_HOST       0x80000000
 #define CPUSTAT_WAIT       0x10000000
@@ -182,8 +186,9 @@ struct kvm_s390_interrupt_info {
 };
 
 /* for local_interrupt.action_flags */
-#define ACTION_STORE_ON_STOP 1
-#define ACTION_STOP_ON_STOP  2
+#define ACTION_STORE_ON_STOP		(1<<0)
+#define ACTION_STOP_ON_STOP		(1<<1)
+#define ACTION_RELOADVCPU_ON_STOP	(1<<2)
 
 struct kvm_s390_local_interrupt {
 	spinlock_t lock;
@@ -227,8 +232,6 @@ struct kvm_vm_stat {
 };
 
 struct kvm_arch{
-	unsigned long guest_origin;
-	unsigned long guest_memsize;
 	struct sca_block *sca;
 	debug_info_t *dbf;
 	struct kvm_s390_float_interrupt float_int;

+ 4 - 0
arch/s390/include/asm/kvm_para.h

@@ -13,6 +13,8 @@
 #ifndef __S390_KVM_PARA_H
 #define __S390_KVM_PARA_H
 
+#ifdef __KERNEL__
+
 /*
  * Hypercalls for KVM on s390. The calling convention is similar to the
  * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1
@@ -147,4 +149,6 @@ static inline unsigned int kvm_arch_para_features(void)
 	return 0;
 }
 
+#endif
+
 #endif /* __S390_KVM_PARA_H */

+ 1 - 8
arch/s390/kvm/Kconfig

@@ -1,11 +1,7 @@
 #
 # KVM configuration
 #
-config HAVE_KVM
-       bool
-
-config HAVE_KVM_IRQCHIP
-       bool
+source "virt/kvm/Kconfig"
 
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
@@ -38,9 +34,6 @@ config KVM
 
 	  If unsure, say N.
 
-config KVM_TRACE
-       bool
-
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/virtio/Kconfig

+ 12 - 11
arch/s390/kvm/gaccess.h

@@ -1,7 +1,7 @@
 /*
  * gaccess.h -  access guest memory
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -16,13 +16,14 @@
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
 #include <asm/uaccess.h>
+#include "kvm-s390.h"
 
 static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
 					       unsigned long guestaddr)
 {
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-	unsigned long origin  = vcpu->kvm->arch.guest_origin;
-	unsigned long memsize = vcpu->kvm->arch.guest_memsize;
+	unsigned long origin  = vcpu->arch.sie_block->gmsor;
+	unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
 
 	if (guestaddr < 2 * PAGE_SIZE)
 		guestaddr += prefix;
@@ -158,8 +159,8 @@ static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest,
 				const void *from, unsigned long n)
 {
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-	unsigned long origin  = vcpu->kvm->arch.guest_origin;
-	unsigned long memsize = vcpu->kvm->arch.guest_memsize;
+	unsigned long origin  = vcpu->arch.sie_block->gmsor;
+	unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
 
 	if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
 		goto slowpath;
@@ -209,8 +210,8 @@ static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
 				  unsigned long guestsrc, unsigned long n)
 {
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-	unsigned long origin  = vcpu->kvm->arch.guest_origin;
-	unsigned long memsize = vcpu->kvm->arch.guest_memsize;
+	unsigned long origin  = vcpu->arch.sie_block->gmsor;
+	unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
 
 	if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
 		goto slowpath;
@@ -244,8 +245,8 @@ static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu,
 					 unsigned long guestdest,
 					 const void *from, unsigned long n)
 {
-	unsigned long origin  = vcpu->kvm->arch.guest_origin;
-	unsigned long memsize = vcpu->kvm->arch.guest_memsize;
+	unsigned long origin  = vcpu->arch.sie_block->gmsor;
+	unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
 
 	if (guestdest + n > memsize)
 		return -EFAULT;
@@ -262,8 +263,8 @@ static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
 					   unsigned long guestsrc,
 					   unsigned long n)
 {
-	unsigned long origin  = vcpu->kvm->arch.guest_origin;
-	unsigned long memsize = vcpu->kvm->arch.guest_memsize;
+	unsigned long origin  = vcpu->arch.sie_block->gmsor;
+	unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
 
 	if (guestsrc + n > memsize)
 		return -EFAULT;

+ 12 - 6
arch/s390/kvm/intercept.c

@@ -1,7 +1,7 @@
 /*
  * intercept.c - in-kernel handling for sie intercepts
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -128,7 +128,7 @@ static int handle_noop(struct kvm_vcpu *vcpu)
 
 static int handle_stop(struct kvm_vcpu *vcpu)
 {
-	int rc;
+	int rc = 0;
 
 	vcpu->stat.exit_stop_request++;
 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -141,12 +141,18 @@ static int handle_stop(struct kvm_vcpu *vcpu)
 			rc = -ENOTSUPP;
 	}
 
+	if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
+		vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
+		rc = SIE_INTERCEPT_RERUNVCPU;
+		vcpu->run->exit_reason = KVM_EXIT_INTR;
+	}
+
 	if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
 		vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
 		VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
 		rc = -ENOTSUPP;
-	} else
-		rc = 0;
+	}
+
 	spin_unlock_bh(&vcpu->arch.local_int.lock);
 	return rc;
 }
@@ -158,9 +164,9 @@ static int handle_validity(struct kvm_vcpu *vcpu)
 
 	vcpu->stat.exit_validity++;
 	if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix
-		<= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){
+		<= kvm_s390_vcpu_get_memsize(vcpu) - 2*PAGE_SIZE)) {
 		rc = fault_in_pages_writeable((char __user *)
-			 vcpu->kvm->arch.guest_origin +
+			 vcpu->arch.sie_block->gmsor +
 			 vcpu->arch.sie_block->prefix,
 			 2*PAGE_SIZE);
 		if (rc)

+ 1 - 7
arch/s390/kvm/interrupt.c

@@ -283,7 +283,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
@@ -320,12 +320,6 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 	return rc;
 }
 
-int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
-{
-	/* do real check here */
-	return 1;
-}
-
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	return 0;

+ 33 - 45
arch/s390/kvm/kvm-s390.c

@@ -1,7 +1,7 @@
 /*
  * s390host.c --  hosting zSeries kernel virtual machines
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -10,6 +10,7 @@
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Heiko Carstens <heiko.carstens@de.ibm.com>
+ *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  */
 
 #include <linux/compiler.h>
@@ -210,13 +211,17 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 static void kvm_free_vcpus(struct kvm *kvm)
 {
 	unsigned int i;
+	struct kvm_vcpu *vcpu;
 
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		if (kvm->vcpus[i]) {
-			kvm_arch_vcpu_destroy(kvm->vcpus[i]);
-			kvm->vcpus[i] = NULL;
-		}
-	}
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_arch_vcpu_destroy(vcpu);
+
+	mutex_lock(&kvm->lock);
+	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+		kvm->vcpus[i] = NULL;
+
+	atomic_set(&kvm->online_vcpus, 0);
+	mutex_unlock(&kvm->lock);
 }
 
 void kvm_arch_sync_events(struct kvm *kvm)
@@ -278,16 +283,10 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->gbea = 1;
 }
 
-/* The current code can have up to 256 pages for virtio */
-#define VIRTIODESCSPACE (256ul * 4096ul)
-
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
-	vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
-				      vcpu->kvm->arch.guest_origin +
-				      VIRTIODESCSPACE - 1ul;
-	vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
+	set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests);
 	vcpu->arch.sie_block->ecb   = 2;
 	vcpu->arch.sie_block->eca   = 0xC1002001U;
 	vcpu->arch.sie_block->fac   = (int) (long) facilities;
@@ -319,8 +318,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	BUG_ON(!kvm->arch.sca);
 	if (!kvm->arch.sca->cpu[id].sda)
 		kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
-	else
-		BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
 	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
 	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 
@@ -490,9 +487,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	vcpu_load(vcpu);
 
+rerun_vcpu:
+	if (vcpu->requests)
+		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+			kvm_s390_vcpu_set_mem(vcpu);
+
 	/* verify, that memory has been registered */
-	if (!vcpu->kvm->arch.guest_memsize) {
+	if (!vcpu->arch.sie_block->gmslm) {
 		vcpu_put(vcpu);
+		VCPU_EVENT(vcpu, 3, "%s", "no memory registered to run vcpu");
 		return -EINVAL;
 	}
 
@@ -509,6 +512,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
 		break;
 	case KVM_EXIT_UNKNOWN:
+	case KVM_EXIT_INTR:
 	case KVM_EXIT_S390_RESET:
 		break;
 	default:
@@ -522,8 +526,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		rc = kvm_handle_sie_intercept(vcpu);
 	} while (!signal_pending(current) && !rc);
 
-	if (signal_pending(current) && !rc)
+	if (rc == SIE_INTERCEPT_RERUNVCPU)
+		goto rerun_vcpu;
+
+	if (signal_pending(current) && !rc) {
+		kvm_run->exit_reason = KVM_EXIT_INTR;
 		rc = -EINTR;
+	}
 
 	if (rc == -ENOTSUPP) {
 		/* intercept cannot be handled in-kernel, prepare kvm-run */
@@ -676,6 +685,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 				int user_alloc)
 {
 	int i;
+	struct kvm_vcpu *vcpu;
 
 	/* A few sanity checks. We can have exactly one memory slot which has
 	   to start at guest virtual zero and which has to be located at a
@@ -684,7 +694,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	   vmas. It is okay to mmap() and munmap() stuff in this slot after
 	   doing this call at any time */
 
-	if (mem->slot || kvm->arch.guest_memsize)
+	if (mem->slot)
 		return -EINVAL;
 
 	if (mem->guest_phys_addr)
@@ -699,36 +709,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	if (!user_alloc)
 		return -EINVAL;
 
-	/* lock all vcpus */
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		if (!kvm->vcpus[i])
+	/* request update of sie control block for all available vcpus */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
 			continue;
-		if (!mutex_trylock(&kvm->vcpus[i]->mutex))
-			goto fail_out;
-	}
-
-	kvm->arch.guest_origin = mem->userspace_addr;
-	kvm->arch.guest_memsize = mem->memory_size;
-
-	/* update sie control blocks, and unlock all vcpus */
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		if (kvm->vcpus[i]) {
-			kvm->vcpus[i]->arch.sie_block->gmsor =
-				kvm->arch.guest_origin;
-			kvm->vcpus[i]->arch.sie_block->gmslm =
-				kvm->arch.guest_memsize +
-				kvm->arch.guest_origin +
-				VIRTIODESCSPACE - 1ul;
-			mutex_unlock(&kvm->vcpus[i]->mutex);
-		}
+		kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
 	}
 
 	return 0;
-
-fail_out:
-	for (; i >= 0; i--)
-		mutex_unlock(&kvm->vcpus[i]->mutex);
-	return -EINVAL;
 }
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
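
Editor's note: memory registration no longer grabs every vcpu mutex to patch gmsor/gmslm in place; it raises KVM_REQ_MMU_RELOAD and forces each vcpu out of SIE, and kvm_arch_vcpu_ioctl_run() applies the new mapping at the rerun_vcpu label before re-entering the guest. A condensed sketch of the producer side, with names taken from the hunks above (the sketch_ prefix is mine):

static void sketch_request_mem_reload(struct kvm *kvm)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* skip vcpus that already have a reload pending */
		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
			continue;
		/* kick the vcpu out of SIE so it notices the request */
		kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
	}
}

The consumer side is the handle_stop()/SIE_INTERCEPT_RERUNVCPU path shown earlier: the run loop clears the request, calls kvm_s390_vcpu_set_mem(), and re-enters SIE.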

+ 31 - 1
arch/s390/kvm/kvm-s390.h

@@ -1,7 +1,7 @@
 /*
  * kvm_s390.h -  definition for kvm on s390
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -9,6 +9,7 @@
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
+ *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  */
 
 #ifndef ARCH_S390_KVM_S390_H
@@ -18,8 +19,13 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 
+/* The current code can have up to 256 pages for virtio */
+#define VIRTIODESCSPACE (256ul * 4096ul)
+
 typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 
+/* negative values are error codes, positive values for internal conditions */
+#define SIE_INTERCEPT_RERUNVCPU		(1<<0)
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
 
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
@@ -50,6 +56,30 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 		struct kvm_s390_interrupt *s390int);
 int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
+
+static inline int kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.sie_block->gmslm
+		- vcpu->arch.sie_block->gmsor
+		- VIRTIODESCSPACE + 1ul;
+}
+
+static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
+{
+	struct kvm_memory_slot *mem;
+
+	down_read(&vcpu->kvm->slots_lock);
+	mem = &vcpu->kvm->memslots[0];
+
+	vcpu->arch.sie_block->gmsor = mem->userspace_addr;
+	vcpu->arch.sie_block->gmslm =
+		mem->userspace_addr +
+		(mem->npages << PAGE_SHIFT) +
+		VIRTIODESCSPACE - 1ul;
+
+	up_read(&vcpu->kvm->slots_lock);
+}
 
 /* implemented in priv.c */
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
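
Editor's note: kvm_s390_vcpu_set_mem() folds VIRTIODESCSPACE into gmslm, and kvm_s390_vcpu_get_memsize() undoes exactly that. Illustrative numbers only (the addresses below are made up):

/*
 * With a 256 MiB (0x10000000) guest registered at userspace_addr
 * 0x80000000, kvm_s390_vcpu_set_mem() yields
 *
 *	gmsor = 0x80000000
 *	gmslm = 0x80000000 + 0x10000000 + VIRTIODESCSPACE - 1
 *	      = 0x90000000 + 0x100000 - 1 = 0x900fffff
 *
 * and kvm_s390_vcpu_get_memsize() recovers
 *
 *	0x900fffff - 0x80000000 - 0x100000 + 1 = 0x10000000 (256 MiB).
 */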

+ 36 - 24
arch/s390/kvm/sigp.c

@@ -1,7 +1,7 @@
 /*
  * sigp.c - handling interprocessor communication
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -9,6 +9,7 @@
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
+ *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  */
 
 #include <linux/kvm.h>
@@ -107,46 +108,57 @@ unlock:
 	return rc;
 }
 
-static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
+static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
 {
-	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
-	struct kvm_s390_local_interrupt *li;
 	struct kvm_s390_interrupt_info *inti;
-	int rc;
-
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return 3; /* not operational */
 
 	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
 	if (!inti)
 		return -ENOMEM;
-
 	inti->type = KVM_S390_SIGP_STOP;
 
-	spin_lock(&fi->lock);
-	li = fi->local_int[cpu_addr];
-	if (li == NULL) {
-		rc = 3; /* not operational */
-		kfree(inti);
-		goto unlock;
-	}
 	spin_lock_bh(&li->lock);
 	list_add_tail(&inti->list, &li->list);
 	atomic_set(&li->active, 1);
 	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
-	if (store)
-		li->action_bits |= ACTION_STORE_ON_STOP;
-	li->action_bits |= ACTION_STOP_ON_STOP;
+	li->action_bits |= action;
 	if (waitqueue_active(&li->wq))
 		wake_up_interruptible(&li->wq);
 	spin_unlock_bh(&li->lock);
-	rc = 0; /* order accepted */
+
+	return 0; /* order accepted */
+}
+
+static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_local_interrupt *li;
+	int rc;
+
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return 3; /* not operational */
+
+	spin_lock(&fi->lock);
+	li = fi->local_int[cpu_addr];
+	if (li == NULL) {
+		rc = 3; /* not operational */
+		goto unlock;
+	}
+
+	rc = __inject_sigp_stop(li, action);
+
 unlock:
 	spin_unlock(&fi->lock);
 	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
 	return rc;
 }
 
+int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	return __inject_sigp_stop(li, action);
+}
+
 static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 {
 	int rc;
@@ -177,9 +189,9 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 	/* make sure that the new value is valid memory */
 	address = address & 0x7fffe000u;
 	if ((copy_from_guest(vcpu, &tmp,
-		(u64) (address + vcpu->kvm->arch.guest_origin) , 1)) ||
+		(u64) (address + vcpu->arch.sie_block->gmsor) , 1)) ||
 	   (copy_from_guest(vcpu, &tmp, (u64) (address +
-			vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) {
+			vcpu->arch.sie_block->gmsor + PAGE_SIZE), 1))) {
 		*reg |= SIGP_STAT_INVALID_PARAMETER;
 		return 1; /* invalid parameter */
 	}
@@ -262,11 +274,11 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		break;
 	case SIGP_STOP:
 		vcpu->stat.instruction_sigp_stop++;
-		rc = __sigp_stop(vcpu, cpu_addr, 0);
+		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP);
 		break;
 	case SIGP_STOP_STORE_STATUS:
 		vcpu->stat.instruction_sigp_stop++;
-		rc = __sigp_stop(vcpu, cpu_addr, 1);
+		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP);
 		break;
 	case SIGP_SET_ARCH:
 		vcpu->stat.instruction_sigp_arch++;
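
Editor's note: after this refactoring every stop request goes through __inject_sigp_stop(), with the action bits chosen by the caller. As the hunks above show, the three paths are:

/*
 *	SIGP STOP (guest)             __sigp_stop(..., ACTION_STOP_ON_STOP)
 *	SIGP STOP AND STORE (guest)   __sigp_stop(..., ACTION_STORE_ON_STOP)
 *	memslot update (host)         kvm_s390_inject_sigp_stop(vcpu,
 *	                                      ACTION_RELOADVCPU_ON_STOP)
 *
 * All of them queue a KVM_S390_SIGP_STOP interrupt and OR in the action
 * bits that handle_stop() later consumes under local_int.lock.
 */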

+ 2 - 0
arch/x86/include/asm/apicdef.h

@@ -15,6 +15,7 @@
 
 #define	APIC_LVR	0x30
 #define		APIC_LVR_MASK		0xFF00FF
+#define		APIC_LVR_DIRECTED_EOI	(1 << 24)
 #define		GET_APIC_VERSION(x)	((x) & 0xFFu)
 #define		GET_APIC_MAXLVT(x)	(((x) >> 16) & 0xFFu)
 #ifdef CONFIG_X86_32
@@ -41,6 +42,7 @@
 #define		APIC_DFR_CLUSTER		0x0FFFFFFFul
 #define		APIC_DFR_FLAT			0xFFFFFFFFul
 #define	APIC_SPIV	0xF0
+#define		APIC_SPIV_DIRECTED_EOI		(1 << 12)
 #define		APIC_SPIV_FOCUS_DISABLED	(1 << 9)
 #define		APIC_SPIV_APIC_ENABLED		(1 << 8)
 #define	APIC_ISR	0x100
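
Editor's note: the two new definitions pair up: bit 24 of the version register advertises directed-EOI support, and bit 12 of the spurious-interrupt register enables it (the lapic.c hunk further down only accepts APIC_SPIV_DIRECTED_EOI when the LVR bit is set). A trivial sketch, assuming the caller has already read the version register; the sketch_ name is mine:

static inline int sketch_directed_eoi_supported(u32 lvr)
{
	return (lvr & APIC_LVR_DIRECTED_EOI) != 0;
}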

+ 10 - 0
arch/x86/include/asm/kvm.h

@@ -17,6 +17,8 @@
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_MSIX
+#define __KVM_HAVE_MCE
+#define __KVM_HAVE_PIT_STATE2
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
@@ -236,6 +238,14 @@ struct kvm_pit_state {
 	struct kvm_pit_channel_state channels[3];
 };
 
+#define KVM_PIT_FLAGS_HPET_LEGACY  0x00000001
+
+struct kvm_pit_state2 {
+	struct kvm_pit_channel_state channels[3];
+	__u32 flags;
+	__u32 reserved[9];
+};
+
 struct kvm_reinject_control {
 	__u8 pit_reinject;
 	__u8 reserved[31];
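
Editor's note: struct kvm_pit_state2 extends the old PIT state with a flags word; KVM_PIT_FLAGS_HPET_LEGACY reports that the PIT has been handed over to HPET legacy routing. A userspace sketch, assuming the KVM_GET_PIT2 vm ioctl that accompanies this struct elsewhere in the series (error handling kept minimal):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* returns 1 if the in-kernel PIT is in HPET legacy mode, 0 if not, -1 on error */
static int sketch_pit_in_hpet_legacy_mode(int vm_fd)
{
	struct kvm_pit_state2 ps2;

	if (ioctl(vm_fd, KVM_GET_PIT2, &ps2) < 0)
		return -1;
	return !!(ps2.flags & KVM_PIT_FLAGS_HPET_LEGACY);
}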

+ 0 - 0
arch/x86/include/asm/kvm_x86_emulate.h → arch/x86/include/asm/kvm_emulate.h


+ 32 - 28
arch/x86/include/asm/kvm_host.h

@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/mm.h>
 #include <linux/mmu_notifier.h>
+#include <linux/tracepoint.h>
 
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
@@ -37,12 +38,14 @@
 #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS |	\
 				  0xFFFFFF0000000000ULL)
 
-#define KVM_GUEST_CR0_MASK				   \
-	(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE \
-	 | X86_CR0_NW | X86_CR0_CD)
+#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST				\
+	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
+#define KVM_GUEST_CR0_MASK						\
+	(KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
+#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST				\
+	(X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
 #define KVM_VM_CR0_ALWAYS_ON						\
-	(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE | X86_CR0_TS \
-	 | X86_CR0_MP)
+	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_GUEST_CR4_MASK						\
 	(X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
@@ -51,12 +54,12 @@
 #define INVALID_PAGE (~(hpa_t)0)
 #define UNMAPPED_GVA (~(gpa_t)0)
 
-/* shadow tables are PAE even on non-PAE hosts */
-#define KVM_HPAGE_SHIFT 21
-#define KVM_HPAGE_SIZE (1UL << KVM_HPAGE_SHIFT)
-#define KVM_HPAGE_MASK (~(KVM_HPAGE_SIZE - 1))
-
-#define KVM_PAGES_PER_HPAGE (KVM_HPAGE_SIZE / PAGE_SIZE)
+/* KVM Hugepage definitions for x86 */
+#define KVM_NR_PAGE_SIZES	3
+#define KVM_HPAGE_SHIFT(x)	(PAGE_SHIFT + (((x) - 1) * 9))
+#define KVM_HPAGE_SIZE(x)	(1UL << KVM_HPAGE_SHIFT(x))
+#define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
+#define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
 #define DE_VECTOR 0
 #define DB_VECTOR 1
@@ -120,6 +123,10 @@ enum kvm_reg {
 	NR_VCPU_REGS
 };
 
+enum kvm_reg_ex {
+	VCPU_EXREG_PDPTR = NR_VCPU_REGS,
+};
+
 enum {
 	VCPU_SREG_ES,
 	VCPU_SREG_CS,
@@ -131,7 +138,7 @@ enum {
 	VCPU_SREG_LDTR,
 };
 
-#include <asm/kvm_x86_emulate.h>
+#include <asm/kvm_emulate.h>
 
 #define KVM_NR_MEM_OBJS 40
 
@@ -308,7 +315,6 @@ struct kvm_vcpu_arch {
 	struct {
 		gfn_t gfn;	/* presumed gfn during guest pte update */
 		pfn_t pfn;	/* pfn corresponding to that gfn */
-		int largepage;
 		unsigned long mmu_seq;
 	} update_pte;
 
@@ -334,16 +340,6 @@ struct kvm_vcpu_arch {
 		u8 nr;
 	} interrupt;
 
-	struct {
-		int vm86_active;
-		u8 save_iopl;
-		struct kvm_save_segment {
-			u16 selector;
-			unsigned long base;
-			u32 limit;
-			u32 ar;
-		} tr, es, ds, fs, gs;
-	} rmode;
 	int halt_request; /* real mode on Intel only */
 
 	int cpuid_nent;
@@ -366,13 +362,15 @@ struct kvm_vcpu_arch {
 	u32 pat;
 
 	int switch_db_regs;
-	unsigned long host_db[KVM_NR_DB_REGS];
-	unsigned long host_dr6;
-	unsigned long host_dr7;
 	unsigned long db[KVM_NR_DB_REGS];
 	unsigned long dr6;
 	unsigned long dr7;
 	unsigned long eff_db[KVM_NR_DB_REGS];
+
+	u64 mcg_cap;
+	u64 mcg_status;
+	u64 mcg_ctl;
+	u64 *mce_banks;
 };
 
 struct kvm_mem_alias {
@@ -409,6 +407,7 @@ struct kvm_arch{
 
 	struct page *ept_identity_pagetable;
 	bool ept_identity_pagetable_done;
+	gpa_t ept_identity_map_addr;
 
 	unsigned long irq_sources_bitmap;
 	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
@@ -526,6 +525,9 @@ struct kvm_x86_ops {
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+	bool (*gb_page_enable)(void);
+
+	const struct trace_print_flags *exit_reasons_str;
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -618,6 +620,7 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
 			   u32 error_code);
+bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
 
 int kvm_pic_set_irq(void *opaque, int irq, int level);
 
@@ -752,8 +755,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
-#define MSR_IA32_TIME_STAMP_COUNTER		0x010
-
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_BASE_SIZE 0x68
 #define TSS_IOPB_SIZE (65536 / 8)
@@ -796,5 +797,8 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
+int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 
 #endif /* _ASM_X86_KVM_HOST_H */
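
Editor's note: the hugepage macros are now indexed by page-size level rather than hard-wired to 2 MiB. Worked values, assuming the usual x86 PAGE_SHIFT of 12:

/*
 *	level x    KVM_HPAGE_SHIFT(x)    KVM_HPAGE_SIZE(x)    KVM_PAGES_PER_HPAGE(x)
 *	   1              12                  4 KiB                      1
 *	   2              21                  2 MiB                    512
 *	   3              30                  1 GiB                 262144
 *
 * KVM_NR_PAGE_SIZES == 3 covers exactly these three levels.
 */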

+ 2 - 0
arch/x86/include/asm/kvm_para.h

@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_KVM_PARA_H
 #define _ASM_X86_KVM_PARA_H
 
+#include <linux/types.h>
+
 /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
  * should be used to determine that a VM is running under KVM.
  */

+ 1 - 0
arch/x86/include/asm/msr-index.h

@@ -374,6 +374,7 @@
 /* AMD-V MSRs */
 
 #define MSR_VM_CR                       0xc0010114
+#define MSR_VM_IGNNE                    0xc0010115
 #define MSR_VM_HSAVE_PA                 0xc0010117
 
 #endif /* _ASM_X86_MSR_INDEX_H */

+ 8 - 0
arch/x86/include/asm/vmx.h

@@ -55,6 +55,7 @@
 #define SECONDARY_EXEC_ENABLE_EPT               0x00000002
 #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
 
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
@@ -351,9 +352,16 @@ enum vmcs_field {
 #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR		0
 #define VMX_EPT_EXTENT_CONTEXT			1
 #define VMX_EPT_EXTENT_GLOBAL			2
+
+#define VMX_EPT_EXECUTE_ONLY_BIT		(1ull)
+#define VMX_EPT_PAGE_WALK_4_BIT			(1ull << 6)
+#define VMX_EPTP_UC_BIT				(1ull << 8)
+#define VMX_EPTP_WB_BIT				(1ull << 14)
+#define VMX_EPT_2MB_PAGE_BIT			(1ull << 16)
 #define VMX_EPT_EXTENT_INDIVIDUAL_BIT		(1ull << 24)
 #define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
+
 #define VMX_EPT_DEFAULT_GAW			3
 #define VMX_EPT_MAX_GAW				0x4
 #define VMX_EPT_MT_EPTE_SHIFT			3
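
Editor's note: the new VMX_EPT_*/VMX_EPTP_* constants name capability bits reported by the VMX EPT/VPID capability MSR. A minimal probe sketch; the MSR index (MSR_IA32_VMX_EPT_VPID_CAP) and the sketch_ name are taken from the VMX spec and my own naming, not from this hunk:

#include <linux/types.h>
#include <asm/msr.h>

static inline bool sketch_ept_supports_2m_pages(void)
{
	u64 ept_vpid_cap;

	rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, ept_vpid_cap);
	return (ept_vpid_cap & VMX_EPT_2MB_PAGE_BIT) != 0;
}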

+ 1 - 6
arch/x86/kernel/kvm.c

@@ -34,7 +34,6 @@
 struct kvm_para_state {
 	u8 mmu_queue[MMU_QUEUE_SIZE];
 	int mmu_queue_len;
-	enum paravirt_lazy_mode mode;
 };
 
 static DEFINE_PER_CPU(struct kvm_para_state, para_state);
@@ -77,7 +76,7 @@ static void kvm_deferred_mmu_op(void *buffer, int len)
 {
 	struct kvm_para_state *state = kvm_para_state();
 
-	if (state->mode != PARAVIRT_LAZY_MMU) {
+	if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) {
 		kvm_mmu_op(buffer, len);
 		return;
 	}
@@ -185,10 +184,7 @@ static void kvm_release_pt(unsigned long pfn)
 
 static void kvm_enter_lazy_mmu(void)
 {
-	struct kvm_para_state *state = kvm_para_state();
-
 	paravirt_enter_lazy_mmu();
-	state->mode = paravirt_get_lazy_mode();
 }
 
 static void kvm_leave_lazy_mmu(void)
@@ -197,7 +193,6 @@ static void kvm_leave_lazy_mmu(void)
 
 	mmu_queue_flush(state);
 	paravirt_leave_lazy_mmu();
-	state->mode = paravirt_get_lazy_mode();
 }
 
 static void __init paravirt_ops_setup(void)

+ 2 - 2
arch/x86/kernel/kvmclock.c

@@ -50,8 +50,8 @@ static unsigned long kvm_get_wallclock(void)
 	struct timespec ts;
 	int low, high;
 
-	low = (int)__pa(&wall_clock);
-	high = ((u64)__pa(&wall_clock) >> 32);
+	low = (int)__pa_symbol(&wall_clock);
+	high = ((u64)__pa_symbol(&wall_clock) >> 32);
 	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
 
 	vcpu_time = &get_cpu_var(hv_clock);

+ 4 - 17
arch/x86/kvm/Kconfig

@@ -1,12 +1,8 @@
 #
 # KVM configuration
 #
-config HAVE_KVM
-       bool
 
-config HAVE_KVM_IRQCHIP
-       bool
-       default y
+source "virt/kvm/Kconfig"
 
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
@@ -29,6 +25,9 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select MMU_NOTIFIER
 	select ANON_INODES
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_EVENTFD
+	select KVM_APIC_ARCHITECTURE
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
@@ -63,18 +62,6 @@ config KVM_AMD
 	  To compile this as a module, choose M here: the module
 	  will be called kvm-amd.
 
-config KVM_TRACE
-	bool "KVM trace support"
-	depends on KVM && SYSFS
-	select MARKERS
-	select RELAY
-	select DEBUG_FS
-	default n
-	---help---
-	  This option allows reading a trace of kvm-related events through
-	  relayfs.  Note the ABI is not considered stable and will be
-	  modified in future updates.
-
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/lguest/Kconfig

+ 16 - 19
arch/x86/kvm/Makefile

@@ -1,22 +1,19 @@
-#
-# Makefile for Kernel-based Virtual Machine module
-#
-
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-                coalesced_mmio.o irq_comm.o)
-ifeq ($(CONFIG_KVM_TRACE),y)
-common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
-endif
-ifeq ($(CONFIG_IOMMU_API),y)
-common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
-endif
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
-	i8254.o timer.o
-obj-$(CONFIG_KVM) += kvm.o
-kvm-intel-objs = vmx.o
-obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
-kvm-amd-objs = svm.o
-obj-$(CONFIG_KVM_AMD) += kvm-amd.o
+CFLAGS_x86.o := -I.
+CFLAGS_svm.o := -I.
+CFLAGS_vmx.o := -I.
+
+kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
+				coalesced_mmio.o irq_comm.o eventfd.o)
+kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
+
+kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
+			   i8254.o timer.o
+kvm-intel-y		+= vmx.o
+kvm-amd-y		+= svm.o
+
+obj-$(CONFIG_KVM)	+= kvm.o
+obj-$(CONFIG_KVM_INTEL)	+= kvm-intel.o
+obj-$(CONFIG_KVM_AMD)	+= kvm-amd.o

+ 258 - 7
arch/x86/kvm/x86_emulate.c → arch/x86/kvm/emulate.c

@@ -1,5 +1,5 @@
 /******************************************************************************
- * x86_emulate.c
+ * emulate.c
  *
  * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
  *
@@ -30,7 +30,9 @@
 #define DPRINTF(x...) do {} while (0)
 #endif
 #include <linux/module.h>
-#include <asm/kvm_x86_emulate.h>
+#include <asm/kvm_emulate.h>
+
+#include "mmu.h"		/* for is_long_mode() */
 
 /*
  * Opcode effective-address decode tables.
@@ -60,6 +62,7 @@
 #define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
 #define SrcOne      (7<<4)	/* Implied '1' */
 #define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
+#define SrcImmU     (9<<4)      /* Immediate operand, unsigned */
 #define SrcMask     (0xf<<4)
 /* Generic ModRM decode. */
 #define ModRM       (1<<8)
@@ -97,11 +100,11 @@ static u32 opcode_table[256] = {
 	/* 0x10 - 0x17 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
+	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
 	/* 0x18 - 0x1F */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
+	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
 	/* 0x20 - 0x27 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -195,7 +198,7 @@ static u32 opcode_table[256] = {
 	ByteOp | SrcImmUByte, SrcImmUByte,
 	/* 0xE8 - 0xEF */
 	SrcImm | Stack, SrcImm | ImplicitOps,
-	SrcImm | Src2Imm16, SrcImmByte | ImplicitOps,
+	SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	/* 0xF0 - 0xF7 */
@@ -208,7 +211,7 @@ static u32 opcode_table[256] = {
 
 static u32 twobyte_table[256] = {
 	/* 0x00 - 0x0F */
-	0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
+	0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0,
 	ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
 	/* 0x10 - 0x1F */
 	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
@@ -216,7 +219,9 @@ static u32 twobyte_table[256] = {
 	ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
 	0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0x30 - 0x3F */
-	ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	ImplicitOps, 0, ImplicitOps, 0,
+	ImplicitOps, ImplicitOps, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0x40 - 0x47 */
 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@@ -319,8 +324,11 @@ static u32 group2_table[] = {
 };
 
 /* EFLAGS bit definitions. */
+#define EFLG_VM (1<<17)
+#define EFLG_RF (1<<16)
 #define EFLG_OF (1<<11)
 #define EFLG_DF (1<<10)
+#define EFLG_IF (1<<9)
 #define EFLG_SF (1<<7)
 #define EFLG_ZF (1<<6)
 #define EFLG_AF (1<<4)
@@ -1027,6 +1035,7 @@ done_prefixes:
 		c->src.type = OP_MEM;
 		break;
 	case SrcImm:
+	case SrcImmU:
 		c->src.type = OP_IMM;
 		c->src.ptr = (unsigned long *)c->eip;
 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
@@ -1044,6 +1053,19 @@ done_prefixes:
 			c->src.val = insn_fetch(s32, 4, c->eip);
 			break;
 		}
+		if ((c->d & SrcMask) == SrcImmU) {
+			switch (c->src.bytes) {
+			case 1:
+				c->src.val &= 0xff;
+				break;
+			case 2:
+				c->src.val &= 0xffff;
+				break;
+			case 4:
+				c->src.val &= 0xffffffff;
+				break;
+			}
+		}
 		break;
 	case SrcImmByte:
 	case SrcImmUByte:
@@ -1375,6 +1397,217 @@ static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
 		ctxt->interruptibility = mask;
 }
 
+static inline void
+setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
+	struct kvm_segment *cs, struct kvm_segment *ss)
+{
+	memset(cs, 0, sizeof(struct kvm_segment));
+	kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
+	memset(ss, 0, sizeof(struct kvm_segment));
+
+	cs->l = 0;		/* will be adjusted later */
+	cs->base = 0;		/* flat segment */
+	cs->g = 1;		/* 4kb granularity */
+	cs->limit = 0xffffffff;	/* 4GB limit */
+	cs->type = 0x0b;	/* Read, Execute, Accessed */
+	cs->s = 1;
+	cs->dpl = 0;		/* will be adjusted later */
+	cs->present = 1;
+	cs->db = 1;
+
+	ss->unusable = 0;
+	ss->base = 0;		/* flat segment */
+	ss->limit = 0xffffffff;	/* 4GB limit */
+	ss->g = 1;		/* 4kb granularity */
+	ss->s = 1;
+	ss->type = 0x03;	/* Read/Write, Accessed */
+	ss->db = 1;		/* 32bit stack segment */
+	ss->dpl = 0;
+	ss->present = 1;
+}
+
+static int
+emulate_syscall(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	struct kvm_segment cs, ss;
+	u64 msr_data;
+
+	/* syscall is not available in real mode */
+	if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL
+		|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE))
+		return -1;
+
+	setup_syscalls_segments(ctxt, &cs, &ss);
+
+	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+	msr_data >>= 32;
+	cs.selector = (u16)(msr_data & 0xfffc);
+	ss.selector = (u16)(msr_data + 8);
+
+	if (is_long_mode(ctxt->vcpu)) {
+		cs.db = 0;
+		cs.l = 1;
+	}
+	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+	c->regs[VCPU_REGS_RCX] = c->eip;
+	if (is_long_mode(ctxt->vcpu)) {
+#ifdef CONFIG_X86_64
+		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
+
+		kvm_x86_ops->get_msr(ctxt->vcpu,
+			ctxt->mode == X86EMUL_MODE_PROT64 ?
+			MSR_LSTAR : MSR_CSTAR, &msr_data);
+		c->eip = msr_data;
+
+		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
+		ctxt->eflags &= ~(msr_data | EFLG_RF);
+#endif
+	} else {
+		/* legacy mode */
+		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+		c->eip = (u32)msr_data;
+
+		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+	}
+
+	return 0;
+}
+
+static int
+emulate_sysenter(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	struct kvm_segment cs, ss;
+	u64 msr_data;
+
+	/* inject #UD if LOCK prefix is used */
+	if (c->lock_prefix)
+		return -1;
+
+	/* inject #GP if in real mode or paging is disabled */
+	if (ctxt->mode == X86EMUL_MODE_REAL ||
+		!(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+		kvm_inject_gp(ctxt->vcpu, 0);
+		return -1;
+	}
+
+	/* XXX sysenter/sysexit have not been tested in 64bit mode.
+	* Therefore, we inject an #UD.
+	*/
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		return -1;
+
+	setup_syscalls_segments(ctxt, &cs, &ss);
+
+	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
+	switch (ctxt->mode) {
+	case X86EMUL_MODE_PROT32:
+		if ((msr_data & 0xfffc) == 0x0) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			return -1;
+		}
+		break;
+	case X86EMUL_MODE_PROT64:
+		if (msr_data == 0x0) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			return -1;
+		}
+		break;
+	}
+
+	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+	cs.selector = (u16)msr_data;
+	cs.selector &= ~SELECTOR_RPL_MASK;
+	ss.selector = cs.selector + 8;
+	ss.selector &= ~SELECTOR_RPL_MASK;
+	if (ctxt->mode == X86EMUL_MODE_PROT64
+		|| is_long_mode(ctxt->vcpu)) {
+		cs.db = 0;
+		cs.l = 1;
+	}
+
+	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
+	c->eip = msr_data;
+
+	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
+	c->regs[VCPU_REGS_RSP] = msr_data;
+
+	return 0;
+}
+
+static int
+emulate_sysexit(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	struct kvm_segment cs, ss;
+	u64 msr_data;
+	int usermode;
+
+	/* inject #UD if LOCK prefix is used */
+	if (c->lock_prefix)
+		return -1;
+
+	/* inject #GP if in real mode or paging is disabled */
+	if (ctxt->mode == X86EMUL_MODE_REAL
+		|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+		kvm_inject_gp(ctxt->vcpu, 0);
+		return -1;
+	}
+
+	/* sysexit must be called from CPL 0 */
+	if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
+		kvm_inject_gp(ctxt->vcpu, 0);
+		return -1;
+	}
+
+	setup_syscalls_segments(ctxt, &cs, &ss);
+
+	if ((c->rex_prefix & 0x8) != 0x0)
+		usermode = X86EMUL_MODE_PROT64;
+	else
+		usermode = X86EMUL_MODE_PROT32;
+
+	cs.dpl = 3;
+	ss.dpl = 3;
+	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
+	switch (usermode) {
+	case X86EMUL_MODE_PROT32:
+		cs.selector = (u16)(msr_data + 16);
+		if ((msr_data & 0xfffc) == 0x0) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			return -1;
+		}
+		ss.selector = (u16)(msr_data + 24);
+		break;
+	case X86EMUL_MODE_PROT64:
+		cs.selector = (u16)(msr_data + 32);
+		if (msr_data == 0x0) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			return -1;
+		}
+		ss.selector = cs.selector + 8;
+		cs.db = 0;
+		cs.l = 1;
+		break;
+	}
+	cs.selector |= SELECTOR_RPL_MASK;
+	ss.selector |= SELECTOR_RPL_MASK;
+
+	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+	c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
+	c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
+
+	return 0;
+}
+
 int
 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
@@ -1970,6 +2203,12 @@ twobyte_insn:
 			goto cannot_emulate;
 		}
 		break;
+	case 0x05: 		/* syscall */
+		if (emulate_syscall(ctxt) == -1)
+			goto cannot_emulate;
+		else
+			goto writeback;
+		break;
 	case 0x06:
 		emulate_clts(ctxt->vcpu);
 		c->dst.type = OP_NONE;
@@ -2036,6 +2275,18 @@ twobyte_insn:
 		rc = X86EMUL_CONTINUE;
 		c->dst.type = OP_NONE;
 		break;
+	case 0x34:		/* sysenter */
+		if (emulate_sysenter(ctxt) == -1)
+			goto cannot_emulate;
+		else
+			goto writeback;
+		break;
+	case 0x35:		/* sysexit */
+		if (emulate_sysexit(ctxt) == -1)
+			goto cannot_emulate;
+		else
+			goto writeback;
+		break;
 	case 0x40 ... 0x4f:	/* cmov */
 		c->dst.val = c->dst.orig_val = c->src.val;
 		if (!test_cc(c->b, ctxt->eflags))
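
Editor's note: the new SrcImmU decode flag reuses the SrcImm fetch (which sign-extends) and then masks the value back down to its unsigned width. A worked example, and the reason opcode 0xEA (far jmp with a ptr16:16/32 operand) is switched over to it:

/*
 * A 16-bit immediate of 0x8000 is fetched as a sign-extended s16, so
 * c->src.val temporarily ends in ...ffff8000; the SrcImmU case then
 * applies "c->src.val &= 0xffff" and the operand is 0x8000 again.
 * Sign extension would corrupt a far-jump selector/offset, which is
 * why 0xEA now decodes as SrcImmU | Src2Imm16 instead of
 * SrcImm | Src2Imm16.
 */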

+ 104 - 56
arch/x86/kvm/i8254.c

@@ -231,7 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
 
-	if (pit && vcpu->vcpu_id == 0 && pit->pit_state.irq_ack)
+	if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
 		return atomic_read(&pit->pit_state.pit_timer.pending);
 	return 0;
 }
@@ -252,7 +252,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
 	struct hrtimer *timer;
 
-	if (vcpu->vcpu_id != 0 || !pit)
+	if (!kvm_vcpu_is_bsp(vcpu) || !pit)
 		return;
 
 	timer = &pit->pit_state.pit_timer.timer;
@@ -294,7 +294,7 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
 	pt->timer.function = kvm_timer_fn;
 	pt->t_ops = &kpit_ops;
 	pt->kvm = ps->pit->kvm;
-	pt->vcpu_id = 0;
+	pt->vcpu = pt->kvm->bsp_vcpu;
 
 	atomic_set(&pt->pending, 0);
 	ps->irq_ack = 1;
@@ -332,33 +332,62 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 	case 1:
         /* FIXME: enhance mode 4 precision */
 	case 4:
-		create_pit_timer(ps, val, 0);
+		if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) {
+			create_pit_timer(ps, val, 0);
+		}
 		break;
 	case 2:
 	case 3:
-		create_pit_timer(ps, val, 1);
+		if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){
+			create_pit_timer(ps, val, 1);
+		}
 		break;
 	default:
 		destroy_pit_timer(&ps->pit_timer);
 	}
 }
 
-void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val)
+void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start)
+{
+	u8 saved_mode;
+	if (hpet_legacy_start) {
+		/* save existing mode for later reenablement */
+		saved_mode = kvm->arch.vpit->pit_state.channels[0].mode;
+		kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */
+		pit_load_count(kvm, channel, val);
+		kvm->arch.vpit->pit_state.channels[0].mode = saved_mode;
+	} else {
+		pit_load_count(kvm, channel, val);
+	}
+}
+
+static inline struct kvm_pit *dev_to_pit(struct kvm_io_device *dev)
+{
+	return container_of(dev, struct kvm_pit, dev);
+}
+
+static inline struct kvm_pit *speaker_to_pit(struct kvm_io_device *dev)
 {
-	mutex_lock(&kvm->arch.vpit->pit_state.lock);
-	pit_load_count(kvm, channel, val);
-	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+	return container_of(dev, struct kvm_pit, speaker_dev);
 }
 
-static void pit_ioport_write(struct kvm_io_device *this,
-			     gpa_t addr, int len, const void *data)
+static inline int pit_in_range(gpa_t addr)
 {
-	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	return ((addr >= KVM_PIT_BASE_ADDRESS) &&
+		(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
+}
+
+static int pit_ioport_write(struct kvm_io_device *this,
+			    gpa_t addr, int len, const void *data)
+{
+	struct kvm_pit *pit = dev_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
 	struct kvm *kvm = pit->kvm;
 	int channel, access;
 	struct kvm_kpit_channel_state *s;
 	u32 val = *(u32 *) data;
+	if (!pit_in_range(addr))
+		return -EOPNOTSUPP;
 
 	val  &= 0xff;
 	addr &= KVM_PIT_CHANNEL_MASK;
@@ -421,16 +450,19 @@ static void pit_ioport_write(struct kvm_io_device *this,
 	}
 
 	mutex_unlock(&pit_state->lock);
+	return 0;
 }
 
-static void pit_ioport_read(struct kvm_io_device *this,
-			    gpa_t addr, int len, void *data)
+static int pit_ioport_read(struct kvm_io_device *this,
+			   gpa_t addr, int len, void *data)
 {
-	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct kvm_pit *pit = dev_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
 	struct kvm *kvm = pit->kvm;
 	int ret, count;
 	struct kvm_kpit_channel_state *s;
+	if (!pit_in_range(addr))
+		return -EOPNOTSUPP;
 
 	addr &= KVM_PIT_CHANNEL_MASK;
 	s = &pit_state->channels[addr];
@@ -485,37 +517,36 @@ static void pit_ioport_read(struct kvm_io_device *this,
 	memcpy(data, (char *)&ret, len);
 
 	mutex_unlock(&pit_state->lock);
+	return 0;
 }
 
-static int pit_in_range(struct kvm_io_device *this, gpa_t addr,
-			int len, int is_write)
-{
-	return ((addr >= KVM_PIT_BASE_ADDRESS) &&
-		(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
-}
-
-static void speaker_ioport_write(struct kvm_io_device *this,
-				 gpa_t addr, int len, const void *data)
+static int speaker_ioport_write(struct kvm_io_device *this,
+				gpa_t addr, int len, const void *data)
 {
-	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct kvm_pit *pit = speaker_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
 	struct kvm *kvm = pit->kvm;
 	u32 val = *(u32 *) data;
+	if (addr != KVM_SPEAKER_BASE_ADDRESS)
+		return -EOPNOTSUPP;
 
 	mutex_lock(&pit_state->lock);
 	pit_state->speaker_data_on = (val >> 1) & 1;
 	pit_set_gate(kvm, 2, val & 1);
 	mutex_unlock(&pit_state->lock);
+	return 0;
 }
 
-static void speaker_ioport_read(struct kvm_io_device *this,
-				gpa_t addr, int len, void *data)
+static int speaker_ioport_read(struct kvm_io_device *this,
+			       gpa_t addr, int len, void *data)
 {
-	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct kvm_pit *pit = speaker_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
 	struct kvm *kvm = pit->kvm;
 	unsigned int refresh_clock;
 	int ret;
+	if (addr != KVM_SPEAKER_BASE_ADDRESS)
+		return -EOPNOTSUPP;
 
 	/* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
 	refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
@@ -527,12 +558,7 @@ static void speaker_ioport_read(struct kvm_io_device *this,
 		len = sizeof(ret);
 	memcpy(data, (char *)&ret, len);
 	mutex_unlock(&pit_state->lock);
-}
-
-static int speaker_in_range(struct kvm_io_device *this, gpa_t addr,
-			    int len, int is_write)
-{
-	return (addr == KVM_SPEAKER_BASE_ADDRESS);
+	return 0;
 }
 
 void kvm_pit_reset(struct kvm_pit *pit)
@@ -541,6 +567,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
 	struct kvm_kpit_channel_state *c;
 
 	mutex_lock(&pit->pit_state.lock);
+	pit->pit_state.flags = 0;
 	for (i = 0; i < 3; i++) {
 		c = &pit->pit_state.channels[i];
 		c->mode = 0xff;
@@ -563,10 +590,22 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
 	}
 }
 
-struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+static const struct kvm_io_device_ops pit_dev_ops = {
+	.read     = pit_ioport_read,
+	.write    = pit_ioport_write,
+};
+
+static const struct kvm_io_device_ops speaker_dev_ops = {
+	.read     = speaker_ioport_read,
+	.write    = speaker_ioport_write,
+};
+
+/* Caller must have writers lock on slots_lock */
+struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 {
 	struct kvm_pit *pit;
 	struct kvm_kpit_state *pit_state;
+	int ret;
 
 	pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
 	if (!pit)
@@ -582,19 +621,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
 	mutex_lock(&pit->pit_state.lock);
 	spin_lock_init(&pit->pit_state.inject_lock);
 
-	/* Initialize PIO device */
-	pit->dev.read = pit_ioport_read;
-	pit->dev.write = pit_ioport_write;
-	pit->dev.in_range = pit_in_range;
-	pit->dev.private = pit;
-	kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
-
-	pit->speaker_dev.read = speaker_ioport_read;
-	pit->speaker_dev.write = speaker_ioport_write;
-	pit->speaker_dev.in_range = speaker_in_range;
-	pit->speaker_dev.private = pit;
-	kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
-
 	kvm->arch.vpit = pit;
 	pit->kvm = kvm;
 
@@ -613,7 +639,30 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
 	pit->mask_notifier.func = pit_mask_notifer;
 	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
 
+	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
+	ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+	if (ret < 0)
+		goto fail;
+
+	if (flags & KVM_PIT_SPEAKER_DUMMY) {
+		kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
+		ret = __kvm_io_bus_register_dev(&kvm->pio_bus,
+						&pit->speaker_dev);
+		if (ret < 0)
+			goto fail_unregister;
+	}
+
 	return pit;
+
+fail_unregister:
+	__kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev);
+
+fail:
+	if (pit->irq_source_id >= 0)
+		kvm_free_irq_source_id(kvm, pit->irq_source_id);
+
+	kfree(pit);
+	return NULL;
 }
 
 void kvm_free_pit(struct kvm *kvm)
@@ -623,6 +672,8 @@ void kvm_free_pit(struct kvm *kvm)
 	if (kvm->arch.vpit) {
 		kvm_unregister_irq_mask_notifier(kvm, 0,
 					       &kvm->arch.vpit->mask_notifier);
+		kvm_unregister_irq_ack_notifier(kvm,
+				&kvm->arch.vpit->pit_state.irq_ack_notifier);
 		mutex_lock(&kvm->arch.vpit->pit_state.lock);
 		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
 		hrtimer_cancel(timer);
@@ -637,10 +688,10 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
 	struct kvm_vcpu *vcpu;
 	int i;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	/*
 	 * Provides NMI watchdog support via Virtual Wire mode.
@@ -652,11 +703,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
 	 * VCPU0, and only if its LVT0 is in EXTINT mode.
 	 */
 	if (kvm->arch.vapics_in_nmi_mode > 0)
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (vcpu)
-				kvm_apic_nmi_wd_deliver(vcpu);
-		}
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			kvm_apic_nmi_wd_deliver(vcpu);
 }
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
@@ -665,7 +713,7 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_kpit_state *ps;
 
-	if (vcpu && pit) {
+	if (pit) {
 		int inject = 0;
 		ps = &pit->pit_state;
 
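
Editor's note: kvm_pit_load_count() now takes an hpet_legacy_start flag and no longer takes pit_state.lock itself, so callers are expected to hold it. A hypothetical caller (the actual ioctl path is not part of this hunk; the sketch_ name is mine):

static void sketch_set_pit_channel0(struct kvm *kvm, u32 count, int hpet_legacy)
{
	mutex_lock(&kvm->arch.vpit->pit_state.lock);
	/* with hpet_legacy set, the count is loaded but the timer stays off */
	kvm_pit_load_count(kvm, 0, count, hpet_legacy);
	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
}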

+ 3 - 2
arch/x86/kvm/i8254.h

@@ -21,6 +21,7 @@ struct kvm_kpit_channel_state {
 
 struct kvm_kpit_state {
 	struct kvm_kpit_channel_state channels[3];
+	u32 flags;
 	struct kvm_timer pit_timer;
 	bool is_periodic;
 	u32    speaker_data_on;
@@ -49,8 +50,8 @@ struct kvm_pit {
 #define KVM_PIT_CHANNEL_MASK	    0x3
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
-void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
-struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
 void kvm_free_pit(struct kvm *kvm);
 void kvm_pit_reset(struct kvm_pit *pit);
 

+ 55 - 61
arch/x86/kvm/i8259.c

@@ -30,50 +30,24 @@
 #include "irq.h"
 
 #include <linux/kvm_host.h>
-
-static void pic_lock(struct kvm_pic *s)
-	__acquires(&s->lock)
-{
-	spin_lock(&s->lock);
-}
-
-static void pic_unlock(struct kvm_pic *s)
-	__releases(&s->lock)
-{
-	struct kvm *kvm = s->kvm;
-	unsigned acks = s->pending_acks;
-	bool wakeup = s->wakeup_needed;
-	struct kvm_vcpu *vcpu;
-
-	s->pending_acks = 0;
-	s->wakeup_needed = false;
-
-	spin_unlock(&s->lock);
-
-	while (acks) {
-		kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
-				     __ffs(acks));
-		acks &= acks - 1;
-	}
-
-	if (wakeup) {
-		vcpu = s->kvm->vcpus[0];
-		if (vcpu)
-			kvm_vcpu_kick(vcpu);
-	}
-}
+#include "trace.h"
 
 static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
 {
 	s->isr &= ~(1 << irq);
 	s->isr_ack |= (1 << irq);
+	if (s != &s->pics_state->pics[0])
+		irq += 8;
+	kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
 }
 
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
 {
 	struct kvm_pic *s = pic_irqchip(kvm);
+	spin_lock(&s->lock);
 	s->pics[0].isr_ack = 0xff;
 	s->pics[1].isr_ack = 0xff;
+	spin_unlock(&s->lock);
 }
 
 /*
@@ -174,9 +148,9 @@ static void pic_update_irq(struct kvm_pic *s)
 
 void kvm_pic_update_irq(struct kvm_pic *s)
 {
-	pic_lock(s);
+	spin_lock(&s->lock);
 	pic_update_irq(s);
-	pic_unlock(s);
+	spin_unlock(&s->lock);
 }
 
 int kvm_pic_set_irq(void *opaque, int irq, int level)
@@ -184,12 +158,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
 	struct kvm_pic *s = opaque;
 	int ret = -1;
 
-	pic_lock(s);
+	spin_lock(&s->lock);
 	if (irq >= 0 && irq < PIC_NUM_PINS) {
 		ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
 		pic_update_irq(s);
+		trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
+				      s->pics[irq >> 3].imr, ret == 0);
 	}
-	pic_unlock(s);
+	spin_unlock(&s->lock);
 
 	return ret;
 }
@@ -217,7 +193,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 	int irq, irq2, intno;
 	struct kvm_pic *s = pic_irqchip(kvm);
 
-	pic_lock(s);
+	spin_lock(&s->lock);
 	irq = pic_get_irq(&s->pics[0]);
 	if (irq >= 0) {
 		pic_intack(&s->pics[0], irq);
@@ -242,8 +218,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 		intno = s->pics[0].irq_base + irq;
 	}
 	pic_update_irq(s);
-	pic_unlock(s);
-	kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
+	spin_unlock(&s->lock);
 
 	return intno;
 }
@@ -252,7 +227,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 {
 	int irq, irqbase, n;
 	struct kvm *kvm = s->pics_state->irq_request_opaque;
-	struct kvm_vcpu *vcpu0 = kvm->vcpus[0];
+	struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu;
 
 	if (s == &s->pics_state->pics[0])
 		irqbase = 0;
@@ -263,7 +238,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 		if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
 			if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
 				n = irq + irqbase;
-				s->pics_state->pending_acks |= 1 << n;
+				kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
 			}
 	}
 	s->last_irr = 0;
@@ -428,8 +403,7 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
 	return s->elcr;
 }
 
-static int picdev_in_range(struct kvm_io_device *this, gpa_t addr,
-			   int len, int is_write)
+static int picdev_in_range(gpa_t addr)
 {
 	switch (addr) {
 	case 0x20:
@@ -444,18 +418,25 @@ static int picdev_in_range(struct kvm_io_device *this, gpa_t addr,
 	}
 }
 
-static void picdev_write(struct kvm_io_device *this,
+static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
+{
+	return container_of(dev, struct kvm_pic, dev);
+}
+
+static int picdev_write(struct kvm_io_device *this,
 			 gpa_t addr, int len, const void *val)
 {
-	struct kvm_pic *s = this->private;
+	struct kvm_pic *s = to_pic(this);
 	unsigned char data = *(unsigned char *)val;
+	if (!picdev_in_range(addr))
+		return -EOPNOTSUPP;
 
 	if (len != 1) {
 		if (printk_ratelimit())
 			printk(KERN_ERR "PIC: non byte write\n");
-		return;
+		return 0;
 	}
-	pic_lock(s);
+	spin_lock(&s->lock);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -468,21 +449,24 @@ static void picdev_write(struct kvm_io_device *this,
 		elcr_ioport_write(&s->pics[addr & 1], addr, data);
 		break;
 	}
-	pic_unlock(s);
+	spin_unlock(&s->lock);
+	return 0;
 }
 
-static void picdev_read(struct kvm_io_device *this,
-			gpa_t addr, int len, void *val)
+static int picdev_read(struct kvm_io_device *this,
+		       gpa_t addr, int len, void *val)
 {
-	struct kvm_pic *s = this->private;
+	struct kvm_pic *s = to_pic(this);
 	unsigned char data = 0;
+	if (!picdev_in_range(addr))
+		return -EOPNOTSUPP;
 
 	if (len != 1) {
 		if (printk_ratelimit())
 			printk(KERN_ERR "PIC: non byte read\n");
-		return;
+		return 0;
 	}
-	pic_lock(s);
+	spin_lock(&s->lock);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -496,7 +480,8 @@ static void picdev_read(struct kvm_io_device *this,
 		break;
 	}
 	*(unsigned char *)val = data;
-	pic_unlock(s);
+	spin_unlock(&s->lock);
+	return 0;
 }
 
 /*
@@ -505,20 +490,27 @@ static void picdev_read(struct kvm_io_device *this,
 static void pic_irq_request(void *opaque, int level)
 {
 	struct kvm *kvm = opaque;
-	struct kvm_vcpu *vcpu = kvm->vcpus[0];
+	struct kvm_vcpu *vcpu = kvm->bsp_vcpu;
 	struct kvm_pic *s = pic_irqchip(kvm);
 	int irq = pic_get_irq(&s->pics[0]);
 
 	s->output = level;
 	if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
 		s->pics[0].isr_ack &= ~(1 << irq);
-		s->wakeup_needed = true;
+		kvm_vcpu_kick(vcpu);
 	}
 }
 
+static const struct kvm_io_device_ops picdev_ops = {
+	.read     = picdev_read,
+	.write    = picdev_write,
+};
+
 struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 {
 	struct kvm_pic *s;
+	int ret;
+
 	s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
 	if (!s)
 		return NULL;
@@ -534,10 +526,12 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 	/*
 	 * Initialize PIO device
 	 */
-	s->dev.read = picdev_read;
-	s->dev.write = picdev_write;
-	s->dev.in_range = picdev_in_range;
-	s->dev.private = s;
-	kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev);
+	kvm_iodevice_init(&s->dev, &picdev_ops);
+	ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev);
+	if (ret < 0) {
+		kfree(s);
+		return NULL;
+	}
+
 	return s;
 }
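
Editor's note: ack notification now happens inside pic_clear_isr(), which remaps slave-PIC pins to their global IRQ numbers before calling kvm_notify_acked_irq(). Worked example:

/*
 * Clearing ISR bit 4 on the slave PIC (s != &s->pics_state->pics[0])
 * becomes irq = 4 + 8 = 12, so the ack notifier fires for the global
 * PS/2-mouse line rather than for master pin 4.
 */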

+ 0 - 1
arch/x86/kvm/irq.h

@@ -63,7 +63,6 @@ struct kvm_kpic_state {
 
 struct kvm_pic {
 	spinlock_t lock;
-	bool wakeup_needed;
 	unsigned pending_acks;
 	struct kvm *kvm;
 	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */

+ 9 - 0
arch/x86/kvm/kvm_cache_regs.h

@@ -29,4 +29,13 @@ static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
 	kvm_register_write(vcpu, VCPU_REGS_RIP, val);
 }
 
+static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
+{
+	if (!test_bit(VCPU_EXREG_PDPTR,
+		      (unsigned long *)&vcpu->arch.regs_avail))
+		kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
+
+	return vcpu->arch.pdptrs[index];
+}
+
 #endif
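
Editor's note: kvm_pdptr_read() is a lazy accessor: the PDPTRs are only re-read through kvm_x86_ops->cache_reg() (via the new VCPU_EXREG_PDPTR slot) when regs_avail says the cached copy is stale. A small illustrative user; the present-bit test is generic PAE knowledge and the sketch_ name is mine, neither comes from this hunk:

static inline bool sketch_guest_pdpte_present(struct kvm_vcpu *vcpu, int index)
{
	/* bit 0 of a PAE PDPTE is the present bit */
	return (kvm_pdptr_read(vcpu, index) & 1) != 0;
}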

+ 0 - 51
arch/x86/kvm/kvm_svm.h

@@ -1,51 +0,0 @@
-#ifndef __KVM_SVM_H
-#define __KVM_SVM_H
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/kvm_host.h>
-#include <asm/msr.h>
-
-#include <asm/svm.h>
-
-static const u32 host_save_user_msrs[] = {
-#ifdef CONFIG_X86_64
-	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
-	MSR_FS_BASE,
-#endif
-	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
-};
-
-#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
-
-struct kvm_vcpu;
-
-struct vcpu_svm {
-	struct kvm_vcpu vcpu;
-	struct vmcb *vmcb;
-	unsigned long vmcb_pa;
-	struct svm_cpu_data *svm_data;
-	uint64_t asid_generation;
-
-	u64 next_rip;
-
-	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
-	u64 host_gs_base;
-	unsigned long host_cr2;
-
-	u32 *msrpm;
-	struct vmcb *hsave;
-	u64 hsave_msr;
-
-	u64 nested_vmcb;
-
-	/* These are the merged vectors */
-	u32 *nested_msrpm;
-
-	/* gpa pointers to the real vectors */
-	u64 nested_vmcb_msrpm;
-};
-
-#endif
-

+ 1 - 1
arch/x86/kvm/kvm_timer.h

@@ -6,7 +6,7 @@ struct kvm_timer {
 	bool reinject;
 	struct kvm_timer_ops *t_ops;
 	struct kvm *kvm;
-	int vcpu_id;
+	struct kvm_vcpu *vcpu;
 };
 
 struct kvm_timer_ops {

+ 246 - 88
arch/x86/kvm/lapic.c

@@ -32,8 +32,11 @@
 #include <asm/current.h>
 #include <asm/apicdef.h>
 #include <asm/atomic.h>
+#include <asm/apicdef.h>
 #include "kvm_cache_regs.h"
 #include "irq.h"
+#include "trace.h"
+#include "x86.h"
 
 #ifndef CONFIG_X86_64
 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -141,6 +144,26 @@ static inline int apic_lvt_nmi_mode(u32 lvt_val)
 	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
 }
 
+void kvm_apic_set_version(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	struct kvm_cpuid_entry2 *feat;
+	u32 v = APIC_VERSION;
+
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return;
+
+	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
+	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
+		v |= APIC_LVR_DIRECTED_EOI;
+	apic_set_reg(apic, APIC_LVR, v);
+}
+
+static inline int apic_x2apic_mode(struct kvm_lapic *apic)
+{
+	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
+}
+
 static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 	LVT_MASK | APIC_LVT_TIMER_PERIODIC,	/* LVTT */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
@@ -165,36 +188,52 @@ static int find_highest_vector(void *bitmap)
 
 static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
 {
+	apic->irr_pending = true;
 	return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
 }
 
-static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
+static inline int apic_search_irr(struct kvm_lapic *apic)
 {
-	apic_clear_vector(vec, apic->regs + APIC_IRR);
+	return find_highest_vector(apic->regs + APIC_IRR);
 }
 
 static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 {
 	int result;
 
-	result = find_highest_vector(apic->regs + APIC_IRR);
+	if (!apic->irr_pending)
+		return -1;
+
+	result = apic_search_irr(apic);
 	ASSERT(result == -1 || result >= 16);
 
 	return result;
 }
 
+static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
+{
+	apic->irr_pending = false;
+	apic_clear_vector(vec, apic->regs + APIC_IRR);
+	if (apic_search_irr(apic) != -1)
+		apic->irr_pending = true;
+}
+
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	int highest_irr;
 
+	/* This may race with setting of irr in __apic_accept_irq() and
+	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
+	 * will cause vmexit immediately and the value will be recalculated
+	 * on the next vmentry.
+	 */
 	if (!apic)
 		return 0;
 	highest_irr = apic_find_highest_irr(apic);
 
 	return highest_irr;
 }
-EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			     int vector, int level, int trig_mode);
@@ -251,7 +290,12 @@ int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 {
 	int result = 0;
-	u8 logical_id;
+	u32 logical_id;
+
+	if (apic_x2apic_mode(apic)) {
+		logical_id = apic_get_reg(apic, APIC_LDR);
+		return logical_id & mda;
+	}
 
 	logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));
 
@@ -331,6 +375,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			break;
 
 		result = !apic_test_and_set_irr(vector, apic);
+		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
+					  trig_mode, vector, !result);
 		if (!result) {
 			if (trig_mode)
 				apic_debug("level trig mode repeatedly for "
@@ -425,7 +471,11 @@ static void apic_set_eoi(struct kvm_lapic *apic)
 		trigger_mode = IOAPIC_LEVEL_TRIG;
 	else
 		trigger_mode = IOAPIC_EDGE_TRIG;
-	kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
+		mutex_lock(&apic->vcpu->kvm->irq_lock);
+		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+		mutex_unlock(&apic->vcpu->kvm->irq_lock);
+	}
 }
 
 static void apic_send_ipi(struct kvm_lapic *apic)
@@ -440,7 +490,12 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 	irq.level = icr_low & APIC_INT_ASSERT;
 	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
 	irq.shorthand = icr_low & APIC_SHORT_MASK;
-	irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
+	if (apic_x2apic_mode(apic))
+		irq.dest_id = icr_high;
+	else
+		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
+
+	trace_kvm_apic_ipi(icr_low, irq.dest_id);
 
 	apic_debug("icr_high 0x%x, icr_low 0x%x, "
 		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
@@ -449,7 +504,9 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 		   irq.vector);
 
+	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
+	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
@@ -495,12 +552,16 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 {
 	u32 val = 0;
 
-	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);
-
 	if (offset >= LAPIC_MMIO_LENGTH)
 		return 0;
 
 	switch (offset) {
+	case APIC_ID:
+		if (apic_x2apic_mode(apic))
+			val = kvm_apic_id(apic);
+		else
+			val = kvm_apic_id(apic) << 24;
+		break;
 	case APIC_ARBPRI:
 		printk(KERN_WARNING "Access APIC ARBPRI register "
 		       "which is for P6\n");
@@ -522,21 +583,35 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 	return val;
 }
 
-static void apic_mmio_read(struct kvm_io_device *this,
-			   gpa_t address, int len, void *data)
+static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
+{
+	return container_of(dev, struct kvm_lapic, dev);
+}
+
+static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
+		void *data)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
-	unsigned int offset = address - apic->base_address;
 	unsigned char alignment = offset & 0xf;
 	u32 result;
+	/* this bitmask has a bit cleared for each reserved register */
+	static const u64 rmask = 0x43ff01ffffffe70cULL;
 
 	if ((alignment + len) > 4) {
-		printk(KERN_ERR "KVM_APIC_READ: alignment error %lx %d",
-		       (unsigned long)address, len);
-		return;
+		apic_debug("KVM_APIC_READ: alignment error %x %d\n",
+			   offset, len);
+		return 1;
 	}
+
+	if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
+		apic_debug("KVM_APIC_READ: read reserved register %x\n",
+			   offset);
+		return 1;
+	}
+
 	result = __apic_read(apic, offset & ~0xf);
 
+	trace_kvm_apic_read(offset, result);
+
 	switch (len) {
 	case 1:
 	case 2:
@@ -548,6 +623,28 @@ static void apic_mmio_read(struct kvm_io_device *this,
 		       "should be 1,2, or 4 instead\n", len);
 		break;
 	}
+	return 0;
+}
+
+static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
+{
+	return apic_hw_enabled(apic) &&
+	    addr >= apic->base_address &&
+	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
+}
+
+static int apic_mmio_read(struct kvm_io_device *this,
+			   gpa_t address, int len, void *data)
+{
+	struct kvm_lapic *apic = to_lapic(this);
+	u32 offset = address - apic->base_address;
+
+	if (!apic_mmio_in_range(apic, address))
+		return -EOPNOTSUPP;
+
+	apic_reg_read(apic, offset, len, data);
+
+	return 0;
 }
 
 static void update_divide_count(struct kvm_lapic *apic)
@@ -573,6 +670,15 @@ static void start_apic_timer(struct kvm_lapic *apic)
 
 	if (!apic->lapic_timer.period)
 		return;
+	/*
+	 * Do not allow the guest to program periodic timers with small
+	 * interval, since the hrtimers are not throttled by the host
+	 * scheduler.
+	 */
+	if (apic_lvtt_period(apic)) {
+		if (apic->lapic_timer.period < NSEC_PER_MSEC/2)
+			apic->lapic_timer.period = NSEC_PER_MSEC/2;
+	}
 
 	hrtimer_start(&apic->lapic_timer.timer,
 		      ktime_add_ns(now, apic->lapic_timer.period),
@@ -603,40 +709,18 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
 		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
 }
 
-static void apic_mmio_write(struct kvm_io_device *this,
-			    gpa_t address, int len, const void *data)
+static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
-	unsigned int offset = address - apic->base_address;
-	unsigned char alignment = offset & 0xf;
-	u32 val;
-
-	/*
-	 * APIC register must be aligned on 128-bits boundary.
-	 * 32/64/128 bits registers must be accessed thru 32 bits.
-	 * Refer SDM 8.4.1
-	 */
-	if (len != 4 || alignment) {
-		/* Don't shout loud, $infamous_os would cause only noise. */
-		apic_debug("apic write: bad size=%d %lx\n",
-			   len, (long)address);
-		return;
-	}
-
-	val = *(u32 *) data;
-
-	/* too common printing */
-	if (offset != APIC_EOI)
-		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
-			   "0x%x\n", __func__, offset, len, val);
-
-	offset &= 0xff0;
+	int ret = 0;
 
-	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);
+	trace_kvm_apic_write(reg, val);
 
-	switch (offset) {
+	switch (reg) {
 	case APIC_ID:		/* Local APIC ID */
-		apic_set_reg(apic, APIC_ID, val);
+		if (!apic_x2apic_mode(apic))
+			apic_set_reg(apic, APIC_ID, val);
+		else
+			ret = 1;
 		break;
 
 	case APIC_TASKPRI:
@@ -649,15 +733,24 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		break;
 
 	case APIC_LDR:
-		apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+		if (!apic_x2apic_mode(apic))
+			apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+		else
+			ret = 1;
 		break;
 
 	case APIC_DFR:
-		apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
+		if (!apic_x2apic_mode(apic))
+			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
+		else
+			ret = 1;
 		break;
 
-	case APIC_SPIV:
-		apic_set_reg(apic, APIC_SPIV, val & 0x3ff);
+	case APIC_SPIV: {
+		u32 mask = 0x3ff;
+		if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
+			mask |= APIC_SPIV_DIRECTED_EOI;
+		apic_set_reg(apic, APIC_SPIV, val & mask);
 		if (!(val & APIC_SPIV_APIC_ENABLED)) {
 			int i;
 			u32 lvt_val;
@@ -672,7 +765,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
 
 		}
 		break;
-
+	}
 	case APIC_ICR:
 		/* No delay here, so we always clear the pending bit */
 		apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
@@ -680,7 +773,9 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		break;
 
 	case APIC_ICR2:
-		apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
+		if (!apic_x2apic_mode(apic))
+			val &= 0xff000000;
+		apic_set_reg(apic, APIC_ICR2, val);
 		break;
 
 	case APIC_LVT0:
@@ -694,8 +789,8 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		if (!apic_sw_enabled(apic))
 			val |= APIC_LVT_MASKED;
 
-		val &= apic_lvt_mask[(offset - APIC_LVTT) >> 4];
-		apic_set_reg(apic, offset, val);
+		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
+		apic_set_reg(apic, reg, val);
 
 		break;
 
@@ -703,7 +798,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		hrtimer_cancel(&apic->lapic_timer.timer);
 		apic_set_reg(apic, APIC_TMICT, val);
 		start_apic_timer(apic);
-		return;
+		break;
 
 	case APIC_TDCR:
 		if (val & 4)
@@ -712,27 +807,59 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		update_divide_count(apic);
 		break;
 
+	case APIC_ESR:
+		if (apic_x2apic_mode(apic) && val != 0) {
+			printk(KERN_ERR "KVM_WRITE:ESR not zero %x\n", val);
+			ret = 1;
+		}
+		break;
+
+	case APIC_SELF_IPI:
+		if (apic_x2apic_mode(apic)) {
+			apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
+		} else
+			ret = 1;
+		break;
 	default:
-		apic_debug("Local APIC Write to read-only register %x\n",
-			   offset);
+		ret = 1;
 		break;
 	}
-
+	if (ret)
+		apic_debug("Local APIC Write to read-only register %x\n", reg);
+	return ret;
 }
 
-static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr,
-			   int len, int size)
+static int apic_mmio_write(struct kvm_io_device *this,
+			    gpa_t address, int len, const void *data)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
-	int ret = 0;
+	struct kvm_lapic *apic = to_lapic(this);
+	unsigned int offset = address - apic->base_address;
+	u32 val;
 
+	if (!apic_mmio_in_range(apic, address))
+		return -EOPNOTSUPP;
 
-	if (apic_hw_enabled(apic) &&
-	    (addr >= apic->base_address) &&
-	    (addr < (apic->base_address + LAPIC_MMIO_LENGTH)))
-		ret = 1;
+	/*
+	 * APIC register must be aligned on 128-bits boundary.
+	 * 32/64/128 bits registers must be accessed thru 32 bits.
+	 * Refer SDM 8.4.1
+	 */
+	if (len != 4 || (offset & 0xf)) {
+		/* Don't shout loud, $infamous_os would cause only noise. */
+		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
+		return 0;
+	}
 
-	return ret;
+	val = *(u32*)data;
+
+	/* too common printing */
+	if (offset != APIC_EOI)
+		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
+			   "0x%x\n", __func__, offset, len, val);
+
+	apic_reg_write(apic, offset & 0xff0, val);
+
+	return 0;
 }
 
 void kvm_free_lapic(struct kvm_vcpu *vcpu)
@@ -763,7 +890,6 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
 		     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
 }
-EXPORT_SYMBOL_GPL(kvm_lapic_set_tpr);
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 {
@@ -776,7 +902,6 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 
 	return (tpr & 0xf0) >> 4;
 }
-EXPORT_SYMBOL_GPL(kvm_lapic_get_cr8);
 
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 {
@@ -787,10 +912,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 		vcpu->arch.apic_base = value;
 		return;
 	}
-	if (apic->vcpu->vcpu_id)
+
+	if (!kvm_vcpu_is_bsp(apic->vcpu))
 		value &= ~MSR_IA32_APICBASE_BSP;
 
 	vcpu->arch.apic_base = value;
+	if (apic_x2apic_mode(apic)) {
+		u32 id = kvm_apic_id(apic);
+		u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
+		apic_set_reg(apic, APIC_LDR, ldr);
+	}
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
 
@@ -800,12 +931,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 
 }
 
-u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.apic_base;
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_get_base);
-
 void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic;
@@ -821,7 +946,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
 	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
-	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
+	kvm_apic_set_version(apic->vcpu);
 
 	for (i = 0; i < APIC_LVT_NUM; i++)
 		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
@@ -842,9 +967,10 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
 		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 	}
+	apic->irr_pending = false;
 	update_divide_count(apic);
 	atomic_set(&apic->lapic_timer.pending, 0);
-	if (vcpu->vcpu_id == 0)
+	if (kvm_vcpu_is_bsp(vcpu))
 		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
 	apic_update_ppr(apic);
 
@@ -855,7 +981,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 		   vcpu, kvm_apic_id(apic),
 		   vcpu->arch.apic_base, apic->base_address);
 }
-EXPORT_SYMBOL_GPL(kvm_lapic_reset);
 
 bool kvm_apic_present(struct kvm_vcpu *vcpu)
 {
@@ -866,7 +991,6 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
 {
 	return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
 }
-EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
 
 /*
  *----------------------------------------------------------------------
@@ -917,6 +1041,11 @@ static struct kvm_timer_ops lapic_timer_ops = {
 	.is_periodic = lapic_is_periodic,
 };
 
+static const struct kvm_io_device_ops apic_mmio_ops = {
+	.read     = apic_mmio_read,
+	.write    = apic_mmio_write,
+};
+
 int kvm_create_lapic(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic;
@@ -945,16 +1074,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	apic->lapic_timer.timer.function = kvm_timer_fn;
 	apic->lapic_timer.t_ops = &lapic_timer_ops;
 	apic->lapic_timer.kvm = vcpu->kvm;
-	apic->lapic_timer.vcpu_id = vcpu->vcpu_id;
+	apic->lapic_timer.vcpu = vcpu;
 
 	apic->base_address = APIC_DEFAULT_PHYS_BASE;
 	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
 
 	kvm_lapic_reset(vcpu);
-	apic->dev.read = apic_mmio_read;
-	apic->dev.write = apic_mmio_write;
-	apic->dev.in_range = apic_mmio_range;
-	apic->dev.private = apic;
+	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
 
 	return 0;
 nomem_free_apic:
@@ -962,7 +1088,6 @@ nomem_free_apic:
 nomem:
 	return -ENOMEM;
 }
-EXPORT_SYMBOL_GPL(kvm_create_lapic);
 
 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
@@ -985,7 +1110,7 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 	u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
 	int r = 0;
 
-	if (vcpu->vcpu_id == 0) {
+	if (kvm_vcpu_is_bsp(vcpu)) {
 		if (!apic_hw_enabled(vcpu->arch.apic))
 			r = 1;
 		if ((lvt0 & APIC_LVT_MASKED) == 0 &&
@@ -1025,7 +1150,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 
 	apic->base_address = vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
-	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
+	kvm_apic_set_version(vcpu);
+
 	apic_update_ppr(apic);
 	hrtimer_cancel(&apic->lapic_timer.timer);
 	update_divide_count(apic);
@@ -1092,3 +1218,35 @@ void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
 
 	vcpu->arch.apic->vapic_addr = vapic_addr;
 }
+
+int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 reg = (msr - APIC_BASE_MSR) << 4;
+
+	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+		return 1;
+
+	/* if this is the ICR, write the destination (ICR2) before the command */
+	if (msr == 0x830)
+		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
+	return apic_reg_write(apic, reg, (u32)data);
+}
+
+int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
+
+	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+		return 1;
+
+	if (apic_reg_read(apic, reg, 4, &low))
+		return 1;
+	if (msr == 0x830)
+		apic_reg_read(apic, APIC_ICR2, 4, &high);
+
+	*data = (((u64)high) << 32) | low;
+
+	return 0;
+}
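
Note on the MSR decoding above (not part of the patch): the x2APIC MSR block starts at APIC_BASE_MSR (0x800) and each register is one MSR apart, so shifting the difference left by four recovers the legacy MMIO offset; MSR 0x830 is the 64-bit ICR, which is why its high half is routed through APIC_ICR2 first. A stand-alone sketch of that arithmetic, with made-up values:

#include <stdio.h>

int main(void)
{
	unsigned int apic_base_msr = 0x800;	/* start of the x2APIC MSR range */
	unsigned int msr = 0x830;		/* x2APIC ICR (64-bit) */
	unsigned int reg = (msr - apic_base_msr) << 4;

	/* prints: MSR 0x830 -> APIC register offset 0x300, i.e. APIC_ICR */
	printf("MSR 0x%x -> APIC register offset 0x%x\n", msr, reg);
	return 0;
}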

+ 4 - 0
arch/x86/kvm/lapic.h

@@ -12,6 +12,7 @@ struct kvm_lapic {
 	struct kvm_timer lapic_timer;
 	u32 divide_count;
 	struct kvm_vcpu *vcpu;
+	bool irr_pending;
 	struct page *regs_page;
 	void *regs;
 	gpa_t vapic_addr;
@@ -28,6 +29,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
+void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
@@ -44,4 +46,6 @@ void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
 
+int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
 #endif

The file diff has been suppressed because it is too large
+ 247 - 171
arch/x86/kvm/mmu.c


+ 3 - 1
arch/x86/kvm/mmu.h

@@ -37,6 +37,8 @@
 #define PT32_ROOT_LEVEL 2
 #define PT32E_ROOT_LEVEL 3
 
+int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
+
 static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
 	if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
@@ -75,7 +77,7 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cr0 & X86_CR0_PG;
 }
 
-static inline int is_present_pte(unsigned long pte)
+static inline int is_present_gpte(unsigned long pte)
 {
 	return pte & PT_PRESENT_MASK;
 }

+ 220 - 0
arch/x86/kvm/mmutrace.h

@@ -0,0 +1,220 @@
+#if !defined(_TRACE_KVMMMU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVMMMU_H
+
+#include <linux/tracepoint.h>
+#include <linux/ftrace_event.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvmmmu
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE mmutrace
+
+#define KVM_MMU_PAGE_FIELDS \
+	__field(__u64, gfn) \
+	__field(__u32, role) \
+	__field(__u32, root_count) \
+	__field(__u32, unsync)
+
+#define KVM_MMU_PAGE_ASSIGN(sp)			     \
+	__entry->gfn = sp->gfn;			     \
+	__entry->role = sp->role.word;		     \
+	__entry->root_count = sp->root_count;        \
+	__entry->unsync = sp->unsync;
+
+#define KVM_MMU_PAGE_PRINTK() ({				        \
+	const char *ret = p->buffer + p->len;				\
+	static const char *access_str[] = {			        \
+		"---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"  \
+	};							        \
+	union kvm_mmu_page_role role;				        \
+								        \
+	role.word = __entry->role;					\
+									\
+	trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge"	\
+			 " %snxe root %u %s%c",				\
+			 __entry->gfn, role.level, role.glevels,	\
+			 role.quadrant,					\
+			 role.direct ? " direct" : "",			\
+			 access_str[role.access],			\
+			 role.invalid ? " invalid" : "",		\
+			 role.cr4_pge ? "" : "!",			\
+			 role.nxe ? "" : "!",				\
+			 __entry->root_count,				\
+			 __entry->unsync ? "unsync" : "sync", 0);	\
+	ret;								\
+		})
+
+#define kvm_mmu_trace_pferr_flags       \
+	{ PFERR_PRESENT_MASK, "P" },	\
+	{ PFERR_WRITE_MASK, "W" },	\
+	{ PFERR_USER_MASK, "U" },	\
+	{ PFERR_RSVD_MASK, "RSVD" },	\
+	{ PFERR_FETCH_MASK, "F" }
+
+/*
+ * A pagetable walk has started
+ */
+TRACE_EVENT(
+	kvm_mmu_pagetable_walk,
+	TP_PROTO(u64 addr, int write_fault, int user_fault, int fetch_fault),
+	TP_ARGS(addr, write_fault, user_fault, fetch_fault),
+
+	TP_STRUCT__entry(
+		__field(__u64, addr)
+		__field(__u32, pferr)
+	),
+
+	TP_fast_assign(
+		__entry->addr = addr;
+		__entry->pferr = (!!write_fault << 1) | (!!user_fault << 2)
+		                 | (!!fetch_fault << 4);
+	),
+
+	TP_printk("addr %llx pferr %x %s", __entry->addr, __entry->pferr,
+		  __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags))
+);
+
+
+/* We just walked a paging element */
+TRACE_EVENT(
+	kvm_mmu_paging_element,
+	TP_PROTO(u64 pte, int level),
+	TP_ARGS(pte, level),
+
+	TP_STRUCT__entry(
+		__field(__u64, pte)
+		__field(__u32, level)
+		),
+
+	TP_fast_assign(
+		__entry->pte = pte;
+		__entry->level = level;
+		),
+
+	TP_printk("pte %llx level %u", __entry->pte, __entry->level)
+);
+
+/* We set a pte accessed bit */
+TRACE_EVENT(
+	kvm_mmu_set_accessed_bit,
+	TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
+	TP_ARGS(table_gfn, index, size),
+
+	TP_STRUCT__entry(
+		__field(__u64, gpa)
+		),
+
+	TP_fast_assign(
+		__entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
+				+ index * size;
+		),
+
+	TP_printk("gpa %llx", __entry->gpa)
+);
+
+/* We set a pte dirty bit */
+TRACE_EVENT(
+	kvm_mmu_set_dirty_bit,
+	TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
+	TP_ARGS(table_gfn, index, size),
+
+	TP_STRUCT__entry(
+		__field(__u64, gpa)
+		),
+
+	TP_fast_assign(
+		__entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
+				+ index * size;
+		),
+
+	TP_printk("gpa %llx", __entry->gpa)
+);
+
+TRACE_EVENT(
+	kvm_mmu_walker_error,
+	TP_PROTO(u32 pferr),
+	TP_ARGS(pferr),
+
+	TP_STRUCT__entry(
+		__field(__u32, pferr)
+		),
+
+	TP_fast_assign(
+		__entry->pferr = pferr;
+		),
+
+	TP_printk("pferr %x %s", __entry->pferr,
+		  __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags))
+);
+
+TRACE_EVENT(
+	kvm_mmu_get_page,
+	TP_PROTO(struct kvm_mmu_page *sp, bool created),
+	TP_ARGS(sp, created),
+
+	TP_STRUCT__entry(
+		KVM_MMU_PAGE_FIELDS
+		__field(bool, created)
+		),
+
+	TP_fast_assign(
+		KVM_MMU_PAGE_ASSIGN(sp)
+		__entry->created = created;
+		),
+
+	TP_printk("%s %s", KVM_MMU_PAGE_PRINTK(),
+		  __entry->created ? "new" : "existing")
+);
+
+TRACE_EVENT(
+	kvm_mmu_sync_page,
+	TP_PROTO(struct kvm_mmu_page *sp),
+	TP_ARGS(sp),
+
+	TP_STRUCT__entry(
+		KVM_MMU_PAGE_FIELDS
+		),
+
+	TP_fast_assign(
+		KVM_MMU_PAGE_ASSIGN(sp)
+		),
+
+	TP_printk("%s", KVM_MMU_PAGE_PRINTK())
+);
+
+TRACE_EVENT(
+	kvm_mmu_unsync_page,
+	TP_PROTO(struct kvm_mmu_page *sp),
+	TP_ARGS(sp),
+
+	TP_STRUCT__entry(
+		KVM_MMU_PAGE_FIELDS
+		),
+
+	TP_fast_assign(
+		KVM_MMU_PAGE_ASSIGN(sp)
+		),
+
+	TP_printk("%s", KVM_MMU_PAGE_PRINTK())
+);
+
+TRACE_EVENT(
+	kvm_mmu_zap_page,
+	TP_PROTO(struct kvm_mmu_page *sp),
+	TP_ARGS(sp),
+
+	TP_STRUCT__entry(
+		KVM_MMU_PAGE_FIELDS
+		),
+
+	TP_fast_assign(
+		KVM_MMU_PAGE_ASSIGN(sp)
+		),
+
+	TP_printk("%s", KVM_MMU_PAGE_PRINTK())
+);
+
+#endif /* _TRACE_KVMMMU_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

+ 74 - 67
arch/x86/kvm/paging_tmpl.h

@@ -27,7 +27,8 @@
 	#define guest_walker guest_walker64
 	#define FNAME(name) paging##64_##name
 	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
-	#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
+	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
+	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
 	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
 	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
 	#define PT_LEVEL_BITS PT64_LEVEL_BITS
@@ -43,7 +44,8 @@
 	#define guest_walker guest_walker32
 	#define FNAME(name) paging##32_##name
 	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
-	#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
+	#define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl)
+	#define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl)
 	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
 	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
 	#define PT_LEVEL_BITS PT32_LEVEL_BITS
@@ -53,8 +55,8 @@
 	#error Invalid PTTYPE value
 #endif
 
-#define gpte_to_gfn FNAME(gpte_to_gfn)
-#define gpte_to_gfn_pde FNAME(gpte_to_gfn_pde)
+#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
+#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)
 
 /*
  * The guest_walker structure emulates the behavior of the hardware page
@@ -71,14 +73,9 @@ struct guest_walker {
 	u32 error_code;
 };
 
-static gfn_t gpte_to_gfn(pt_element_t gpte)
+static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
 {
-	return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
-}
-
-static gfn_t gpte_to_gfn_pde(pt_element_t gpte)
-{
-	return (gpte & PT_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
+	return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
 }
 
 static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
@@ -125,14 +122,16 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 	gpa_t pte_gpa;
 	int rsvd_fault = 0;
 
-	pgprintk("%s: addr %lx\n", __func__, addr);
+	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
+				     fetch_fault);
 walk:
 	walker->level = vcpu->arch.mmu.root_level;
 	pte = vcpu->arch.cr3;
 #if PTTYPE == 64
 	if (!is_long_mode(vcpu)) {
-		pte = vcpu->arch.pdptrs[(addr >> 30) & 3];
-		if (!is_present_pte(pte))
+		pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
+		trace_kvm_mmu_paging_element(pte, walker->level);
+		if (!is_present_gpte(pte))
 			goto not_present;
 		--walker->level;
 	}
@@ -150,12 +149,11 @@ walk:
 		pte_gpa += index * sizeof(pt_element_t);
 		walker->table_gfn[walker->level - 1] = table_gfn;
 		walker->pte_gpa[walker->level - 1] = pte_gpa;
-		pgprintk("%s: table_gfn[%d] %lx\n", __func__,
-			 walker->level - 1, table_gfn);
 
 		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
+		trace_kvm_mmu_paging_element(pte, walker->level);
 
-		if (!is_present_pte(pte))
+		if (!is_present_gpte(pte))
 			goto not_present;
 
 		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
@@ -175,6 +173,8 @@ walk:
 #endif
 
 		if (!(pte & PT_ACCESSED_MASK)) {
+			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
+						       sizeof(pte));
 			mark_page_dirty(vcpu->kvm, table_gfn);
 			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
 			    index, pte, pte|PT_ACCESSED_MASK))
@@ -186,18 +186,24 @@ walk:
 
 		walker->ptes[walker->level - 1] = pte;
 
-		if (walker->level == PT_PAGE_TABLE_LEVEL) {
-			walker->gfn = gpte_to_gfn(pte);
-			break;
-		}
-
-		if (walker->level == PT_DIRECTORY_LEVEL
-		    && (pte & PT_PAGE_SIZE_MASK)
-		    && (PTTYPE == 64 || is_pse(vcpu))) {
-			walker->gfn = gpte_to_gfn_pde(pte);
-			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
-			if (PTTYPE == 32 && is_cpuid_PSE36())
+		if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
+		    ((walker->level == PT_DIRECTORY_LEVEL) &&
+				(pte & PT_PAGE_SIZE_MASK)  &&
+				(PTTYPE == 64 || is_pse(vcpu))) ||
+		    ((walker->level == PT_PDPE_LEVEL) &&
+				(pte & PT_PAGE_SIZE_MASK)  &&
+				is_long_mode(vcpu))) {
+			int lvl = walker->level;
+
+			walker->gfn = gpte_to_gfn_lvl(pte, lvl);
+			walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl))
+					>> PAGE_SHIFT;
+
+			if (PTTYPE == 32 &&
+			    walker->level == PT_DIRECTORY_LEVEL &&
+			    is_cpuid_PSE36())
 				walker->gfn += pse36_gfn_delta(pte);
+
 			break;
 		}
 
@@ -205,9 +211,10 @@ walk:
 		--walker->level;
 	}
 
-	if (write_fault && !is_dirty_pte(pte)) {
+	if (write_fault && !is_dirty_gpte(pte)) {
 		bool ret;
 
+		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
 		mark_page_dirty(vcpu->kvm, table_gfn);
 		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
 			    pte|PT_DIRTY_MASK);
@@ -239,6 +246,7 @@ err:
 		walker->error_code |= PFERR_FETCH_MASK;
 	if (rsvd_fault)
 		walker->error_code |= PFERR_RSVD_MASK;
+	trace_kvm_mmu_walker_error(walker->error_code);
 	return 0;
 }
 
@@ -248,12 +256,11 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 	pt_element_t gpte;
 	unsigned pte_access;
 	pfn_t pfn;
-	int largepage = vcpu->arch.update_pte.largepage;
 
 	gpte = *(const pt_element_t *)pte;
 	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
-		if (!is_present_pte(gpte))
-			set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
+		if (!is_present_gpte(gpte))
+			__set_spte(spte, shadow_notrap_nonpresent_pte);
 		return;
 	}
 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
@@ -267,7 +274,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 		return;
 	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
-		     gpte & PT_DIRTY_MASK, NULL, largepage,
+		     gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
 		     gpte_to_gfn(gpte), pfn, true);
 }
 
@@ -276,7 +283,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
  */
 static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			 struct guest_walker *gw,
-			 int user_fault, int write_fault, int largepage,
+			 int user_fault, int write_fault, int hlevel,
 			 int *ptwrite, pfn_t pfn)
 {
 	unsigned access = gw->pt_access;
@@ -289,19 +296,18 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	pt_element_t curr_pte;
 	struct kvm_shadow_walk_iterator iterator;
 
-	if (!is_present_pte(gw->ptes[gw->level - 1]))
+	if (!is_present_gpte(gw->ptes[gw->level - 1]))
 		return NULL;
 
 	for_each_shadow_entry(vcpu, addr, iterator) {
 		level = iterator.level;
 		sptep = iterator.sptep;
-		if (level == PT_PAGE_TABLE_LEVEL
-		    || (largepage && level == PT_DIRECTORY_LEVEL)) {
+		if (iterator.level == hlevel) {
 			mmu_set_spte(vcpu, sptep, access,
 				     gw->pte_access & access,
 				     user_fault, write_fault,
 				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
-				     ptwrite, largepage,
+				     ptwrite, level,
 				     gw->gfn, pfn, false);
 			break;
 		}
@@ -311,16 +317,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 
 		if (is_large_pte(*sptep)) {
 			rmap_remove(vcpu->kvm, sptep);
-			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+			__set_spte(sptep, shadow_trap_nonpresent_pte);
 			kvm_flush_remote_tlbs(vcpu->kvm);
 		}
 
-		if (level == PT_DIRECTORY_LEVEL
-		    && gw->level == PT_DIRECTORY_LEVEL) {
+		if (level <= gw->level) {
+			int delta = level - gw->level + 1;
 			direct = 1;
-			if (!is_dirty_pte(gw->ptes[level - 1]))
+			if (!is_dirty_gpte(gw->ptes[level - delta]))
 				access &= ~ACC_WRITE_MASK;
-			table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
+			table_gfn = gpte_to_gfn(gw->ptes[level - delta]);
+			/* advance table_gfn when emulating 1gb pages with 4k */
+			if (delta == 0)
+				table_gfn += PT_INDEX(addr, level);
 		} else {
 			direct = 0;
 			table_gfn = gw->table_gfn[level - 2];
@@ -369,11 +378,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	int user_fault = error_code & PFERR_USER_MASK;
 	int fetch_fault = error_code & PFERR_FETCH_MASK;
 	struct guest_walker walker;
-	u64 *shadow_pte;
+	u64 *sptep;
 	int write_pt = 0;
 	int r;
 	pfn_t pfn;
-	int largepage = 0;
+	int level = PT_PAGE_TABLE_LEVEL;
 	unsigned long mmu_seq;
 
 	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
@@ -399,14 +408,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 		return 0;
 	}
 
-	if (walker.level == PT_DIRECTORY_LEVEL) {
-		gfn_t large_gfn;
-		large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
-		if (is_largepage_backed(vcpu, large_gfn)) {
-			walker.gfn = large_gfn;
-			largepage = 1;
-		}
+	if (walker.level >= PT_DIRECTORY_LEVEL) {
+		level = min(walker.level, mapping_level(vcpu, walker.gfn));
+		walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
 	}
+
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
@@ -422,11 +428,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-				  largepage, &write_pt, pfn);
-
+	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
+			     level, &write_pt, pfn);
 	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
-		 shadow_pte, *shadow_pte, write_pt);
+		 sptep, *sptep, write_pt);
 
 	if (!write_pt)
 		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
@@ -459,8 +464,9 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		sptep = iterator.sptep;
 
 		/* FIXME: properly handle invlpg on large guest pages */
-		if (level == PT_PAGE_TABLE_LEVEL ||
-		    ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
+		if (level == PT_PAGE_TABLE_LEVEL  ||
+		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
+		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
 			struct kvm_mmu_page *sp = page_header(__pa(sptep));
 
 			pte_gpa = (sp->gfn << PAGE_SHIFT);
@@ -472,7 +478,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 					--vcpu->kvm->stat.lpages;
 				need_flush = 1;
 			}
-			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+			__set_spte(sptep, shadow_trap_nonpresent_pte);
 			break;
 		}
 
@@ -489,7 +495,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 	if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
 				  sizeof(pt_element_t)))
 		return;
-	if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
+	if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) {
 		if (mmu_topup_memory_caches(vcpu))
 			return;
 		kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
@@ -536,7 +542,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
 		pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
 		for (j = 0; j < ARRAY_SIZE(pt); ++j)
-			if (r || is_present_pte(pt[j]))
+			if (r || is_present_gpte(pt[j]))
 				sp->spt[i+j] = shadow_trap_nonpresent_pte;
 			else
 				sp->spt[i+j] = shadow_notrap_nonpresent_pte;
@@ -574,23 +580,23 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 					  sizeof(pt_element_t)))
 			return -EINVAL;
 
-		if (gpte_to_gfn(gpte) != gfn || !is_present_pte(gpte) ||
+		if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) ||
 		    !(gpte & PT_ACCESSED_MASK)) {
 			u64 nonpresent;
 
 			rmap_remove(vcpu->kvm, &sp->spt[i]);
-			if (is_present_pte(gpte))
+			if (is_present_gpte(gpte))
 				nonpresent = shadow_trap_nonpresent_pte;
 			else
 				nonpresent = shadow_notrap_nonpresent_pte;
-			set_shadow_pte(&sp->spt[i], nonpresent);
+			__set_spte(&sp->spt[i], nonpresent);
 			continue;
 		}
 
 		nr_present++;
 		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
-			 is_dirty_pte(gpte), 0, gfn,
+			 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
 			 spte_to_pfn(sp->spt[i]), true, false);
 	}
 
@@ -603,9 +609,10 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 #undef PT_BASE_ADDR_MASK
 #undef PT_INDEX
 #undef PT_LEVEL_MASK
-#undef PT_DIR_BASE_ADDR_MASK
+#undef PT_LVL_ADDR_MASK
+#undef PT_LVL_OFFSET_MASK
 #undef PT_LEVEL_BITS
 #undef PT_MAX_FULL_LEVELS
 #undef gpte_to_gfn
-#undef gpte_to_gfn_pde
+#undef gpte_to_gfn_lvl
 #undef CMPXCHG
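
The collapsed exit path above computes the final gfn for 4 KiB, 2 MiB and 1 GiB guest mappings in one place: the base gfn comes from the large gpte, and the page index within the large mapping comes from the low bits of the faulting address. A stand-alone sketch of that arithmetic (values are invented and the mask macro only approximates PT_LVL_OFFSET_MASK):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define LEVEL_BITS	9
/* offset bits covered below one entry at the given level, low 12 bits cleared */
#define LVL_OFFSET_MASK(l) \
	(((1ULL << (PAGE_SHIFT + LEVEL_BITS * ((l) - 1))) - 1) & \
	 ~((1ULL << PAGE_SHIFT) - 1))

int main(void)
{
	uint64_t addr = 0x40234567ULL;	/* hypothetical faulting guest address */
	uint64_t base_gfn = 0x80000;	/* hypothetical gfn taken from a large gpte */
	int level = 2;			/* PT_DIRECTORY_LEVEL, i.e. a 2 MiB mapping */
	uint64_t gfn;

	gfn = base_gfn + ((addr & LVL_OFFSET_MASK(level)) >> PAGE_SHIFT);
	/* prints: level 2: final gfn = 0x80034 */
	printf("level %d: final gfn = 0x%llx\n", level, (unsigned long long)gfn);
	return 0;
}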

The file diff has been suppressed because it is too large
+ 378 - 298
arch/x86/kvm/svm.c


+ 10 - 6
arch/x86/kvm/timer.c

@@ -9,12 +9,16 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
 	int restart_timer = 0;
 	wait_queue_head_t *q = &vcpu->wq;
 
-	/* FIXME: this code should not know anything about vcpus */
-	if (!atomic_inc_and_test(&ktimer->pending))
+	/*
+	 * There is a race window between reading and incrementing, but we do
+	 * not care about potentially losing timer events in the !reinject
+	 * case anyway.
+	 */
+	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
+		atomic_inc(&ktimer->pending);
+		/* FIXME: this code should not know anything about vcpus */
 		set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
-
-	if (!ktimer->reinject)
-		atomic_set(&ktimer->pending, 1);
+	}
 
 	if (waitqueue_active(q))
 		wake_up_interruptible(q);
@@ -33,7 +37,7 @@ enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
 	struct kvm_vcpu *vcpu;
 	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
 
-	vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id];
+	vcpu = ktimer->vcpu;
 	if (!vcpu)
 		return HRTIMER_NORESTART;
 

+ 355 - 0
arch/x86/kvm/trace.h

@@ -0,0 +1,355 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_PATH arch/x86/kvm
+#define TRACE_INCLUDE_FILE trace
+
+/*
+ * Tracepoint for guest mode entry.
+ */
+TRACE_EVENT(kvm_entry,
+	TP_PROTO(unsigned int vcpu_id),
+	TP_ARGS(vcpu_id),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	vcpu_id		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu_id;
+	),
+
+	TP_printk("vcpu %u", __entry->vcpu_id)
+);
+
+/*
+ * Tracepoint for hypercall.
+ */
+TRACE_EVENT(kvm_hypercall,
+	TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1,
+		 unsigned long a2, unsigned long a3),
+	TP_ARGS(nr, a0, a1, a2, a3),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long, 	nr		)
+		__field(	unsigned long,	a0		)
+		__field(	unsigned long,	a1		)
+		__field(	unsigned long,	a2		)
+		__field(	unsigned long,	a3		)
+	),
+
+	TP_fast_assign(
+		__entry->nr		= nr;
+		__entry->a0		= a0;
+		__entry->a1		= a1;
+		__entry->a2		= a2;
+		__entry->a3		= a3;
+	),
+
+	TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx",
+		 __entry->nr, __entry->a0, __entry->a1,  __entry->a2,
+		 __entry->a3)
+);
+
+/*
+ * Tracepoint for PIO.
+ */
+TRACE_EVENT(kvm_pio,
+	TP_PROTO(unsigned int rw, unsigned int port, unsigned int size,
+		 unsigned int count),
+	TP_ARGS(rw, port, size, count),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int, 	rw		)
+		__field(	unsigned int, 	port		)
+		__field(	unsigned int, 	size		)
+		__field(	unsigned int,	count		)
+	),
+
+	TP_fast_assign(
+		__entry->rw		= rw;
+		__entry->port		= port;
+		__entry->size		= size;
+		__entry->count		= count;
+	),
+
+	TP_printk("pio_%s at 0x%x size %d count %d",
+		  __entry->rw ? "write" : "read",
+		  __entry->port, __entry->size, __entry->count)
+);
+
+/*
+ * Tracepoint for cpuid.
+ */
+TRACE_EVENT(kvm_cpuid,
+	TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx,
+		 unsigned long rcx, unsigned long rdx),
+	TP_ARGS(function, rax, rbx, rcx, rdx),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	function	)
+		__field(	unsigned long,	rax		)
+		__field(	unsigned long,	rbx		)
+		__field(	unsigned long,	rcx		)
+		__field(	unsigned long,	rdx		)
+	),
+
+	TP_fast_assign(
+		__entry->function	= function;
+		__entry->rax		= rax;
+		__entry->rbx		= rbx;
+		__entry->rcx		= rcx;
+		__entry->rdx		= rdx;
+	),
+
+	TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx",
+		  __entry->function, __entry->rax,
+		  __entry->rbx, __entry->rcx, __entry->rdx)
+);
+
+#define AREG(x) { APIC_##x, "APIC_" #x }
+
+#define kvm_trace_symbol_apic						    \
+	AREG(ID), AREG(LVR), AREG(TASKPRI), AREG(ARBPRI), AREG(PROCPRI),    \
+	AREG(EOI), AREG(RRR), AREG(LDR), AREG(DFR), AREG(SPIV), AREG(ISR),  \
+	AREG(TMR), AREG(IRR), AREG(ESR), AREG(ICR), AREG(ICR2), AREG(LVTT), \
+	AREG(LVTTHMR), AREG(LVTPC), AREG(LVT0), AREG(LVT1), AREG(LVTERR),   \
+	AREG(TMICT), AREG(TMCCT), AREG(TDCR), AREG(SELF_IPI), AREG(EFEAT),  \
+	AREG(ECTRL)
+/*
+ * Tracepoint for apic access.
+ */
+TRACE_EVENT(kvm_apic,
+	TP_PROTO(unsigned int rw, unsigned int reg, unsigned int val),
+	TP_ARGS(rw, reg, val),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	rw		)
+		__field(	unsigned int,	reg		)
+		__field(	unsigned int,	val		)
+	),
+
+	TP_fast_assign(
+		__entry->rw		= rw;
+		__entry->reg		= reg;
+		__entry->val		= val;
+	),
+
+	TP_printk("apic_%s %s = 0x%x",
+		  __entry->rw ? "write" : "read",
+		  __print_symbolic(__entry->reg, kvm_trace_symbol_apic),
+		  __entry->val)
+);
+
+#define trace_kvm_apic_read(reg, val)		trace_kvm_apic(0, reg, val)
+#define trace_kvm_apic_write(reg, val)		trace_kvm_apic(1, reg, val)
+
+/*
+ * Tracepoint for kvm guest exit:
+ */
+TRACE_EVENT(kvm_exit,
+	TP_PROTO(unsigned int exit_reason, unsigned long guest_rip),
+	TP_ARGS(exit_reason, guest_rip),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	exit_reason	)
+		__field(	unsigned long,	guest_rip	)
+	),
+
+	TP_fast_assign(
+		__entry->exit_reason	= exit_reason;
+		__entry->guest_rip	= guest_rip;
+	),
+
+	TP_printk("reason %s rip 0x%lx",
+		 ftrace_print_symbols_seq(p, __entry->exit_reason,
+					  kvm_x86_ops->exit_reasons_str),
+		 __entry->guest_rip)
+);
+
+/*
+ * Tracepoint for kvm interrupt injection:
+ */
+TRACE_EVENT(kvm_inj_virq,
+	TP_PROTO(unsigned int irq),
+	TP_ARGS(irq),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	irq		)
+	),
+
+	TP_fast_assign(
+		__entry->irq		= irq;
+	),
+
+	TP_printk("irq %u", __entry->irq)
+);
+
+/*
+ * Tracepoint for page fault.
+ */
+TRACE_EVENT(kvm_page_fault,
+	TP_PROTO(unsigned long fault_address, unsigned int error_code),
+	TP_ARGS(fault_address, error_code),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	fault_address	)
+		__field(	unsigned int,	error_code	)
+	),
+
+	TP_fast_assign(
+		__entry->fault_address	= fault_address;
+		__entry->error_code	= error_code;
+	),
+
+	TP_printk("address %lx error_code %x",
+		  __entry->fault_address, __entry->error_code)
+);
+
+/*
+ * Tracepoint for guest MSR access.
+ */
+TRACE_EVENT(kvm_msr,
+	TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data),
+	TP_ARGS(rw, ecx, data),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	rw		)
+		__field(	unsigned int,	ecx		)
+		__field(	unsigned long,	data		)
+	),
+
+	TP_fast_assign(
+		__entry->rw		= rw;
+		__entry->ecx		= ecx;
+		__entry->data		= data;
+	),
+
+	TP_printk("msr_%s %x = 0x%lx",
+		  __entry->rw ? "write" : "read",
+		  __entry->ecx, __entry->data)
+);
+
+#define trace_kvm_msr_read(ecx, data)		trace_kvm_msr(0, ecx, data)
+#define trace_kvm_msr_write(ecx, data)		trace_kvm_msr(1, ecx, data)
+
+/*
+ * Tracepoint for guest CR access.
+ */
+TRACE_EVENT(kvm_cr,
+	TP_PROTO(unsigned int rw, unsigned int cr, unsigned long val),
+	TP_ARGS(rw, cr, val),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	rw		)
+		__field(	unsigned int,	cr		)
+		__field(	unsigned long,	val		)
+	),
+
+	TP_fast_assign(
+		__entry->rw		= rw;
+		__entry->cr		= cr;
+		__entry->val		= val;
+	),
+
+	TP_printk("cr_%s %x = 0x%lx",
+		  __entry->rw ? "write" : "read",
+		  __entry->cr, __entry->val)
+);
+
+#define trace_kvm_cr_read(cr, val)		trace_kvm_cr(0, cr, val)
+#define trace_kvm_cr_write(cr, val)		trace_kvm_cr(1, cr, val)
+
+TRACE_EVENT(kvm_pic_set_irq,
+	    TP_PROTO(__u8 chip, __u8 pin, __u8 elcr, __u8 imr, bool coalesced),
+	    TP_ARGS(chip, pin, elcr, imr, coalesced),
+
+	TP_STRUCT__entry(
+		__field(	__u8,		chip		)
+		__field(	__u8,		pin		)
+		__field(	__u8,		elcr		)
+		__field(	__u8,		imr		)
+		__field(	bool,		coalesced	)
+	),
+
+	TP_fast_assign(
+		__entry->chip		= chip;
+		__entry->pin		= pin;
+		__entry->elcr		= elcr;
+		__entry->imr		= imr;
+		__entry->coalesced	= coalesced;
+	),
+
+	TP_printk("chip %u pin %u (%s%s)%s",
+		  __entry->chip, __entry->pin,
+		  (__entry->elcr & (1 << __entry->pin)) ? "level":"edge",
+		  (__entry->imr & (1 << __entry->pin)) ? "|masked":"",
+		  __entry->coalesced ? " (coalesced)" : "")
+);
+
+#define kvm_apic_dst_shorthand		\
+	{0x0, "dst"},			\
+	{0x1, "self"},			\
+	{0x2, "all"},			\
+	{0x3, "all-but-self"}
+
+TRACE_EVENT(kvm_apic_ipi,
+	    TP_PROTO(__u32 icr_low, __u32 dest_id),
+	    TP_ARGS(icr_low, dest_id),
+
+	TP_STRUCT__entry(
+		__field(	__u32,		icr_low		)
+		__field(	__u32,		dest_id		)
+	),
+
+	TP_fast_assign(
+		__entry->icr_low	= icr_low;
+		__entry->dest_id	= dest_id;
+	),
+
+	TP_printk("dst %x vec %u (%s|%s|%s|%s|%s)",
+		  __entry->dest_id, (u8)__entry->icr_low,
+		  __print_symbolic((__entry->icr_low >> 8 & 0x7),
+				   kvm_deliver_mode),
+		  (__entry->icr_low & (1<<11)) ? "logical" : "physical",
+		  (__entry->icr_low & (1<<14)) ? "assert" : "de-assert",
+		  (__entry->icr_low & (1<<15)) ? "level" : "edge",
+		  __print_symbolic((__entry->icr_low >> 18 & 0x3),
+				   kvm_apic_dst_shorthand))
+);
+
+TRACE_EVENT(kvm_apic_accept_irq,
+	    TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced),
+	    TP_ARGS(apicid, dm, tm, vec, coalesced),
+
+	TP_STRUCT__entry(
+		__field(	__u32,		apicid		)
+		__field(	__u16,		dm		)
+		__field(	__u8,		tm		)
+		__field(	__u8,		vec		)
+		__field(	bool,		coalesced	)
+	),
+
+	TP_fast_assign(
+		__entry->apicid		= apicid;
+		__entry->dm		= dm;
+		__entry->tm		= tm;
+		__entry->vec		= vec;
+		__entry->coalesced	= coalesced;
+	),
+
+	TP_printk("apicid %x vec %u (%s|%s)%s",
+		  __entry->apicid, __entry->vec,
+		  __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode),
+		  __entry->tm ? "level" : "edge",
+		  __entry->coalesced ? " (coalesced)" : "")
+);
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

The file diff has been suppressed because it is too large
+ 321 - 133
arch/x86/kvm/vmx.c


The file diff has been suppressed because it is too large
+ 471 - 106
arch/x86/kvm/x86.c


+ 4 - 0
arch/x86/kvm/x86.h

@@ -31,4 +31,8 @@ static inline bool kvm_exception_is_soft(unsigned int nr)
 {
 	return (nr == BP_VECTOR) || (nr == OF_VECTOR);
 }
+
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+                                             u32 function, u32 index);
+
 #endif

+ 1 - 0
arch/x86/mm/highmem_32.c

@@ -104,6 +104,7 @@ EXPORT_SYMBOL(kunmap);
 EXPORT_SYMBOL(kmap_atomic);
 EXPORT_SYMBOL(kunmap_atomic);
 EXPORT_SYMBOL(kmap_atomic_prot);
+EXPORT_SYMBOL(kmap_atomic_to_page);
 
 void __init set_highmem_pages_init(void)
 {

+ 5 - 0
include/asm-generic/Kbuild.asm

@@ -3,6 +3,11 @@ ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h \
 header-y  += kvm.h
 endif
 
+ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h \
+		  $(srctree)/include/asm-$(SRCARCH)/kvm_para.h),)
+header-y  += kvm_para.h
+endif
+
 ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/a.out.h \
       		  $(srctree)/include/asm-$(SRCARCH)/a.out.h),)
 unifdef-y += a.out.h

+ 4 - 0
include/linux/Kbuild

@@ -268,6 +268,10 @@ ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h \
       		  $(srctree)/include/asm-$(SRCARCH)/kvm.h),)
 unifdef-y += kvm.h
 endif
+ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h \
+		  $(srctree)/include/asm-$(SRCARCH)/kvm_para.h),)
+unifdef-y += kvm_para.h
+endif
 unifdef-y += llc.h
 unifdef-y += loop.h
 unifdef-y += lp.h

+ 91 - 36
include/linux/kvm.h

@@ -14,7 +14,7 @@
 
 #define KVM_API_VERSION 12
 
-/* for KVM_TRACE_ENABLE */
+/* for KVM_TRACE_ENABLE, deprecated */
 struct kvm_user_trace_setup {
 	__u32 buf_size; /* sub_buffer size of each per-cpu */
 	__u32 buf_nr; /* the number of sub_buffers of each per-cpu */
@@ -70,6 +70,14 @@ struct kvm_irqchip {
 	} chip;
 };
 
+/* for KVM_CREATE_PIT2 */
+struct kvm_pit_config {
+	__u32 flags;
+	__u32 pad[15];
+};
+
+#define KVM_PIT_SPEAKER_DUMMY     1
+
 #define KVM_EXIT_UNKNOWN          0
 #define KVM_EXIT_EXCEPTION        1
 #define KVM_EXIT_IO               2
@@ -87,6 +95,10 @@ struct kvm_irqchip {
 #define KVM_EXIT_S390_RESET       14
 #define KVM_EXIT_DCR              15
 #define KVM_EXIT_NMI              16
+#define KVM_EXIT_INTERNAL_ERROR   17
+
+/* For KVM_EXIT_INTERNAL_ERROR */
+#define KVM_INTERNAL_ERROR_EMULATION 1
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
@@ -173,6 +185,9 @@ struct kvm_run {
 			__u32 data;
 			__u8  is_write;
 		} dcr;
+		struct {
+			__u32 suberror;
+		} internal;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -292,6 +307,28 @@ struct kvm_guest_debug {
 	struct kvm_guest_debug_arch arch;
 };
 
+enum {
+	kvm_ioeventfd_flag_nr_datamatch,
+	kvm_ioeventfd_flag_nr_pio,
+	kvm_ioeventfd_flag_nr_deassign,
+	kvm_ioeventfd_flag_nr_max,
+};
+
+#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
+#define KVM_IOEVENTFD_FLAG_PIO       (1 << kvm_ioeventfd_flag_nr_pio)
+#define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
+
+#define KVM_IOEVENTFD_VALID_FLAG_MASK  ((1 << kvm_ioeventfd_flag_nr_max) - 1)
+
+struct kvm_ioeventfd {
+	__u64 datamatch;
+	__u64 addr;        /* legal pio/mmio address */
+	__u32 len;         /* 1, 2, 4, or 8 bytes    */
+	__s32 fd;
+	__u32 flags;
+	__u8  pad[36];
+};
+
 #define KVM_TRC_SHIFT           16
 /*
  * kvm trace categories
@@ -310,35 +347,6 @@ struct kvm_guest_debug {
 #define KVM_TRC_CYCLE_SIZE      8
 #define KVM_TRC_EXTRA_MAX       7
 
-/* This structure represents a single trace buffer record. */
-struct kvm_trace_rec {
-	/* variable rec_val
-	 * is split into:
-	 * bits 0 - 27  -> event id
-	 * bits 28 -30  -> number of extra data args of size u32
-	 * bits 31      -> binary indicator for if tsc is in record
-	 */
-	__u32 rec_val;
-	__u32 pid;
-	__u32 vcpu_id;
-	union {
-		struct {
-			__u64 timestamp;
-			__u32 extra_u32[KVM_TRC_EXTRA_MAX];
-		} __attribute__((packed)) timestamp;
-		struct {
-			__u32 extra_u32[KVM_TRC_EXTRA_MAX];
-		} notimestamp;
-	} u;
-};
-
-#define TRACE_REC_EVENT_ID(val) \
-		(0x0fffffff & (val))
-#define TRACE_REC_NUM_DATA_ARGS(val) \
-		(0x70000000 & ((val) << 28))
-#define TRACE_REC_TCS(val) \
-		(0x80000000 & ((val) << 31))
-
 #define KVMIO 0xAE
 
 /*
@@ -415,6 +423,19 @@ struct kvm_trace_rec {
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#ifdef __KVM_HAVE_MCE
+#define KVM_CAP_MCE 31
+#endif
+#define KVM_CAP_IRQFD 32
+#ifdef __KVM_HAVE_PIT
+#define KVM_CAP_PIT2 33
+#endif
+#define KVM_CAP_SET_BOOT_CPU_ID 34
+#ifdef __KVM_HAVE_PIT_STATE2
+#define KVM_CAP_PIT_STATE2 35
+#endif
+#define KVM_CAP_IOEVENTFD 36
+#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -454,15 +475,32 @@ struct kvm_irq_routing {
 
 #endif
 
+#ifdef KVM_CAP_MCE
+/* x86 MCE */
+struct kvm_x86_mce {
+	__u64 status;
+	__u64 addr;
+	__u64 misc;
+	__u64 mcg_status;
+	__u8 bank;
+	__u8 pad1[7];
+	__u64 pad2[3];
+};
+#endif
+
+#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
+
+struct kvm_irqfd {
+	__u32 fd;
+	__u32 gsi;
+	__u32 flags;
+	__u8  pad[20];
+};
+
 /*
  * ioctls for VM fds
  */
 #define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 0x40, struct kvm_memory_region)
-#define KVM_SET_NR_MMU_PAGES      _IO(KVMIO, 0x44)
-#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO, 0x45)
-#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
-					struct kvm_userspace_memory_region)
-#define KVM_SET_TSS_ADDR          _IO(KVMIO, 0x47)
 /*
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
  * a vcpu fd.
@@ -470,6 +508,12 @@ struct kvm_irq_routing {
 #define KVM_CREATE_VCPU           _IO(KVMIO,  0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
 #define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
+#define KVM_SET_NR_MMU_PAGES      _IO(KVMIO, 0x44)
+#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO, 0x45)
+#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
+					struct kvm_userspace_memory_region)
+#define KVM_SET_TSS_ADDR          _IO(KVMIO, 0x47)
+#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
 /* Device model IOC */
 #define KVM_CREATE_IRQCHIP	  _IO(KVMIO,  0x60)
 #define KVM_IRQ_LINE		  _IOW(KVMIO, 0x61, struct kvm_irq_level)
@@ -498,6 +542,10 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_SET_MSIX_ENTRY \
 			_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
 #define KVM_DEASSIGN_DEV_IRQ       _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
+#define KVM_IRQFD                  _IOW(KVMIO, 0x76, struct kvm_irqfd)
+#define KVM_CREATE_PIT2		   _IOW(KVMIO, 0x77, struct kvm_pit_config)
+#define KVM_SET_BOOT_CPU_ID        _IO(KVMIO, 0x78)
+#define KVM_IOEVENTFD             _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
 
 /*
  * ioctls for vcpu fds
@@ -541,6 +589,10 @@ struct kvm_irq_routing {
 #define KVM_NMI                   _IO(KVMIO,  0x9a)
 /* Available with KVM_CAP_SET_GUEST_DEBUG */
 #define KVM_SET_GUEST_DEBUG       _IOW(KVMIO,  0x9b, struct kvm_guest_debug)
+/* MCE for x86 */
+#define KVM_X86_SETUP_MCE         _IOW(KVMIO,  0x9c, __u64)
+#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO,  0x9d, __u64)
+#define KVM_X86_SET_MCE           _IOW(KVMIO,  0x9e, struct kvm_x86_mce)
 
 /*
  * Deprecated interfaces
@@ -563,6 +615,9 @@ struct kvm_debug_guest {
 #define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
 #define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
 
+#define KVM_GET_PIT2   _IOR(KVMIO,   0x9f, struct kvm_pit_state2)
+#define KVM_SET_PIT2   _IOW(KVMIO,   0xa0, struct kvm_pit_state2)
+
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
 #define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
@@ -633,7 +688,7 @@ struct kvm_assigned_msix_nr {
 	__u16 padding;
 };
 
-#define KVM_MAX_MSIX_PER_DEV		512
+#define KVM_MAX_MSIX_PER_DEV		256
 struct kvm_assigned_msix_entry {
 	__u32 assigned_dev_id;
 	__u32 gsi;
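
Taken together with the irqfd side, the new ioeventfd ABI lets user space receive an eventfd notification instead of a heavyweight exit when the guest writes a matching value. A minimal sketch of registering one (port, length and datamatch value are arbitrary, error handling is omitted, and it assumes the installed <linux/kvm.h> already carries the definitions above):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	int efd = eventfd(0, 0);
	struct kvm_ioeventfd io;

	memset(&io, 0, sizeof(io));
	io.addr      = 0xf4;		/* guest pio port, arbitrary */
	io.len       = 1;
	io.fd        = efd;
	io.datamatch = 1;
	io.flags     = KVM_IOEVENTFD_FLAG_PIO | KVM_IOEVENTFD_FLAG_DATAMATCH;

	if (ioctl(vm, KVM_IOEVENTFD, &io) == 0)
		printf("ioeventfd registered on port 0x%llx\n",
		       (unsigned long long)io.addr);
	return 0;
}

Tearing the binding down again would use the same call with KVM_IOEVENTFD_FLAG_DEASSIGN set in flags.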

+ 71 - 43
include/linux/kvm_host.h

@@ -42,6 +42,7 @@
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
+struct kvm;
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
@@ -59,10 +60,18 @@ struct kvm_io_bus {
 
 void kvm_io_bus_init(struct kvm_io_bus *bus);
 void kvm_io_bus_destroy(struct kvm_io_bus *bus);
-struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
-					  gpa_t addr, int len, int is_write);
-void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-			     struct kvm_io_device *dev);
+int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, int len,
+		     const void *val);
+int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len,
+		    void *val);
+int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
+			       struct kvm_io_device *dev);
+int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
+			    struct kvm_io_device *dev);
+void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
+				 struct kvm_io_device *dev);
+void kvm_io_bus_unregister_dev(struct kvm *kvm, struct kvm_io_bus *bus,
+			       struct kvm_io_device *dev);
 
 struct kvm_vcpu {
 	struct kvm *kvm;
@@ -103,7 +112,7 @@ struct kvm_memory_slot {
 	struct {
 		unsigned long rmap_pde;
 		int write_count;
-	} *lpage_info;
+	} *lpage_info[KVM_NR_PAGE_SIZES - 1];
 	unsigned long userspace_addr;
 	int user_alloc;
 };
@@ -124,7 +133,6 @@ struct kvm_kernel_irq_routing_entry {
 };
 
 struct kvm {
-	struct mutex lock; /* protects the vcpus array and APIC accesses */
 	spinlock_t mmu_lock;
 	spinlock_t requests_lock;
 	struct rw_semaphore slots_lock;
@@ -132,10 +140,23 @@ struct kvm {
 	int nmemslots;
 	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
 					KVM_PRIVATE_MEM_SLOTS];
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	u32 bsp_vcpu_id;
+	struct kvm_vcpu *bsp_vcpu;
+#endif
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+	atomic_t online_vcpus;
 	struct list_head vm_list;
+	struct mutex lock;
 	struct kvm_io_bus mmio_bus;
 	struct kvm_io_bus pio_bus;
+#ifdef CONFIG_HAVE_KVM_EVENTFD
+	struct {
+		spinlock_t        lock;
+		struct list_head  items;
+	} irqfds;
+	struct list_head ioeventfds;
+#endif
 	struct kvm_vm_stat stat;
 	struct kvm_arch arch;
 	atomic_t users_count;
@@ -144,6 +165,7 @@ struct kvm {
 	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 #endif
 
+	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
 	struct hlist_head mask_notifier_list;
@@ -167,6 +189,17 @@ struct kvm {
 #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
 #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
 
+static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
+{
+	smp_rmb();
+	return kvm->vcpus[i];
+}
+
+#define kvm_for_each_vcpu(idx, vcpup, kvm) \
+	for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \
+	     idx < atomic_read(&kvm->online_vcpus) && vcpup; \
+	     vcpup = kvm_get_vcpu(kvm, ++idx))
+
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 
@@ -201,6 +234,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
 				struct kvm_memory_slot old,
 				int user_alloc);
+void kvm_disable_largepages(void);
 void kvm_arch_flush_shadow(struct kvm *kvm);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
@@ -243,8 +277,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg);
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg);
-void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
-void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
 
 int kvm_dev_ioctl_check_extension(long ext);
 
@@ -300,7 +332,6 @@ int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
 void kvm_arch_check_processor_compat(void *rtn);
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
-int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 
 void kvm_free_physmem(struct kvm *kvm);
 
@@ -309,8 +340,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm);
 void kvm_free_all_assigned_devices(struct kvm *kvm);
 void kvm_arch_sync_events(struct kvm *kvm);
 
-int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
-int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
@@ -366,7 +395,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
@@ -459,37 +489,6 @@ struct kvm_stats_debugfs_item {
 extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct dentry *kvm_debugfs_dir;
 
-#define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 5, d1, d2, d3, d4, d5)
-#define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 4, d1, d2, d3, d4, 0)
-#define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 3, d1, d2, d3, 0, 0)
-#define KVMTRACE_2D(evt, vcpu, d1, d2, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 2, d1, d2, 0, 0, 0)
-#define KVMTRACE_1D(evt, vcpu, d1, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 1, d1, 0, 0, 0, 0)
-#define KVMTRACE_0D(evt, vcpu, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 0, 0, 0, 0, 0, 0)
-
-#ifdef CONFIG_KVM_TRACE
-int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg);
-void kvm_trace_cleanup(void);
-#else
-static inline
-int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
-{
-	return -EINVAL;
-}
-#define kvm_trace_cleanup() ((void)0)
-#endif
-
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
 static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq)
 {
@@ -525,4 +524,33 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 
 #endif
 
+#ifdef CONFIG_HAVE_KVM_EVENTFD
+
+void kvm_eventfd_init(struct kvm *kvm);
+int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
+void kvm_irqfd_release(struct kvm *kvm);
+int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
+
+#else
+
+static inline void kvm_eventfd_init(struct kvm *kvm) {}
+static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
+{
+	return -EINVAL;
+}
+
+static inline void kvm_irqfd_release(struct kvm *kvm) {}
+static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	return -ENOSYS;
+}
+
+#endif /* CONFIG_HAVE_KVM_EVENTFD */
+
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
+{
+	return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
+}
+#endif
 #endif
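
With kvm->lock no longer documented as protecting the vcpus array, vcpus are published by filling the slot first and then bumping online_vcpus, and kvm_for_each_vcpu only walks up to that count. The sketch below is a user-space analogue of that publish/iterate pattern using C11 atomics (it is not the kernel code, and the release/acquire pair stands in loosely for the kernel's smp_wmb()/smp_rmb()):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define MAX_VCPUS 4

static int ids[MAX_VCPUS] = { 0, 1, 2, 3 };
static int *vcpus[MAX_VCPUS];
static atomic_int online_vcpus;

static void *create_vcpus(void *arg)
{
	(void)arg;
	for (int i = 0; i < MAX_VCPUS; i++) {
		vcpus[i] = &ids[i];			/* fill the slot first... */
		atomic_fetch_add_explicit(&online_vcpus, 1,
					  memory_order_release);	/* ...then publish it */
	}
	return NULL;
}

int main(void)
{
	pthread_t t;
	int i, n;

	pthread_create(&t, NULL, create_vcpus, NULL);

	/* reader side, bounded like kvm_for_each_vcpu(); it may legitimately
	 * see fewer than MAX_VCPUS if it races with the creator */
	n = atomic_load_explicit(&online_vcpus, memory_order_acquire);
	for (i = 0; i < n && vcpus[i]; i++)
		printf("vcpu %d is online\n", *vcpus[i]);

	pthread_join(t, NULL);
	return 0;
}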

+ 1 - 0
include/linux/kvm_para.h

@@ -13,6 +13,7 @@
 #define KVM_ENOSYS		1000
 #define KVM_EFAULT		EFAULT
 #define KVM_E2BIG		E2BIG
+#define KVM_EPERM		EPERM
 
 #define KVM_HC_VAPIC_POLL_IRQ		1
 #define KVM_HC_MMU_OP			2

+ 151 - 0
include/trace/events/kvm.h

@@ -0,0 +1,151 @@
+#if !defined(_TRACE_KVM_MAIN_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_MAIN_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_FILE kvm
+
+#if defined(__KVM_HAVE_IOAPIC)
+TRACE_EVENT(kvm_set_irq,
+	TP_PROTO(unsigned int gsi, int level, int irq_source_id),
+	TP_ARGS(gsi, level, irq_source_id),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	gsi		)
+		__field(	int,		level		)
+		__field(	int,		irq_source_id	)
+	),
+
+	TP_fast_assign(
+		__entry->gsi		= gsi;
+		__entry->level		= level;
+		__entry->irq_source_id	= irq_source_id;
+	),
+
+	TP_printk("gsi %u level %d source %d",
+		  __entry->gsi, __entry->level, __entry->irq_source_id)
+);
+
+#define kvm_deliver_mode		\
+	{0x0, "Fixed"},			\
+	{0x1, "LowPrio"},		\
+	{0x2, "SMI"},			\
+	{0x3, "Res3"},			\
+	{0x4, "NMI"},			\
+	{0x5, "INIT"},			\
+	{0x6, "SIPI"},			\
+	{0x7, "ExtINT"}
+
+TRACE_EVENT(kvm_ioapic_set_irq,
+	    TP_PROTO(__u64 e, int pin, bool coalesced),
+	    TP_ARGS(e, pin, coalesced),
+
+	TP_STRUCT__entry(
+		__field(	__u64,		e		)
+		__field(	int,		pin		)
+		__field(	bool,		coalesced	)
+	),
+
+	TP_fast_assign(
+		__entry->e		= e;
+		__entry->pin		= pin;
+		__entry->coalesced	= coalesced;
+	),
+
+	TP_printk("pin %u dst %x vec=%u (%s|%s|%s%s)%s",
+		  __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e,
+		  __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
+		  (__entry->e & (1<<11)) ? "logical" : "physical",
+		  (__entry->e & (1<<15)) ? "level" : "edge",
+		  (__entry->e & (1<<16)) ? "|masked" : "",
+		  __entry->coalesced ? " (coalesced)" : "")
+);
+
+TRACE_EVENT(kvm_msi_set_irq,
+	    TP_PROTO(__u64 address, __u64 data),
+	    TP_ARGS(address, data),
+
+	TP_STRUCT__entry(
+		__field(	__u64,		address		)
+		__field(	__u64,		data		)
+	),
+
+	TP_fast_assign(
+		__entry->address	= address;
+		__entry->data		= data;
+	),
+
+	TP_printk("dst %u vec %x (%s|%s|%s%s)",
+		  (u8)(__entry->address >> 12), (u8)__entry->data,
+		  __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
+		  (__entry->address & (1<<2)) ? "logical" : "physical",
+		  (__entry->data & (1<<15)) ? "level" : "edge",
+		  (__entry->address & (1<<3)) ? "|rh" : "")
+);
+
+#define kvm_irqchips						\
+	{KVM_IRQCHIP_PIC_MASTER,	"PIC master"},		\
+	{KVM_IRQCHIP_PIC_SLAVE,		"PIC slave"},		\
+	{KVM_IRQCHIP_IOAPIC,		"IOAPIC"}
+
+TRACE_EVENT(kvm_ack_irq,
+	TP_PROTO(unsigned int irqchip, unsigned int pin),
+	TP_ARGS(irqchip, pin),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	irqchip		)
+		__field(	unsigned int,	pin		)
+	),
+
+	TP_fast_assign(
+		__entry->irqchip	= irqchip;
+		__entry->pin		= pin;
+	),
+
+	TP_printk("irqchip %s pin %u",
+		  __print_symbolic(__entry->irqchip, kvm_irqchips),
+		 __entry->pin)
+);
+
+
+
+#endif /* defined(__KVM_HAVE_IOAPIC) */
+
+#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
+#define KVM_TRACE_MMIO_READ 1
+#define KVM_TRACE_MMIO_WRITE 2
+
+#define kvm_trace_symbol_mmio \
+	{ KVM_TRACE_MMIO_READ_UNSATISFIED, "unsatisfied-read" }, \
+	{ KVM_TRACE_MMIO_READ, "read" }, \
+	{ KVM_TRACE_MMIO_WRITE, "write" }
+
+TRACE_EVENT(kvm_mmio,
+	TP_PROTO(int type, int len, u64 gpa, u64 val),
+	TP_ARGS(type, len, gpa, val),
+
+	TP_STRUCT__entry(
+		__field(	u32,	type		)
+		__field(	u32,	len		)
+		__field(	u64,	gpa		)
+		__field(	u64,	val		)
+	),
+
+	TP_fast_assign(
+		__entry->type		= type;
+		__entry->len		= len;
+		__entry->gpa		= gpa;
+		__entry->val		= val;
+	),
+
+	TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
+		  __print_symbolic(__entry->type, kvm_trace_symbol_mmio),
+		  __entry->len, __entry->gpa, __entry->val)
+);
+
+#endif /* _TRACE_KVM_MAIN_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
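
These tracepoints replace the old relay/marker based tracer (virt/kvm/kvm_trace.c, deleted at the end of this series), so KVM events are now consumed through the generic ftrace interface instead of a KVM-specific ioctl. As a hedged illustration only (the tracefs mount point and required privileges vary by system, and none of this is part of the merge itself), a userspace reader could enable the whole "kvm" event group and stream the rendered TP_printk() lines like this:

/*
 * Sketch only, not part of the merge.  Assumes debugfs is mounted at
 * /sys/kernel/debug and that the caller is allowed to use it.
 */
#include <stdio.h>

#define TRACE_DIR "/sys/kernel/debug/tracing"

static int echo_to(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	char line[512];
	FILE *tp;

	/* "kvm" is the TRACE_SYSTEM defined above; "1" enables every event in it. */
	if (echo_to(TRACE_DIR "/events/kvm/enable", "1"))
		return 1;

	tp = fopen(TRACE_DIR "/trace_pipe", "r");
	if (!tp)
		return 1;

	/* Each line is rendered by the event's TP_printk() format, e.g.
	 * "kvm_mmio: mmio write len 4 gpa 0xfee00300 val 0x0".       */
	while (fgets(line, sizeof(line), tp))
		fputs(line, stdout);

	fclose(tp);
	return 0;
}

Since these are ordinary tracepoints, they should also be reachable from perf on kernels where perf supports tracepoint events.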

+ 1 - 0
mm/hugetlb.c

@@ -234,6 +234,7 @@ unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
 
 	return 1UL << (hstate->order + PAGE_SHIFT);
 }
+EXPORT_SYMBOL_GPL(vma_kernel_pagesize);
 
 /*
  * Return the page size being used by the MMU to back a VMA. In the majority

+ 14 - 0
virt/kvm/Kconfig

@@ -0,0 +1,14 @@
+# KVM common configuration items and defaults
+
+config HAVE_KVM
+       bool
+
+config HAVE_KVM_IRQCHIP
+       bool
+
+config HAVE_KVM_EVENTFD
+       bool
+       select EVENTFD
+
+config KVM_APIC_ARCHITECTURE
+       bool

+ 41 - 33
virt/kvm/coalesced_mmio.c

@@ -14,32 +14,28 @@
 
 #include "coalesced_mmio.h"
 
-static int coalesced_mmio_in_range(struct kvm_io_device *this,
-				   gpa_t addr, int len, int is_write)
+static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev)
+{
+	return container_of(dev, struct kvm_coalesced_mmio_dev, dev);
+}
+
+static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
+				   gpa_t addr, int len)
 {
-	struct kvm_coalesced_mmio_dev *dev =
-				(struct kvm_coalesced_mmio_dev*)this->private;
 	struct kvm_coalesced_mmio_zone *zone;
-	int next;
+	struct kvm_coalesced_mmio_ring *ring;
+	unsigned avail;
 	int i;
 
-	if (!is_write)
-		return 0;
-
-	/* kvm->lock is taken by the caller and must be not released before
-         * dev.read/write
-         */
-
 	/* Are we able to batch it ? */
 
 	/* last is the first free entry
 	 * check if we don't meet the first used entry
 	 * there is always one unused entry in the buffer
 	 */
-
-	next = (dev->kvm->coalesced_mmio_ring->last + 1) %
-							KVM_COALESCED_MMIO_MAX;
-	if (next == dev->kvm->coalesced_mmio_ring->first) {
+	ring = dev->kvm->coalesced_mmio_ring;
+	avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX;
+	if (avail < KVM_MAX_VCPUS) {
 		/* full */
 		return 0;
 	}
@@ -60,14 +56,15 @@ static int coalesced_mmio_in_range(struct kvm_io_device *this,
 	return 0;
 }
 
-static void coalesced_mmio_write(struct kvm_io_device *this,
-				 gpa_t addr, int len, const void *val)
+static int coalesced_mmio_write(struct kvm_io_device *this,
+				gpa_t addr, int len, const void *val)
 {
-	struct kvm_coalesced_mmio_dev *dev =
-				(struct kvm_coalesced_mmio_dev*)this->private;
+	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
 	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
+	if (!coalesced_mmio_in_range(dev, addr, len))
+		return -EOPNOTSUPP;
 
-	/* kvm->lock must be taken by caller before call to in_range()*/
+	spin_lock(&dev->lock);
 
 	/* copy data in first free entry of the ring */
 
@@ -76,29 +73,40 @@ static void coalesced_mmio_write(struct kvm_io_device *this,
 	memcpy(ring->coalesced_mmio[ring->last].data, val, len);
 	smp_wmb();
 	ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
+	spin_unlock(&dev->lock);
+	return 0;
 }
 
 static void coalesced_mmio_destructor(struct kvm_io_device *this)
 {
-	kfree(this);
+	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
+
+	kfree(dev);
 }
 
+static const struct kvm_io_device_ops coalesced_mmio_ops = {
+	.write      = coalesced_mmio_write,
+	.destructor = coalesced_mmio_destructor,
+};
+
 int kvm_coalesced_mmio_init(struct kvm *kvm)
 {
 	struct kvm_coalesced_mmio_dev *dev;
+	int ret;
 
 	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
 	if (!dev)
 		return -ENOMEM;
-	dev->dev.write  = coalesced_mmio_write;
-	dev->dev.in_range  = coalesced_mmio_in_range;
-	dev->dev.destructor  = coalesced_mmio_destructor;
-	dev->dev.private  = dev;
+	spin_lock_init(&dev->lock);
+	kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
 	dev->kvm = kvm;
 	kvm->coalesced_mmio_dev = dev;
-	kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev);
 
-	return 0;
+	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev);
+	if (ret < 0)
+		kfree(dev);
+
+	return ret;
 }
 
 int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
@@ -109,16 +117,16 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 	if (dev == NULL)
 		return -EINVAL;
 
-	mutex_lock(&kvm->lock);
+	down_write(&kvm->slots_lock);
 	if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
-		mutex_unlock(&kvm->lock);
+		up_write(&kvm->slots_lock);
 		return -ENOBUFS;
 	}
 
 	dev->zone[dev->nb_zones] = *zone;
 	dev->nb_zones++;
 
-	mutex_unlock(&kvm->lock);
+	up_write(&kvm->slots_lock);
 	return 0;
 }
 
@@ -132,7 +140,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
 	if (dev == NULL)
 		return -EINVAL;
 
-	mutex_lock(&kvm->lock);
+	down_write(&kvm->slots_lock);
 
 	i = dev->nb_zones;
 	while(i) {
@@ -150,7 +158,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
 		i--;
 	}
 
-	mutex_unlock(&kvm->lock);
+	up_write(&kvm->slots_lock);
 
 	return 0;
 }
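
The userspace side of coalesced MMIO is unchanged by this conversion: deferred guest writes are still exposed through a ring that sits a fixed number of pages past the vcpu's kvm_run mapping, and KVM_REGISTER_COALESCED_MMIO still takes a kvm_coalesced_mmio_zone. The sketch below shows the consumer side; the page offset and structure layout follow my reading of the uapi headers of this era and should be checked against <linux/kvm.h> rather than treated as authoritative.

/*
 * Sketch only: register a zone and drain the ring after KVM_RUN returns.
 * The ring capacity is recomputed here the same way the header defines
 * KVM_COALESCED_MMIO_MAX ((page size - ring header) / entry size), to
 * avoid assuming the macro is usable from userspace.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <unistd.h>

static int register_zone(int vm_fd, __u64 addr, __u32 size)
{
	struct kvm_coalesced_mmio_zone zone = {
		.addr = addr,
		.size = size,
	};

	return ioctl(vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
}

static void drain_ring(struct kvm_run *run)
{
	struct kvm_coalesced_mmio_ring *ring;
	unsigned int max;

	/* The ring lives KVM_COALESCED_MMIO_PAGE_OFFSET pages past kvm_run. */
	ring = (void *)run + KVM_COALESCED_MMIO_PAGE_OFFSET * getpagesize();
	max = (getpagesize() - sizeof(*ring)) / sizeof(struct kvm_coalesced_mmio);

	while (ring->first != ring->last) {
		struct kvm_coalesced_mmio *ent = &ring->coalesced_mmio[ring->first];

		printf("deferred mmio write: gpa 0x%llx len %u\n",
		       (unsigned long long)ent->phys_addr, ent->len);
		/* replay ent->data[] into the device model here */
		ring->first = (ring->first + 1) % max;
	}
}

On the kernel side, the new availability check refuses to batch once fewer than KVM_MAX_VCPUS free slots remain, presumably because the range check runs before the per-device spinlock is taken, so every racing vcpu needs a guaranteed slot.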

+ 1 - 0
virt/kvm/coalesced_mmio.h

@@ -12,6 +12,7 @@
 struct kvm_coalesced_mmio_dev {
 	struct kvm_io_device dev;
 	struct kvm *kvm;
+	spinlock_t lock;
 	int nb_zones;
 	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
 };

+ 578 - 0
virt/kvm/eventfd.c

@@ -0,0 +1,578 @@
+/*
+ * kvm eventfd support - use eventfd objects to signal various KVM events
+ *
+ * Copyright 2009 Novell.  All Rights Reserved.
+ *
+ * Author:
+ *	Gregory Haskins <ghaskins@novell.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/workqueue.h>
+#include <linux/syscalls.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/list.h>
+#include <linux/eventfd.h>
+#include <linux/kernel.h>
+
+#include "iodev.h"
+
+/*
+ * --------------------------------------------------------------------
+ * irqfd: Allows an fd to be used to inject an interrupt to the guest
+ *
+ * Credit goes to Avi Kivity for the original idea.
+ * --------------------------------------------------------------------
+ */
+
+struct _irqfd {
+	struct kvm               *kvm;
+	struct eventfd_ctx       *eventfd;
+	int                       gsi;
+	struct list_head          list;
+	poll_table                pt;
+	wait_queue_head_t        *wqh;
+	wait_queue_t              wait;
+	struct work_struct        inject;
+	struct work_struct        shutdown;
+};
+
+static struct workqueue_struct *irqfd_cleanup_wq;
+
+static void
+irqfd_inject(struct work_struct *work)
+{
+	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
+	struct kvm *kvm = irqfd->kvm;
+
+	mutex_lock(&kvm->irq_lock);
+	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
+	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
+	mutex_unlock(&kvm->irq_lock);
+}
+
+/*
+ * Race-free decouple logic (ordering is critical)
+ */
+static void
+irqfd_shutdown(struct work_struct *work)
+{
+	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
+
+	/*
+	 * Synchronize with the wait-queue and unhook ourselves to prevent
+	 * further events.
+	 */
+	remove_wait_queue(irqfd->wqh, &irqfd->wait);
+
+	/*
+	 * We know no new events will be scheduled at this point, so block
+	 * until all previously outstanding events have completed
+	 */
+	flush_work(&irqfd->inject);
+
+	/*
+	 * It is now safe to release the object's resources
+	 */
+	eventfd_ctx_put(irqfd->eventfd);
+	kfree(irqfd);
+}
+
+
+/* assumes kvm->irqfds.lock is held */
+static bool
+irqfd_is_active(struct _irqfd *irqfd)
+{
+	return list_empty(&irqfd->list) ? false : true;
+}
+
+/*
+ * Mark the irqfd as inactive and schedule it for removal
+ *
+ * assumes kvm->irqfds.lock is held
+ */
+static void
+irqfd_deactivate(struct _irqfd *irqfd)
+{
+	BUG_ON(!irqfd_is_active(irqfd));
+
+	list_del_init(&irqfd->list);
+
+	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
+}
+
+/*
+ * Called with wqh->lock held and interrupts disabled
+ */
+static int
+irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
+	unsigned long flags = (unsigned long)key;
+
+	if (flags & POLLIN)
+		/* An event has been signaled, inject an interrupt */
+		schedule_work(&irqfd->inject);
+
+	if (flags & POLLHUP) {
+		/* The eventfd is closing, detach from KVM */
+		struct kvm *kvm = irqfd->kvm;
+		unsigned long flags;
+
+		spin_lock_irqsave(&kvm->irqfds.lock, flags);
+
+		/*
+		 * We must check if someone deactivated the irqfd before
+		 * we could acquire the irqfds.lock since the item is
+		 * deactivated from the KVM side before it is unhooked from
+		 * the wait-queue.  If it is already deactivated, we can
+		 * simply return knowing the other side will cleanup for us.
+		 * We cannot race against the irqfd going away since the
+		 * other side is required to acquire wqh->lock, which we hold
+		 */
+		if (irqfd_is_active(irqfd))
+			irqfd_deactivate(irqfd);
+
+		spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
+	}
+
+	return 0;
+}
+
+static void
+irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
+			poll_table *pt)
+{
+	struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
+
+	irqfd->wqh = wqh;
+	add_wait_queue(wqh, &irqfd->wait);
+}
+
+static int
+kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
+{
+	struct _irqfd *irqfd;
+	struct file *file = NULL;
+	struct eventfd_ctx *eventfd = NULL;
+	int ret;
+	unsigned int events;
+
+	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
+	if (!irqfd)
+		return -ENOMEM;
+
+	irqfd->kvm = kvm;
+	irqfd->gsi = gsi;
+	INIT_LIST_HEAD(&irqfd->list);
+	INIT_WORK(&irqfd->inject, irqfd_inject);
+	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
+
+	file = eventfd_fget(fd);
+	if (IS_ERR(file)) {
+		ret = PTR_ERR(file);
+		goto fail;
+	}
+
+	eventfd = eventfd_ctx_fileget(file);
+	if (IS_ERR(eventfd)) {
+		ret = PTR_ERR(eventfd);
+		goto fail;
+	}
+
+	irqfd->eventfd = eventfd;
+
+	/*
+	 * Install our own custom wake-up handling so we are notified via
+	 * a callback whenever someone signals the underlying eventfd
+	 */
+	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
+	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
+
+	events = file->f_op->poll(file, &irqfd->pt);
+
+	spin_lock_irq(&kvm->irqfds.lock);
+	list_add_tail(&irqfd->list, &kvm->irqfds.items);
+	spin_unlock_irq(&kvm->irqfds.lock);
+
+	/*
+	 * Check if there was an event already pending on the eventfd
+	 * before we registered, and trigger it as if we didn't miss it.
+	 */
+	if (events & POLLIN)
+		schedule_work(&irqfd->inject);
+
+	/*
+	 * do not drop the file until the irqfd is fully initialized, otherwise
+	 * we might race against the POLLHUP
+	 */
+	fput(file);
+
+	return 0;
+
+fail:
+	if (eventfd && !IS_ERR(eventfd))
+		eventfd_ctx_put(eventfd);
+
+	if (!IS_ERR(file))
+		fput(file);
+
+	kfree(irqfd);
+	return ret;
+}
+
+void
+kvm_eventfd_init(struct kvm *kvm)
+{
+	spin_lock_init(&kvm->irqfds.lock);
+	INIT_LIST_HEAD(&kvm->irqfds.items);
+	INIT_LIST_HEAD(&kvm->ioeventfds);
+}
+
+/*
+ * shutdown any irqfd's that match fd+gsi
+ */
+static int
+kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
+{
+	struct _irqfd *irqfd, *tmp;
+	struct eventfd_ctx *eventfd;
+
+	eventfd = eventfd_ctx_fdget(fd);
+	if (IS_ERR(eventfd))
+		return PTR_ERR(eventfd);
+
+	spin_lock_irq(&kvm->irqfds.lock);
+
+	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
+		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
+			irqfd_deactivate(irqfd);
+	}
+
+	spin_unlock_irq(&kvm->irqfds.lock);
+	eventfd_ctx_put(eventfd);
+
+	/*
+	 * Block until we know all outstanding shutdown jobs have completed
+	 * so that we guarantee there will not be any more interrupts on this
+	 * gsi once this deassign function returns.
+	 */
+	flush_workqueue(irqfd_cleanup_wq);
+
+	return 0;
+}
+
+int
+kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
+{
+	if (flags & KVM_IRQFD_FLAG_DEASSIGN)
+		return kvm_irqfd_deassign(kvm, fd, gsi);
+
+	return kvm_irqfd_assign(kvm, fd, gsi);
+}
+
+/*
+ * This function is called as the kvm VM fd is being released. Shutdown all
+ * irqfds that still remain open
+ */
+void
+kvm_irqfd_release(struct kvm *kvm)
+{
+	struct _irqfd *irqfd, *tmp;
+
+	spin_lock_irq(&kvm->irqfds.lock);
+
+	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
+		irqfd_deactivate(irqfd);
+
+	spin_unlock_irq(&kvm->irqfds.lock);
+
+	/*
+	 * Block until we know all outstanding shutdown jobs have completed
+	 * since we do not take a kvm* reference.
+	 */
+	flush_workqueue(irqfd_cleanup_wq);
+
+}
+
+/*
+ * create a host-wide workqueue for issuing deferred shutdown requests
+ * aggregated from all vm* instances. We need our own isolated single-thread
+ * queue to prevent deadlock against flushing the normal work-queue.
+ */
+static int __init irqfd_module_init(void)
+{
+	irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
+	if (!irqfd_cleanup_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void __exit irqfd_module_exit(void)
+{
+	destroy_workqueue(irqfd_cleanup_wq);
+}
+
+module_init(irqfd_module_init);
+module_exit(irqfd_module_exit);
+
+/*
+ * --------------------------------------------------------------------
+ * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
+ *
+ * userspace can register a PIO/MMIO address with an eventfd for receiving
+ * notification when the memory has been touched.
+ * --------------------------------------------------------------------
+ */
+
+struct _ioeventfd {
+	struct list_head     list;
+	u64                  addr;
+	int                  length;
+	struct eventfd_ctx  *eventfd;
+	u64                  datamatch;
+	struct kvm_io_device dev;
+	bool                 wildcard;
+};
+
+static inline struct _ioeventfd *
+to_ioeventfd(struct kvm_io_device *dev)
+{
+	return container_of(dev, struct _ioeventfd, dev);
+}
+
+static void
+ioeventfd_release(struct _ioeventfd *p)
+{
+	eventfd_ctx_put(p->eventfd);
+	list_del(&p->list);
+	kfree(p);
+}
+
+static bool
+ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
+{
+	u64 _val;
+
+	if (!(addr == p->addr && len == p->length))
+		/* address-range must be precise for a hit */
+		return false;
+
+	if (p->wildcard)
+		/* all else equal, wildcard is always a hit */
+		return true;
+
+	/* otherwise, we have to actually compare the data */
+
+	BUG_ON(!IS_ALIGNED((unsigned long)val, len));
+
+	switch (len) {
+	case 1:
+		_val = *(u8 *)val;
+		break;
+	case 2:
+		_val = *(u16 *)val;
+		break;
+	case 4:
+		_val = *(u32 *)val;
+		break;
+	case 8:
+		_val = *(u64 *)val;
+		break;
+	default:
+		return false;
+	}
+
+	return _val == p->datamatch ? true : false;
+}
+
+/* MMIO/PIO writes trigger an event if the addr/val match */
+static int
+ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
+		const void *val)
+{
+	struct _ioeventfd *p = to_ioeventfd(this);
+
+	if (!ioeventfd_in_range(p, addr, len, val))
+		return -EOPNOTSUPP;
+
+	eventfd_signal(p->eventfd, 1);
+	return 0;
+}
+
+/*
+ * This function is called as KVM is completely shutting down.  We do not
+ * need to worry about locking just nuke anything we have as quickly as possible
+ */
+static void
+ioeventfd_destructor(struct kvm_io_device *this)
+{
+	struct _ioeventfd *p = to_ioeventfd(this);
+
+	ioeventfd_release(p);
+}
+
+static const struct kvm_io_device_ops ioeventfd_ops = {
+	.write      = ioeventfd_write,
+	.destructor = ioeventfd_destructor,
+};
+
+/* assumes kvm->slots_lock held */
+static bool
+ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
+{
+	struct _ioeventfd *_p;
+
+	list_for_each_entry(_p, &kvm->ioeventfds, list)
+		if (_p->addr == p->addr && _p->length == p->length &&
+		    (_p->wildcard || p->wildcard ||
+		     _p->datamatch == p->datamatch))
+			return true;
+
+	return false;
+}
+
+static int
+kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
+	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	struct _ioeventfd        *p;
+	struct eventfd_ctx       *eventfd;
+	int                       ret;
+
+	/* must be natural-word sized */
+	switch (args->len) {
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* check for range overflow */
+	if (args->addr + args->len < args->addr)
+		return -EINVAL;
+
+	/* check for extra flags that we don't understand */
+	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
+		return -EINVAL;
+
+	eventfd = eventfd_ctx_fdget(args->fd);
+	if (IS_ERR(eventfd))
+		return PTR_ERR(eventfd);
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	INIT_LIST_HEAD(&p->list);
+	p->addr    = args->addr;
+	p->length  = args->len;
+	p->eventfd = eventfd;
+
+	/* The datamatch feature is optional, otherwise this is a wildcard */
+	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
+		p->datamatch = args->datamatch;
+	else
+		p->wildcard = true;
+
+	down_write(&kvm->slots_lock);
+
+	/* Verify that there isnt a match already */
+	if (ioeventfd_check_collision(kvm, p)) {
+		ret = -EEXIST;
+		goto unlock_fail;
+	}
+
+	kvm_iodevice_init(&p->dev, &ioeventfd_ops);
+
+	ret = __kvm_io_bus_register_dev(bus, &p->dev);
+	if (ret < 0)
+		goto unlock_fail;
+
+	list_add_tail(&p->list, &kvm->ioeventfds);
+
+	up_write(&kvm->slots_lock);
+
+	return 0;
+
+unlock_fail:
+	up_write(&kvm->slots_lock);
+
+fail:
+	kfree(p);
+	eventfd_ctx_put(eventfd);
+
+	return ret;
+}
+
+static int
+kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
+	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	struct _ioeventfd        *p, *tmp;
+	struct eventfd_ctx       *eventfd;
+	int                       ret = -ENOENT;
+
+	eventfd = eventfd_ctx_fdget(args->fd);
+	if (IS_ERR(eventfd))
+		return PTR_ERR(eventfd);
+
+	down_write(&kvm->slots_lock);
+
+	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
+		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
+
+		if (p->eventfd != eventfd  ||
+		    p->addr != args->addr  ||
+		    p->length != args->len ||
+		    p->wildcard != wildcard)
+			continue;
+
+		if (!p->wildcard && p->datamatch != args->datamatch)
+			continue;
+
+		__kvm_io_bus_unregister_dev(bus, &p->dev);
+		ioeventfd_release(p);
+		ret = 0;
+		break;
+	}
+
+	up_write(&kvm->slots_lock);
+
+	eventfd_ctx_put(eventfd);
+
+	return ret;
+}
+
+int
+kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
+		return kvm_deassign_ioeventfd(kvm, args);
+
+	return kvm_assign_ioeventfd(kvm, args);
+}
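
The two halves of this new file back the KVM_IRQFD and KVM_IOEVENTFD ioctls that kvm_main.c starts dispatching later in this series: an irqfd turns a signal on an eventfd into an edge injection (raise then lower) on a GSI, and an ioeventfd turns a matching guest PIO/MMIO write into an eventfd signal with no round trip to userspace. A minimal userspace sketch, with the structure fields taken from <linux/kvm.h> as I understand it and error handling reduced to the bare minimum:

/* Sketch only, not part of the merge. */
#include <linux/kvm.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <stdint.h>
#include <unistd.h>

/* Writing any value to the returned fd injects an edge on @gsi. */
static int setup_irqfd(int vm_fd, int gsi)
{
	struct kvm_irqfd req = { 0 };
	int efd = eventfd(0, 0);

	if (efd < 0)
		return -1;
	req.fd  = efd;
	req.gsi = gsi;
	if (ioctl(vm_fd, KVM_IRQFD, &req) < 0) {
		close(efd);
		return -1;
	}
	return efd;
}

/* Guest 2-byte writes of @match to PIO port @port signal the returned fd. */
static int setup_ioeventfd(int vm_fd, uint16_t port, uint16_t match)
{
	int efd = eventfd(0, 0);
	struct kvm_ioeventfd req = {
		.datamatch = match,
		.addr      = port,
		.len       = 2,
		.fd        = efd,
		.flags     = KVM_IOEVENTFD_FLAG_PIO | KVM_IOEVENTFD_FLAG_DATAMATCH,
	};

	if (efd < 0 || ioctl(vm_fd, KVM_IOEVENTFD, &req) < 0)
		return -1;
	return efd;
}

Deassignment reuses the same ioctls with KVM_IRQFD_FLAG_DEASSIGN or KVM_IOEVENTFD_FLAG_DEASSIGN set, matching the dispatch at the top of kvm_irqfd() and kvm_ioeventfd() above.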

+ 53 - 25
virt/kvm/ioapic.c

@@ -36,6 +36,7 @@
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/current.h>
+#include <trace/events/kvm.h>
 
 #include "ioapic.h"
 #include "lapic.h"
@@ -103,6 +104,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
 	unsigned index;
 	bool mask_before, mask_after;
+	union kvm_ioapic_redirect_entry *e;
 
 	switch (ioapic->ioregsel) {
 	case IOAPIC_REG_VERSION:
@@ -122,19 +124,20 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 		ioapic_debug("change redir index %x val %x\n", index, val);
 		if (index >= IOAPIC_NUM_PINS)
 			return;
-		mask_before = ioapic->redirtbl[index].fields.mask;
+		e = &ioapic->redirtbl[index];
+		mask_before = e->fields.mask;
 		if (ioapic->ioregsel & 1) {
-			ioapic->redirtbl[index].bits &= 0xffffffff;
-			ioapic->redirtbl[index].bits |= (u64) val << 32;
+			e->bits &= 0xffffffff;
+			e->bits |= (u64) val << 32;
 		} else {
-			ioapic->redirtbl[index].bits &= ~0xffffffffULL;
-			ioapic->redirtbl[index].bits |= (u32) val;
-			ioapic->redirtbl[index].fields.remote_irr = 0;
+			e->bits &= ~0xffffffffULL;
+			e->bits |= (u32) val;
+			e->fields.remote_irr = 0;
 		}
-		mask_after = ioapic->redirtbl[index].fields.mask;
+		mask_after = e->fields.mask;
 		if (mask_before != mask_after)
 			kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
-		if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG
+		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
 		    && ioapic->irr & (1 << index))
 			ioapic_service(ioapic, index);
 		break;
@@ -164,7 +167,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 	/* Always delivery PIT interrupt to vcpu 0 */
 	if (irq == 0) {
 		irqe.dest_mode = 0; /* Physical mode. */
-		irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id;
+		/* need to read apic_id from apic regiest since
+		 * it can be rewritten */
+		irqe.dest_id = ioapic->kvm->bsp_vcpu->vcpu_id;
 	}
 #endif
 	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
@@ -188,7 +193,10 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 			if ((edge && old_irr != ioapic->irr) ||
 			    (!edge && !entry.fields.remote_irr))
 				ret = ioapic_service(ioapic, irq);
+			else
+				ret = 0; /* report coalesced interrupt */
 		}
+		trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	}
 	return ret;
 }
@@ -220,24 +228,29 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
 			__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
 }
 
-static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr,
-			   int len, int is_write)
+static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
 {
-	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+	return container_of(dev, struct kvm_ioapic, dev);
+}
 
+static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
+{
 	return ((addr >= ioapic->base_address &&
 		 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
 }
 
-static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
-			     void *val)
+static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+			    void *val)
 {
-	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+	struct kvm_ioapic *ioapic = to_ioapic(this);
 	u32 result;
+	if (!ioapic_in_range(ioapic, addr))
+		return -EOPNOTSUPP;
 
 	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
+	mutex_lock(&ioapic->kvm->irq_lock);
 	addr &= 0xff;
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
@@ -264,22 +277,28 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	default:
 		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
 	}
+	mutex_unlock(&ioapic->kvm->irq_lock);
+	return 0;
 }
 
-static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
-			      const void *val)
+static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+			     const void *val)
 {
-	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+	struct kvm_ioapic *ioapic = to_ioapic(this);
 	u32 data;
+	if (!ioapic_in_range(ioapic, addr))
+		return -EOPNOTSUPP;
 
 	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
 		     (void*)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
+
+	mutex_lock(&ioapic->kvm->irq_lock);
 	if (len == 4 || len == 8)
 		data = *(u32 *) val;
 	else {
 		printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
-		return;
+		goto unlock;
 	}
 
 	addr &= 0xff;
@@ -300,6 +319,9 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 	default:
 		break;
 	}
+unlock:
+	mutex_unlock(&ioapic->kvm->irq_lock);
+	return 0;
 }
 
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
@@ -314,21 +336,27 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
 	ioapic->id = 0;
 }
 
+static const struct kvm_io_device_ops ioapic_mmio_ops = {
+	.read     = ioapic_mmio_read,
+	.write    = ioapic_mmio_write,
+};
+
 int kvm_ioapic_init(struct kvm *kvm)
 {
 	struct kvm_ioapic *ioapic;
+	int ret;
 
 	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
 	if (!ioapic)
 		return -ENOMEM;
 	kvm->arch.vioapic = ioapic;
 	kvm_ioapic_reset(ioapic);
-	ioapic->dev.read = ioapic_mmio_read;
-	ioapic->dev.write = ioapic_mmio_write;
-	ioapic->dev.in_range = ioapic_in_range;
-	ioapic->dev.private = ioapic;
+	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
 	ioapic->kvm = kvm;
-	kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
-	return 0;
+	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev);
+	if (ret < 0)
+		kfree(ioapic);
+
+	return ret;
 }
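
For context on the indirect register decoding above (IOREGSEL selects a register, IOWIN carries the data, and an odd selector addresses the high half of a 64-bit redirection entry), this is roughly how a guest programs one redirection entry. It is a guest-side sketch following the standard IOAPIC register layout, not code from this merge, and it assumes the IOAPIC sits at its conventional 0xfec00000 base.

/* Guest-side sketch; freestanding environment and identity mapping assumed. */
#include <stdint.h>

#define IOAPIC_BASE	0xfec00000UL
#define IOAPIC_REGSEL	0x00
#define IOAPIC_IOWIN	0x10

static void ioapic_reg_write(uint8_t reg, uint32_t val)
{
	volatile uint32_t *base = (volatile uint32_t *)IOAPIC_BASE;

	base[IOAPIC_REGSEL / 4] = reg;	/* becomes ioapic->ioregsel above */
	base[IOAPIC_IOWIN / 4]  = val;	/* decoded by ioapic_write_indirect() */
}

static void ioapic_set_redir(int pin, uint64_t entry)
{
	/* Redirection entries start at register 0x10, two dwords per pin. */
	ioapic_reg_write(0x10 + pin * 2, (uint32_t)entry);	    /* low half  */
	ioapic_reg_write(0x11 + pin * 2, (uint32_t)(entry >> 32)); /* high half */
}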
 

+ 30 - 25
virt/kvm/iodev.h

@@ -17,49 +17,54 @@
 #define __KVM_IODEV_H__
 
 #include <linux/kvm_types.h>
+#include <asm/errno.h>
 
-struct kvm_io_device {
-	void (*read)(struct kvm_io_device *this,
+struct kvm_io_device;
+
+/**
+ * kvm_io_device_ops are called under kvm slots_lock.
+ * read and write handlers return 0 if the transaction has been handled,
+ * or non-zero to have it passed to the next device.
+ **/
+struct kvm_io_device_ops {
+	int (*read)(struct kvm_io_device *this,
+		    gpa_t addr,
+		    int len,
+		    void *val);
+	int (*write)(struct kvm_io_device *this,
 		     gpa_t addr,
 		     int len,
-		     void *val);
-	void (*write)(struct kvm_io_device *this,
-		      gpa_t addr,
-		      int len,
-		      const void *val);
-	int (*in_range)(struct kvm_io_device *this, gpa_t addr, int len,
-			int is_write);
+		     const void *val);
 	void (*destructor)(struct kvm_io_device *this);
+};
 
-	void             *private;
+
+struct kvm_io_device {
+	const struct kvm_io_device_ops *ops;
 };
 
-static inline void kvm_iodevice_read(struct kvm_io_device *dev,
-				     gpa_t addr,
-				     int len,
-				     void *val)
+static inline void kvm_iodevice_init(struct kvm_io_device *dev,
+				     const struct kvm_io_device_ops *ops)
 {
-	dev->read(dev, addr, len, val);
+	dev->ops = ops;
 }
 
-static inline void kvm_iodevice_write(struct kvm_io_device *dev,
-				      gpa_t addr,
-				      int len,
-				      const void *val)
+static inline int kvm_iodevice_read(struct kvm_io_device *dev,
+				    gpa_t addr, int l, void *v)
 {
-	dev->write(dev, addr, len, val);
+	return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP;
 }
 
-static inline int kvm_iodevice_inrange(struct kvm_io_device *dev,
-				       gpa_t addr, int len, int is_write)
+static inline int kvm_iodevice_write(struct kvm_io_device *dev,
+				     gpa_t addr, int l, const void *v)
 {
-	return dev->in_range(dev, addr, len, is_write);
+	return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
 }
 
 static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
 {
-	if (dev->destructor)
-		dev->destructor(dev);
+	if (dev->ops->destructor)
+		dev->ops->destructor(dev);
 }
 
 #endif /* __KVM_IODEV_H__ */
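
With the in_range()/private pair gone, a device now embeds struct kvm_io_device, points it at a const ops table, and does its own range filtering: returning 0 claims the access, while -EOPNOTSUPP lets kvm_io_bus_read()/kvm_io_bus_write() (added in kvm_main.c below) offer it to the next device on the bus. A minimal in-kernel sketch of the pattern, using a made-up "foo" device purely for illustration:

/* Sketch only; "foo" is hypothetical and just mirrors the converted devices. */
#include <linux/kvm_host.h>
#include "iodev.h"

struct foo_device {
	struct kvm_io_device dev;
	gpa_t base;
	u32 reg;
};

static struct foo_device *to_foo(struct kvm_io_device *dev)
{
	return container_of(dev, struct foo_device, dev);
}

static int foo_read(struct kvm_io_device *this, gpa_t addr, int len, void *val)
{
	struct foo_device *foo = to_foo(this);

	if (addr != foo->base || len != 4)
		return -EOPNOTSUPP;	/* not ours: let the bus try the next device */

	*(u32 *)val = foo->reg;
	return 0;			/* handled */
}

static const struct kvm_io_device_ops foo_ops = {
	.read = foo_read,		/* a missing .write falls back to -EOPNOTSUPP */
};

static int foo_register(struct kvm *kvm, struct foo_device *foo, gpa_t base)
{
	kvm_iodevice_init(&foo->dev, &foo_ops);
	foo->base = base;
	return kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &foo->dev);
}

Registration can now fail with -ENOSPC when the bus is full, which is why the ioapic and coalesced-MMIO init paths above free their device when the return value is negative.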

+ 40 - 11
virt/kvm/irq_comm.c

@@ -20,6 +20,7 @@
  */
 
 #include <linux/kvm_host.h>
+#include <trace/events/kvm.h>
 
 #include <asm/msidef.h>
 #ifdef CONFIG_IA64
@@ -62,14 +63,14 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	int i, r = -1;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 			kvm_is_dm_lowest_prio(irq))
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
 
-	for (i = 0; i < KVM_MAX_VCPUS; i++) {
-		vcpu = kvm->vcpus[i];
-
-		if (!vcpu || !kvm_apic_present(vcpu))
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!kvm_apic_present(vcpu))
 			continue;
 
 		if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
@@ -99,6 +100,8 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 {
 	struct kvm_lapic_irq irq;
 
+	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+
 	irq.dest_id = (e->msi.address_lo &
 			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
 	irq.vector = (e->msi.data &
@@ -113,7 +116,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
-/* This should be called with the kvm->lock mutex held
+/* This should be called with the kvm->irq_lock mutex held
  * Return value:
  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
  *  = 0   Interrupt was coalesced (previous irq is still pending)
@@ -125,6 +128,10 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	unsigned long *irq_state, sig_level;
 	int ret = -1;
 
+	trace_kvm_set_irq(irq, level, irq_source_id);
+
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq < KVM_IOAPIC_NUM_PINS) {
 		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
 
@@ -134,7 +141,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 		else
 			clear_bit(irq_source_id, irq_state);
 		sig_level = !!(*irq_state);
-	} else /* Deal with MSI/MSI-X */
+	} else if (!level)
+		return ret;
+	else /* Deal with MSI/MSI-X */
 		sig_level = 1;
 
 	/* Not possible to detect if the guest uses the PIC or the
@@ -159,6 +168,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 	struct hlist_node *n;
 	unsigned gsi = pin;
 
+	trace_kvm_ack_irq(irqchip, pin);
+
 	list_for_each_entry(e, &kvm->irq_routing, link)
 		if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
 		    e->irqchip.irqchip == irqchip &&
@@ -175,19 +186,26 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del_init(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-/* The caller must hold kvm->lock mutex */
 int kvm_request_irq_source_id(struct kvm *kvm)
 {
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
-	int irq_source_id = find_first_zero_bit(bitmap,
+	int irq_source_id;
+
+	mutex_lock(&kvm->irq_lock);
+	irq_source_id = find_first_zero_bit(bitmap,
 				sizeof(kvm->arch.irq_sources_bitmap));
 
 	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
@@ -197,6 +215,7 @@ int kvm_request_irq_source_id(struct kvm *kvm)
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 	set_bit(irq_source_id, bitmap);
+	mutex_unlock(&kvm->irq_lock);
 
 	return irq_source_id;
 }
@@ -207,6 +226,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 
+	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
 	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
@@ -215,19 +235,24 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
 		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 				    struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	kimn->irq = irq;
 	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del(&kimn->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -235,6 +260,8 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
 	struct kvm_irq_mask_notifier *kimn;
 	struct hlist_node *n;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
 		if (kimn->irq == irq)
 			kimn->func(kimn, mask);
@@ -250,7 +277,9 @@ static void __kvm_free_irq_routing(struct list_head *irq_routing)
 
 void kvm_free_irq_routing(struct kvm *kvm)
 {
+	mutex_lock(&kvm->irq_lock);
 	__kvm_free_irq_routing(&kvm->irq_routing);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
@@ -325,13 +354,13 @@ int kvm_set_irq_routing(struct kvm *kvm,
 		e = NULL;
 	}
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	list_splice(&kvm->irq_routing, &tmp);
 	INIT_LIST_HEAD(&kvm->irq_routing);
 	list_splice(&irq_list, &kvm->irq_routing);
 	INIT_LIST_HEAD(&irq_list);
 	list_splice(&tmp, &irq_list);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	r = 0;
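
kvm_set_irq() now documents (and the WARN_ON added above enforces) that the caller holds kvm->irq_lock, and its return value distinguishes a coalesced interrupt (0) from a delivered one (> 0), which is what the IOAPIC change in this merge starts reporting and the kvm_set_irq tracepoint records. A kernel-side sketch of an edge-triggered injection with that contract spelled out; it mirrors irqfd_inject() in the new eventfd.c and is not code from the merge:

/* Sketch only, for a context that may sleep. */
static void inject_edge(struct kvm *kvm, int gsi)
{
	int ret;

	mutex_lock(&kvm->irq_lock);
	ret = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, gsi, 1);
	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, gsi, 0);
	mutex_unlock(&kvm->irq_lock);

	if (ret == 0)
		pr_debug("gsi %d coalesced: previous irq still pending\n", gsi);
	else if (ret < 0)
		pr_debug("gsi %d masked or otherwise not delivered\n", gsi);
}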
 

+ 206 - 92
virt/kvm/kvm_main.c

@@ -59,9 +59,18 @@
 #include "irq.h"
 #endif
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kvm.h>
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+/*
+ * Ordering of locks:
+ *
+ * 		kvm->slots_lock --> kvm->lock --> kvm->irq_lock
+ */
+
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
@@ -79,6 +88,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 
 static bool kvm_rebooting;
 
+static bool largepages_enabled = true;
+
 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
 						      int assigned_dev_id)
@@ -120,17 +131,13 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev;
 	struct kvm *kvm;
-	int irq, i;
+	int i;
 
 	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
 				    interrupt_work);
 	kvm = assigned_dev->kvm;
 
-	/* This is taken to safely inject irq inside the guest. When
-	 * the interrupt injection (or the ioapic code) uses a
-	 * finer-grained lock, update this
-	 */
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
@@ -143,23 +150,13 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 			kvm_set_irq(assigned_dev->kvm,
 				    assigned_dev->irq_source_id,
 				    guest_entries[i].vector, 1);
-			irq = assigned_dev->host_msix_entries[i].vector;
-			if (irq != 0)
-				enable_irq(irq);
-			assigned_dev->host_irq_disabled = false;
 		}
-	} else {
+	} else
 		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
 			    assigned_dev->guest_irq, 1);
-		if (assigned_dev->irq_requested_type &
-				KVM_DEV_IRQ_GUEST_MSI) {
-			enable_irq(assigned_dev->host_irq);
-			assigned_dev->host_irq_disabled = false;
-		}
-	}
 
 	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
-	mutex_unlock(&assigned_dev->kvm->lock);
+	mutex_unlock(&assigned_dev->kvm->irq_lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -179,8 +176,10 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 
 	schedule_work(&assigned_dev->interrupt_work);
 
-	disable_irq_nosync(irq);
-	assigned_dev->host_irq_disabled = true;
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
+		disable_irq_nosync(irq);
+		assigned_dev->host_irq_disabled = true;
+	}
 
 out:
 	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
@@ -215,7 +214,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 static void deassign_guest_irq(struct kvm *kvm,
 			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
 	assigned_dev->ack_notifier.gsi = -1;
 
 	if (assigned_dev->irq_source_id != -1)
@@ -417,6 +416,7 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm,
 {
 	dev->guest_irq = irq->guest_irq;
 	dev->ack_notifier.gsi = -1;
+	dev->host_irq_disabled = false;
 	return 0;
 }
 #endif
@@ -427,6 +427,7 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm,
 {
 	dev->guest_irq = irq->guest_irq;
 	dev->ack_notifier.gsi = -1;
+	dev->host_irq_disabled = false;
 	return 0;
 }
 #endif
@@ -693,11 +694,6 @@ out:
 }
 #endif
 
-static inline int valid_vcpu(int n)
-{
-	return likely(n >= 0 && n < KVM_MAX_VCPUS);
-}
-
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn)) {
@@ -745,12 +741,9 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 	if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
 		cpumask_clear(cpus);
 
-	me = get_cpu();
 	spin_lock(&kvm->requests_lock);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		vcpu = kvm->vcpus[i];
-		if (!vcpu)
-			continue;
+	me = smp_processor_id();
+	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (test_and_set_bit(req, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
@@ -764,7 +757,6 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 	else
 		called = false;
 	spin_unlock(&kvm->requests_lock);
-	put_cpu();
 	free_cpumask_var(cpus);
 	return called;
 }
@@ -986,7 +978,9 @@ static struct kvm *kvm_create_vm(void)
 	spin_lock_init(&kvm->mmu_lock);
 	spin_lock_init(&kvm->requests_lock);
 	kvm_io_bus_init(&kvm->pio_bus);
+	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
+	mutex_init(&kvm->irq_lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
 	init_rwsem(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
@@ -1006,19 +1000,25 @@ out:
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
+	int i;
+
 	if (!dont || free->rmap != dont->rmap)
 		vfree(free->rmap);
 
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		vfree(free->dirty_bitmap);
 
-	if (!dont || free->lpage_info != dont->lpage_info)
-		vfree(free->lpage_info);
+
+	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+		if (!dont || free->lpage_info[i] != dont->lpage_info[i]) {
+			vfree(free->lpage_info[i]);
+			free->lpage_info[i] = NULL;
+		}
+	}
 
 	free->npages = 0;
 	free->dirty_bitmap = NULL;
 	free->rmap = NULL;
-	free->lpage_info = NULL;
 }
 
 void kvm_free_physmem(struct kvm *kvm)
@@ -1071,6 +1071,8 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 {
 	struct kvm *kvm = filp->private_data;
 
+	kvm_irqfd_release(kvm);
+
 	kvm_put_kvm(kvm);
 	return 0;
 }
@@ -1089,8 +1091,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
 {
 	int r;
 	gfn_t base_gfn;
-	unsigned long npages, ugfn;
-	unsigned long largepages, i;
+	unsigned long npages;
+	unsigned long i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
 
@@ -1164,31 +1166,51 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		else
 			new.userspace_addr = 0;
 	}
-	if (npages && !new.lpage_info) {
-		largepages = 1 + (base_gfn + npages - 1) / KVM_PAGES_PER_HPAGE;
-		largepages -= base_gfn / KVM_PAGES_PER_HPAGE;
+	if (!npages)
+		goto skip_lpage;
 
-		new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info));
+	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+		unsigned long ugfn;
+		unsigned long j;
+		int lpages;
+		int level = i + 2;
 
-		if (!new.lpage_info)
+		/* Avoid unused variable warning if no large pages */
+		(void)level;
+
+		if (new.lpage_info[i])
+			continue;
+
+		lpages = 1 + (base_gfn + npages - 1) /
+			     KVM_PAGES_PER_HPAGE(level);
+		lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level);
+
+		new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i]));
+
+		if (!new.lpage_info[i])
 			goto out_free;
 
-		memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info));
+		memset(new.lpage_info[i], 0,
+		       lpages * sizeof(*new.lpage_info[i]));
 
-		if (base_gfn % KVM_PAGES_PER_HPAGE)
-			new.lpage_info[0].write_count = 1;
-		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
-			new.lpage_info[largepages-1].write_count = 1;
+		if (base_gfn % KVM_PAGES_PER_HPAGE(level))
+			new.lpage_info[i][0].write_count = 1;
+		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level))
+			new.lpage_info[i][lpages - 1].write_count = 1;
 		ugfn = new.userspace_addr >> PAGE_SHIFT;
 		/*
 		 * If the gfn and userspace address are not aligned wrt each
-		 * other, disable large page support for this slot
+		 * other, or if explicitly asked to, disable large page
+		 * support for this slot
 		 */
-		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1))
-			for (i = 0; i < largepages; ++i)
-				new.lpage_info[i].write_count = 1;
+		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
+		    !largepages_enabled)
+			for (j = 0; j < lpages; ++j)
+				new.lpage_info[i][j].write_count = 1;
 	}
 
+skip_lpage:
+
 	/* Allocate page dirty bitmap if needed */
 	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
 		unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
@@ -1200,6 +1222,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		if (old.npages)
 			kvm_arch_flush_shadow(kvm);
 	}
+#else  /* not defined CONFIG_S390 */
+	new.user_alloc = user_alloc;
+	if (user_alloc)
+		new.userspace_addr = mem->userspace_addr;
 #endif /* not defined CONFIG_S390 */
 
 	if (!npages)
@@ -1299,6 +1325,12 @@ out:
 	return r;
 }
 
+void kvm_disable_largepages(void)
+{
+	largepages_enabled = false;
+}
+EXPORT_SYMBOL_GPL(kvm_disable_largepages);
+
 int is_error_page(struct page *page)
 {
 	return page == bad_page;
@@ -1635,9 +1667,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-		if ((kvm_arch_interrupt_allowed(vcpu) &&
-					kvm_cpu_has_interrupt(vcpu)) ||
-				kvm_arch_vcpu_runnable(vcpu)) {
+		if (kvm_arch_vcpu_runnable(vcpu)) {
 			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
 			break;
 		}
@@ -1714,24 +1744,18 @@ static struct file_operations kvm_vcpu_fops = {
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
-	int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0);
-	if (fd < 0)
-		kvm_put_kvm(vcpu->kvm);
-	return fd;
+	return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0);
 }
 
 /*
  * Creates some virtual cpus.  Good luck creating more than one.
  */
-static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
+static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 {
 	int r;
-	struct kvm_vcpu *vcpu;
-
-	if (!valid_vcpu(n))
-		return -EINVAL;
+	struct kvm_vcpu *vcpu, *v;
 
-	vcpu = kvm_arch_vcpu_create(kvm, n);
+	vcpu = kvm_arch_vcpu_create(kvm, id);
 	if (IS_ERR(vcpu))
 		return PTR_ERR(vcpu);
 
@@ -1742,23 +1766,38 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 		return r;
 
 	mutex_lock(&kvm->lock);
-	if (kvm->vcpus[n]) {
-		r = -EEXIST;
+	if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
+		r = -EINVAL;
 		goto vcpu_destroy;
 	}
-	kvm->vcpus[n] = vcpu;
-	mutex_unlock(&kvm->lock);
+
+	kvm_for_each_vcpu(r, v, kvm)
+		if (v->vcpu_id == id) {
+			r = -EEXIST;
+			goto vcpu_destroy;
+		}
+
+	BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
 
 	/* Now it's all set up, let userspace reach it */
 	kvm_get_kvm(kvm);
 	r = create_vcpu_fd(vcpu);
-	if (r < 0)
-		goto unlink;
+	if (r < 0) {
+		kvm_put_kvm(kvm);
+		goto vcpu_destroy;
+	}
+
+	kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
+	smp_wmb();
+	atomic_inc(&kvm->online_vcpus);
+
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	if (kvm->bsp_vcpu_id == id)
+		kvm->bsp_vcpu = vcpu;
+#endif
+	mutex_unlock(&kvm->lock);
 	return r;
 
-unlink:
-	mutex_lock(&kvm->lock);
-	kvm->vcpus[n] = NULL;
 vcpu_destroy:
 	mutex_unlock(&kvm->lock);
 	kvm_arch_vcpu_destroy(vcpu);
@@ -2199,6 +2238,7 @@ static long kvm_vm_ioctl(struct file *filp,
 		vfree(entries);
 		break;
 	}
+#endif /* KVM_CAP_IRQ_ROUTING */
 #ifdef __KVM_HAVE_MSIX
 	case KVM_ASSIGN_SET_MSIX_NR: {
 		struct kvm_assigned_msix_nr entry_nr;
@@ -2221,7 +2261,35 @@ static long kvm_vm_ioctl(struct file *filp,
 		break;
 	}
 #endif
-#endif /* KVM_CAP_IRQ_ROUTING */
+	case KVM_IRQFD: {
+		struct kvm_irqfd data;
+
+		r = -EFAULT;
+		if (copy_from_user(&data, argp, sizeof data))
+			goto out;
+		r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags);
+		break;
+	}
+	case KVM_IOEVENTFD: {
+		struct kvm_ioeventfd data;
+
+		r = -EFAULT;
+		if (copy_from_user(&data, argp, sizeof data))
+			goto out;
+		r = kvm_ioeventfd(kvm, &data);
+		break;
+	}
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	case KVM_SET_BOOT_CPU_ID:
+		r = 0;
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus) != 0)
+			r = -EBUSY;
+		else
+			kvm->bsp_vcpu_id = arg;
+		mutex_unlock(&kvm->lock);
+		break;
+#endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
@@ -2288,6 +2356,9 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 	case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	case KVM_CAP_SET_BOOT_CPU_ID:
+#endif
 		return 1;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	case KVM_CAP_IRQ_ROUTING:
@@ -2335,7 +2406,7 @@ static long kvm_dev_ioctl(struct file *filp,
 	case KVM_TRACE_ENABLE:
 	case KVM_TRACE_PAUSE:
 	case KVM_TRACE_DISABLE:
-		r = kvm_trace_ioctl(ioctl, arg);
+		r = -EOPNOTSUPP;
 		break;
 	default:
 		return kvm_arch_dev_ioctl(filp, ioctl, arg);
@@ -2449,26 +2520,71 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 	}
 }
 
-struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
-					  gpa_t addr, int len, int is_write)
+/* kvm_io_bus_write - called under kvm->slots_lock */
+int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
+		     int len, const void *val)
 {
 	int i;
+	for (i = 0; i < bus->dev_count; i++)
+		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
+			return 0;
+	return -EOPNOTSUPP;
+}
 
-	for (i = 0; i < bus->dev_count; i++) {
-		struct kvm_io_device *pos = bus->devs[i];
+/* kvm_io_bus_read - called under kvm->slots_lock */
+int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
+{
+	int i;
+	for (i = 0; i < bus->dev_count; i++)
+		if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
+			return 0;
+	return -EOPNOTSUPP;
+}
 
-		if (pos->in_range(pos, addr, len, is_write))
-			return pos;
-	}
+int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
+			     struct kvm_io_device *dev)
+{
+	int ret;
 
-	return NULL;
+	down_write(&kvm->slots_lock);
+	ret = __kvm_io_bus_register_dev(bus, dev);
+	up_write(&kvm->slots_lock);
+
+	return ret;
 }
 
-void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
+/* An unlocked version. Caller must have write lock on slots_lock. */
+int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
+			      struct kvm_io_device *dev)
 {
-	BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));
+	if (bus->dev_count > NR_IOBUS_DEVS-1)
+		return -ENOSPC;
 
 	bus->devs[bus->dev_count++] = dev;
+
+	return 0;
+}
+
+void kvm_io_bus_unregister_dev(struct kvm *kvm,
+			       struct kvm_io_bus *bus,
+			       struct kvm_io_device *dev)
+{
+	down_write(&kvm->slots_lock);
+	__kvm_io_bus_unregister_dev(bus, dev);
+	up_write(&kvm->slots_lock);
+}
+
+/* An unlocked version. Caller must have write lock on slots_lock. */
+void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
+				 struct kvm_io_device *dev)
+{
+	int i;
+
+	for (i = 0; i < bus->dev_count; i++)
+		if (bus->devs[i] == dev) {
+			bus->devs[i] = bus->devs[--bus->dev_count];
+			break;
+		}
 }
 
 static struct notifier_block kvm_cpu_notifier = {
@@ -2501,11 +2617,9 @@ static int vcpu_stat_get(void *_offset, u64 *val)
 	*val = 0;
 	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (vcpu)
-				*val += *(u32 *)((void *)vcpu + offset);
-		}
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			*val += *(u32 *)((void *)vcpu + offset);
+
 	spin_unlock(&kvm_lock);
 	return 0;
 }
@@ -2679,15 +2793,15 @@ out_free_0:
 	__free_page(bad_page);
 out:
 	kvm_arch_exit();
-	kvm_exit_debug();
 out_fail:
+	kvm_exit_debug();
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_init);
 
 void kvm_exit(void)
 {
-	kvm_trace_cleanup();
+	tracepoint_synchronize_unregister();
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
 	sysdev_unregister(&kvm_sysdev);
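
Two userspace-visible changes in this file are easy to miss among the locking rework: KVM_CREATE_VCPU now takes an arbitrary vcpu id rather than a dense 0..n-1 index (duplicate ids fail with -EEXIST, and only the vcpu count is checked against KVM_MAX_VCPUS), and KVM_SET_BOOT_CPU_ID lets userspace pick which id becomes the BSP, but only before the first vcpu exists (-EBUSY afterwards). A hedged userspace sketch, assuming the KVM_CAP_SET_BOOT_CPU_ID capability advertised above is present:

/* Sketch only; sys_fd is the /dev/kvm handle, vm_fd a VM created from it. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int create_sparse_vcpus(int sys_fd, int vm_fd)
{
	int bsp_fd, ap_fd;

	if (ioctl(sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID) <= 0)
		return -1;

	/* Must happen while online_vcpus == 0, per the handler above. */
	if (ioctl(vm_fd, KVM_SET_BOOT_CPU_ID, 2) < 0)
		return -1;

	/* ids need not start at 0 or be contiguous any more. */
	bsp_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 2);
	ap_fd  = ioctl(vm_fd, KVM_CREATE_VCPU, 7);
	if (bsp_fd < 0 || ap_fd < 0)
		return -1;

	return bsp_fd;
}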

+ 0 - 285
virt/kvm/kvm_trace.c

@@ -1,285 +0,0 @@
-/*
- * kvm trace
- *
- * It is designed to allow debugging traces of kvm to be generated
- * on UP / SMP machines.  Each trace entry can be timestamped so that
- * it's possible to reconstruct a chronological record of trace events.
- * The implementation refers to blktrace kernel support.
- *
- * Copyright (c) 2008 Intel Corporation
- * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
- *
- * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
- *
- * Date:    Feb 2008
- */
-
-#include <linux/module.h>
-#include <linux/relay.h>
-#include <linux/debugfs.h>
-#include <linux/ktime.h>
-
-#include <linux/kvm_host.h>
-
-#define KVM_TRACE_STATE_RUNNING 	(1 << 0)
-#define KVM_TRACE_STATE_PAUSE 		(1 << 1)
-#define KVM_TRACE_STATE_CLEARUP 	(1 << 2)
-
-struct kvm_trace {
-	int trace_state;
-	struct rchan *rchan;
-	struct dentry *lost_file;
-	atomic_t lost_records;
-};
-static struct kvm_trace *kvm_trace;
-
-struct kvm_trace_probe {
-	const char *name;
-	const char *format;
-	u32 timestamp_in;
-	marker_probe_func *probe_func;
-};
-
-static inline int calc_rec_size(int timestamp, int extra)
-{
-	int rec_size = KVM_TRC_HEAD_SIZE;
-
-	rec_size += extra;
-	return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
-}
-
-static void kvm_add_trace(void *probe_private, void *call_data,
-			  const char *format, va_list *args)
-{
-	struct kvm_trace_probe *p = probe_private;
-	struct kvm_trace *kt = kvm_trace;
-	struct kvm_trace_rec rec;
-	struct kvm_vcpu *vcpu;
-	int    i, size;
-	u32    extra;
-
-	if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
-		return;
-
-	rec.rec_val	= TRACE_REC_EVENT_ID(va_arg(*args, u32));
-	vcpu		= va_arg(*args, struct kvm_vcpu *);
-	rec.pid		= current->tgid;
-	rec.vcpu_id	= vcpu->vcpu_id;
-
-	extra   	= va_arg(*args, u32);
-	WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
-	extra 		= min_t(u32, extra, KVM_TRC_EXTRA_MAX);
-
-	rec.rec_val |= TRACE_REC_TCS(p->timestamp_in)
-			| TRACE_REC_NUM_DATA_ARGS(extra);
-
-	if (p->timestamp_in) {
-		rec.u.timestamp.timestamp = ktime_to_ns(ktime_get());
-
-		for (i = 0; i < extra; i++)
-			rec.u.timestamp.extra_u32[i] = va_arg(*args, u32);
-	} else {
-		for (i = 0; i < extra; i++)
-			rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32);
-	}
-
-	size = calc_rec_size(p->timestamp_in, extra * sizeof(u32));
-	relay_write(kt->rchan, &rec, size);
-}
-
-static struct kvm_trace_probe kvm_trace_probes[] = {
-	{ "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
-	{ "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
-};
-
-static int lost_records_get(void *data, u64 *val)
-{
-	struct kvm_trace *kt = data;
-
-	*val = atomic_read(&kt->lost_records);
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
-
-/*
- *  The relay channel is used in "no-overwrite" mode, it keeps trace of how
- *  many times we encountered a full subbuffer, to tell user space app the
- *  lost records there were.
- */
-static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
-				     void *prev_subbuf, size_t prev_padding)
-{
-	struct kvm_trace *kt;
-
-	if (!relay_buf_full(buf)) {
-		if (!prev_subbuf) {
-			/*
-			 * executed only once when the channel is opened
-			 * save metadata as first record
-			 */
-			subbuf_start_reserve(buf, sizeof(u32));
-			*(u32 *)subbuf = 0x12345678;
-		}
-
-		return 1;
-	}
-
-	kt = buf->chan->private_data;
-	atomic_inc(&kt->lost_records);
-
-	return 0;
-}
-
-static struct dentry *kvm_create_buf_file_callack(const char *filename,
-						 struct dentry *parent,
-						 int mode,
-						 struct rchan_buf *buf,
-						 int *is_global)
-{
-	return debugfs_create_file(filename, mode, parent, buf,
-				   &relay_file_operations);
-}
-
-static int kvm_remove_buf_file_callback(struct dentry *dentry)
-{
-	debugfs_remove(dentry);
-	return 0;
-}
-
-static struct rchan_callbacks kvm_relay_callbacks = {
-	.subbuf_start 		= kvm_subbuf_start_callback,
-	.create_buf_file 	= kvm_create_buf_file_callack,
-	.remove_buf_file 	= kvm_remove_buf_file_callback,
-};
-
-static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
-{
-	struct kvm_trace *kt;
-	int i, r = -ENOMEM;
-
-	if (!kuts->buf_size || !kuts->buf_nr)
-		return -EINVAL;
-
-	kt = kzalloc(sizeof(*kt), GFP_KERNEL);
-	if (!kt)
-		goto err;
-
-	r = -EIO;
-	atomic_set(&kt->lost_records, 0);
-	kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir,
-					    kt, &kvm_trace_lost_ops);
-	if (!kt->lost_file)
-		goto err;
-
-	kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size,
-				kuts->buf_nr, &kvm_relay_callbacks, kt);
-	if (!kt->rchan)
-		goto err;
-
-	kvm_trace = kt;
-
-	for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
-		struct kvm_trace_probe *p = &kvm_trace_probes[i];
-
-		r = marker_probe_register(p->name, p->format, p->probe_func, p);
-		if (r)
-			printk(KERN_INFO "Unable to register probe %s\n",
-			       p->name);
-	}
-
-	kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;
-
-	return 0;
-err:
-	if (kt) {
-		if (kt->lost_file)
-			debugfs_remove(kt->lost_file);
-		if (kt->rchan)
-			relay_close(kt->rchan);
-		kfree(kt);
-	}
-	return r;
-}
-
-static int kvm_trace_enable(char __user *arg)
-{
-	struct kvm_user_trace_setup kuts;
-	int ret;
-
-	ret = copy_from_user(&kuts, arg, sizeof(kuts));
-	if (ret)
-		return -EFAULT;
-
-	ret = do_kvm_trace_enable(&kuts);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int kvm_trace_pause(void)
-{
-	struct kvm_trace *kt = kvm_trace;
-	int r = -EINVAL;
-
-	if (kt == NULL)
-		return r;
-
-	if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
-		kt->trace_state = KVM_TRACE_STATE_PAUSE;
-		relay_flush(kt->rchan);
-		r = 0;
-	}
-
-	return r;
-}
-
-void kvm_trace_cleanup(void)
-{
-	struct kvm_trace *kt = kvm_trace;
-	int i;
-
-	if (kt == NULL)
-		return;
-
-	if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
-	    kt->trace_state == KVM_TRACE_STATE_PAUSE) {
-
-		kt->trace_state = KVM_TRACE_STATE_CLEARUP;
-
-		for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
-			struct kvm_trace_probe *p = &kvm_trace_probes[i];
-			marker_probe_unregister(p->name, p->probe_func, p);
-		}
-		marker_synchronize_unregister();
-
-		relay_close(kt->rchan);
-		debugfs_remove(kt->lost_file);
-		kfree(kt);
-	}
-}
-
-int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
-{
-	void __user *argp = (void __user *)arg;
-	long r = -EINVAL;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	switch (ioctl) {
-	case KVM_TRACE_ENABLE:
-		r = kvm_trace_enable(argp);
-		break;
-	case KVM_TRACE_PAUSE:
-		r = kvm_trace_pause();
-		break;
-	case KVM_TRACE_DISABLE:
-		r = 0;
-		kvm_trace_cleanup();
-		break;
-	}
-
-	return r;
-}

Some files were not shown because too many files have changed