16 سال پیش · 7f3591cfac
--- a/Documentation/lguest/Makefile
+++ b/Documentation/lguest/Makefile
@@ -1,6 +1,5 @@
 
				 # This creates the demonstration utility "lguest" which runs a Linux guest.
			
 
				-CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE
			
 
				-LDLIBS:=-lz
			
 
				+CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE
			
 
				 
			
 
				 all: lguest
			
 
				 
			
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
--- a/Documentation/lguest/lguest.txt
+++ b/Documentation/lguest/lguest.txt
@@ -37,7 +37,6 @@ Running Lguest:
 
				      "Paravirtualized guest support" = Y
			
 
				         "Lguest guest support" = Y
			
 
				      "High Memory Support" = off/4GB
			
 
				-     "PAE (Physical Address Extension) Support" = N
			
 
				      "Alignment value to which kernel should be aligned" = 0x100000
			
 
				         (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and
			
 
				          CONFIG_PHYSICAL_ALIGN=0x100000)
			
--- a/arch/x86/include/asm/lguest.h
+++ b/arch/x86/include/asm/lguest.h
@@ -17,8 +17,13 @@
 
				 /* Pages for switcher itself, then two pages per cpu */
			
 
				 #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids)
			
 
				 
			
 
				-/* We map at -4M for ease of mapping into the guest (one PTE page). */
			
 
				+/* We map at -4M (-2M when PAE is activated) for ease of mapping
			
 
				+ * into the guest (one PTE page). */
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+#define SWITCHER_ADDR 0xFFE00000
			
 
				+#else
			
 
				 #define SWITCHER_ADDR 0xFFC00000
			
 
				+#endif
			
 
				 
			
 
				 /* Found in switcher.S */
			
 
				 extern unsigned long default_idt_entries[];
			
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -12,11 +12,13 @@
 
				 #define LHCALL_TS		8
			
 
				 #define LHCALL_SET_CLOCKEVENT	9
			
 
				 #define LHCALL_HALT		10
			
 
				+#define LHCALL_SET_PMD		13
			
 
				 #define LHCALL_SET_PTE		14
			
 
				-#define LHCALL_SET_PMD		15
			
 
				+#define LHCALL_SET_PGD		15
			
 
				 #define LHCALL_LOAD_TLS		16
			
 
				 #define LHCALL_NOTIFY		17
			
 
				 #define LHCALL_LOAD_GDT_ENTRY	18
			
 
				+#define LHCALL_SEND_INTERRUPTS	19
			
 
				 
			
 
				 #define LGUEST_TRAP_ENTRY 0x1F
			
 
				 
			
@@ -32,10 +34,10 @@
 
				  * operations?  There are two ways: the direct way is to make a "hypercall",
			
 
				  * to make requests of the Host Itself.
			
 
				  *
			
 
				- * We use the KVM hypercall mechanism. Eighteen hypercalls are
			
 
				+ * We use the KVM hypercall mechanism. Seventeen hypercalls are
			
 
				  * available: the hypercall number is put in the %eax register, and the
			
 
				- * arguments (when required) are placed in %ebx, %ecx and %edx.  If a return
			
 
				- * value makes sense, it's returned in %eax.
			
 
				+ * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
			
 
				+ * If a return value makes sense, it's returned in %eax.
			
 
				  *
			
 
				  * Grossly invalid calls result in Sudden Death at the hands of the vengeful
			
 
				  * Host, rather than returning failure.  This reflects Winston Churchill's
			
@@ -47,8 +49,9 @@
 
				 
			
 
				 #define LHCALL_RING_SIZE 64
			
 
				 struct hcall_args {
			
 
				-	/* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */
			
 
				-	unsigned long arg0, arg1, arg2, arg3;
			
 
				+	/* These map directly onto eax, ebx, ecx, edx and esi
			
 
				+	 * in struct lguest_regs */
			
 
				+	unsigned long arg0, arg1, arg2, arg3, arg4;
			
 
				 };
			
 
				 
			
 
				 #endif /* !__ASSEMBLY__ */
			
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -126,6 +126,7 @@ void foo(void)
 
				 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
			
 
				 	BLANK();
			
 
				 	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
			
 
				+	OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending);
			
 
				 	OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
			
 
				 
			
 
				 	BLANK();
			
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -2,7 +2,6 @@ config LGUEST_GUEST
 
				 	bool "Lguest guest support"
			
 
				 	select PARAVIRT
			
 
				 	depends on X86_32
			
 
				-	depends on !X86_PAE
			
 
				 	select VIRTIO
			
 
				 	select VIRTIO_RING
			
 
				 	select VIRTIO_CONSOLE
			
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -87,7 +87,7 @@ struct lguest_data lguest_data = {
 
				 
			
 
				 /*G:037 async_hcall() is pretty simple: I'm quite proud of it really.  We have a
			
 
				  * ring buffer of stored hypercalls which the Host will run though next time we
			
 
				- * do a normal hypercall.  Each entry in the ring has 4 slots for the hypercall
			
 
				+ * do a normal hypercall.  Each entry in the ring has 5 slots for the hypercall
			
 
				  * arguments, and a "hcall_status" word which is 0 if the call is ready to go,
			
 
				  * and 255 once the Host has finished with it.
			
 
				  *
			
@@ -96,7 +96,8 @@ struct lguest_data lguest_data = {
 
				  * effect of causing the Host to run all the stored calls in the ring buffer
			
 
				  * which empties it for next time! */
			
 
				 static void async_hcall(unsigned long call, unsigned long arg1,
			
 
				-			unsigned long arg2, unsigned long arg3)
			
 
				+			unsigned long arg2, unsigned long arg3,
			
 
				+			unsigned long arg4)
			
 
				 {
			
 
				 	/* Note: This code assumes we're uniprocessor. */
			
 
				 	static unsigned int next_call;
			
@@ -108,12 +109,13 @@ static void async_hcall(unsigned long call, unsigned long arg1,
 
				 	local_irq_save(flags);
			
 
				 	if (lguest_data.hcall_status[next_call] != 0xFF) {
			
 
				 		/* Table full, so do normal hcall which will flush table. */
			
 
				-		kvm_hypercall3(call, arg1, arg2, arg3);
			
 
				+		kvm_hypercall4(call, arg1, arg2, arg3, arg4);
			
 
				 	} else {
			
 
				 		lguest_data.hcalls[next_call].arg0 = call;
			
 
				 		lguest_data.hcalls[next_call].arg1 = arg1;
			
 
				 		lguest_data.hcalls[next_call].arg2 = arg2;
			
 
				 		lguest_data.hcalls[next_call].arg3 = arg3;
			
 
				+		lguest_data.hcalls[next_call].arg4 = arg4;
			
 
				 		/* Arguments must all be written before we mark it to go */
			
 
				 		wmb();
			
 
				 		lguest_data.hcall_status[next_call] = 0;
			
@@ -141,7 +143,7 @@ static void lazy_hcall1(unsigned long call,
 
				 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
			
 
				 		kvm_hypercall1(call, arg1);
			
 
				 	else
			
 
				-		async_hcall(call, arg1, 0, 0);
			
 
				+		async_hcall(call, arg1, 0, 0, 0);
			
 
				 }
			
 
				 
			
 
				 static void lazy_hcall2(unsigned long call,
			
@@ -151,7 +153,7 @@ static void lazy_hcall2(unsigned long call,
 
				 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
			
 
				 		kvm_hypercall2(call, arg1, arg2);
			
 
				 	else
			
 
				-		async_hcall(call, arg1, arg2, 0);
			
 
				+		async_hcall(call, arg1, arg2, 0, 0);
			
 
				 }
			
 
				 
			
 
				 static void lazy_hcall3(unsigned long call,
			
@@ -162,9 +164,23 @@ static void lazy_hcall3(unsigned long call,
 
				 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
			
 
				 		kvm_hypercall3(call, arg1, arg2, arg3);
			
 
				 	else
			
 
				-		async_hcall(call, arg1, arg2, arg3);
			
 
				+		async_hcall(call, arg1, arg2, arg3, 0);
			
 
				 }
			
 
				 
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+static void lazy_hcall4(unsigned long call,
			
 
				+		       unsigned long arg1,
			
 
				+		       unsigned long arg2,
			
 
				+		       unsigned long arg3,
			
 
				+		       unsigned long arg4)
			
 
				+{
			
 
				+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
			
 
				+		kvm_hypercall4(call, arg1, arg2, arg3, arg4);
			
 
				+	else
			
 
				+		async_hcall(call, arg1, arg2, arg3, arg4);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 /* When lazy mode is turned off reset the per-cpu lazy mode variable and then
			
 
				  * issue the do-nothing hypercall to flush any stored calls. */
			
 
				 static void lguest_leave_lazy_mmu_mode(void)
			
@@ -179,7 +195,7 @@ static void lguest_end_context_switch(struct task_struct *next)
 
				 	paravirt_end_context_switch(next);
			
 
				 }
			
 
				 
			
 
				-/*G:033
			
 
				+/*G:032
			
 
				  * After that diversion we return to our first native-instruction
			
 
				  * replacements: four functions for interrupt control.
			
 
				  *
			
@@ -199,30 +215,28 @@ static unsigned long save_fl(void)
 
				 {
			
 
				 	return lguest_data.irq_enabled;
			
 
				 }
			
 
				-PV_CALLEE_SAVE_REGS_THUNK(save_fl);
			
 
				-
			
 
				-/* restore_flags() just sets the flags back to the value given. */
			
 
				-static void restore_fl(unsigned long flags)
			
 
				-{
			
 
				-	lguest_data.irq_enabled = flags;
			
 
				-}
			
 
				-PV_CALLEE_SAVE_REGS_THUNK(restore_fl);
			
 
				 
			
 
				 /* Interrupts go off... */
			
 
				 static void irq_disable(void)
			
 
				 {
			
 
				 	lguest_data.irq_enabled = 0;
			
 
				 }
			
 
				+
			
 
				+/* Let's pause a moment.  Remember how I said these are called so often?
			
 
				+ * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to
			
 
				+ * break some rules.  In particular, these functions are assumed to save their
			
 
				+ * own registers if they need to: normal C functions assume they can trash the
			
 
				+ * eax register.  To use normal C functions, we use
			
 
				+ * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the
			
 
				+ * C function, then restores it. */
			
 
				+PV_CALLEE_SAVE_REGS_THUNK(save_fl);
			
 
				 PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
			
 
				+/*:*/
			
 
				 
			
 
				-/* Interrupts go on... */
			
 
				-static void irq_enable(void)
			
 
				-{
			
 
				-	lguest_data.irq_enabled = X86_EFLAGS_IF;
			
 
				-}
			
 
				-PV_CALLEE_SAVE_REGS_THUNK(irq_enable);
			
 
				+/* These are in i386_head.S */
			
 
				+extern void lg_irq_enable(void);
			
 
				+extern void lg_restore_fl(unsigned long flags);
			
 
				 
			
 
				-/*:*/
			
 
				 /*M:003 Note that we don't check for outstanding interrupts when we re-enable
			
 
				  * them (or when we unmask an interrupt).  This seems to work for the moment,
			
 
				  * since interrupts are rare and we'll just get the interrupt on the next timer
			
@@ -368,8 +382,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 
				 	case 1:	/* Basic feature request. */
			
 
				 		/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
			
 
				 		*cx &= 0x00002201;
			
 
				-		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
			
 
				-		*dx &= 0x07808111;
			
 
				+		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */
			
 
				+		*dx &= 0x07808151;
			
 
				 		/* The Host can do a nice optimization if it knows that the
			
 
				 		 * kernel mappings (addresses above 0xC0000000 or whatever
			
 
				 		 * PAGE_OFFSET is set to) haven't changed.  But Linux calls
			
@@ -388,6 +402,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 
				 		if (*ax > 0x80000008)
			
 
				 			*ax = 0x80000008;
			
 
				 		break;
			
 
				+	case 0x80000001:
			
 
				+		/* Here we should fix nx cap depending on host. */
			
 
				+		/* For this version of PAE, we just clear NX bit. */
			
 
				+		*dx &= ~(1 << 20);
			
 
				+		break;
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -521,25 +540,52 @@ static void lguest_write_cr4(unsigned long val)
 
				 static void lguest_pte_update(struct mm_struct *mm, unsigned long addr,
			
 
				 			       pte_t *ptep)
			
 
				 {
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr,
			
 
				+		    ptep->pte_low, ptep->pte_high);
			
 
				+#else
			
 
				 	lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low);
			
 
				+#endif
			
 
				 }
			
 
				 
			
 
				 static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
			
 
				 			      pte_t *ptep, pte_t pteval)
			
 
				 {
			
 
				-	*ptep = pteval;
			
 
				+	native_set_pte(ptep, pteval);
			
 
				 	lguest_pte_update(mm, addr, ptep);
			
 
				 }
			
 
				 
			
 
				-/* The Guest calls this to set a top-level entry.  Again, we set the entry then
			
 
				- * tell the Host which top-level page we changed, and the index of the entry we
			
 
				- * changed. */
			
 
				+/* The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd
			
 
				+ * to set a middle-level entry when PAE is activated.
			
 
				+ * Again, we set the entry then tell the Host which page we changed,
			
 
				+ * and the index of the entry we changed. */
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+static void lguest_set_pud(pud_t *pudp, pud_t pudval)
			
 
				+{
			
 
				+	native_set_pud(pudp, pudval);
			
 
				+
			
 
				+	/* 32 bytes aligned pdpt address and the index. */
			
 
				+	lazy_hcall2(LHCALL_SET_PGD, __pa(pudp) & 0xFFFFFFE0,
			
 
				+		   (__pa(pudp) & 0x1F) / sizeof(pud_t));
			
 
				+}
			
 
				+
			
 
				 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
			
 
				 {
			
 
				-	*pmdp = pmdval;
			
 
				+	native_set_pmd(pmdp, pmdval);
			
 
				 	lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK,
			
 
				-		   (__pa(pmdp) & (PAGE_SIZE - 1)) / 4);
			
 
				+		   (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
			
 
				 }
			
 
				+#else
			
 
				+
			
 
				+/* The Guest calls lguest_set_pmd to set a top-level entry when PAE is not
			
 
				+ * activated. */
			
 
				+static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
			
 
				+{
			
 
				+	native_set_pmd(pmdp, pmdval);
			
 
				+	lazy_hcall2(LHCALL_SET_PGD, __pa(pmdp) & PAGE_MASK,
			
 
				+		   (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
			
 
				+}
			
 
				+#endif
			
 
				 
			
 
				 /* There are a couple of legacy places where the kernel sets a PTE, but we
			
 
				  * don't know the top level any more.  This is useless for us, since we don't
			
@@ -552,11 +598,31 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 
				  * which brings boot back to 0.25 seconds. */
			
 
				 static void lguest_set_pte(pte_t *ptep, pte_t pteval)
			
 
				 {
			
 
				-	*ptep = pteval;
			
 
				+	native_set_pte(ptep, pteval);
			
 
				+	if (cr3_changed)
			
 
				+		lazy_hcall1(LHCALL_FLUSH_TLB, 1);
			
 
				+}
			
 
				+
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte)
			
 
				+{
			
 
				+	native_set_pte_atomic(ptep, pte);
			
 
				 	if (cr3_changed)
			
 
				 		lazy_hcall1(LHCALL_FLUSH_TLB, 1);
			
 
				 }
			
 
				 
			
 
				+void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
			
 
				+{
			
 
				+	native_pte_clear(mm, addr, ptep);
			
 
				+	lguest_pte_update(mm, addr, ptep);
			
 
				+}
			
 
				+
			
 
				+void lguest_pmd_clear(pmd_t *pmdp)
			
 
				+{
			
 
				+	lguest_set_pmd(pmdp, __pmd(0));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
			
 
				  * native page table operations.  On native hardware you can set a new page
			
 
				  * table entry whenever you want, but if you want to remove one you have to do
			
@@ -628,13 +694,12 @@ static void __init lguest_init_IRQ(void)
 
				 {
			
 
				 	unsigned int i;
			
 
				 
			
 
				-	for (i = 0; i < LGUEST_IRQS; i++) {
			
 
				-		int vector = FIRST_EXTERNAL_VECTOR + i;
			
 
				+	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
			
 
				 		/* Some systems map "vectors" to interrupts weirdly.  Lguest has
			
 
				 		 * a straightforward 1 to 1 mapping, so force that here. */
			
 
				-		__get_cpu_var(vector_irq)[vector] = i;
			
 
				-		if (vector != SYSCALL_VECTOR)
			
 
				-			set_intr_gate(vector, interrupt[i]);
			
 
				+		__get_cpu_var(vector_irq)[i] = i - FIRST_EXTERNAL_VECTOR;
			
 
				+		if (i != SYSCALL_VECTOR)
			
 
				+			set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
			
 
				 	}
			
 
				 	/* This call is required to set up for 4k stacks, where we have
			
 
				 	 * separate stacks for hard and soft interrupts. */
			
@@ -973,10 +1038,10 @@ static void lguest_restart(char *reason)
 
				  *
			
 
				  * Our current solution is to allow the paravirt back end to optionally patch
			
 
				  * over the indirect calls to replace them with something more efficient.  We
			
 
				- * patch the four most commonly called functions: disable interrupts, enable
			
 
				- * interrupts, restore interrupts and save interrupts.  We usually have 6 or 10
			
 
				- * bytes to patch into: the Guest versions of these operations are small enough
			
 
				- * that we can fit comfortably.
			
 
				+ * patch two of the simplest of the most commonly called functions: disable
			
 
				+ * interrupts and save interrupts.  We usually have 6 or 10 bytes to patch
			
 
				+ * into: the Guest versions of these operations are small enough that we can
			
 
				+ * fit comfortably.
			
 
				  *
			
 
				  * First we need assembly templates of each of the patchable Guest operations,
			
 
				  * and these are in i386_head.S. */
			
@@ -987,8 +1052,6 @@ static const struct lguest_insns
 
				 	const char *start, *end;
			
 
				 } lguest_insns[] = {
			
 
				 	[PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
			
 
				-	[PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },
			
 
				-	[PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },
			
 
				 	[PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
			
 
				 };
			
 
				 
			
@@ -1026,6 +1089,7 @@ __init void lguest_init(void)
 
				 	pv_info.name = "lguest";
			
 
				 	pv_info.paravirt_enabled = 1;
			
 
				 	pv_info.kernel_rpl = 1;
			
 
				+	pv_info.shared_kernel_pmd = 1;
			
 
				 
			
 
				 	/* We set up all the lguest overrides for sensitive operations.  These
			
 
				 	 * are detailed with the operations themselves. */
			
@@ -1033,9 +1097,9 @@ __init void lguest_init(void)
 
				 	/* interrupt-related operations */
			
 
				 	pv_irq_ops.init_IRQ = lguest_init_IRQ;
			
 
				 	pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl);
			
 
				-	pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl);
			
 
				+	pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl);
			
 
				 	pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable);
			
 
				-	pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable);
			
 
				+	pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable);
			
 
				 	pv_irq_ops.safe_halt = lguest_safe_halt;
			
 
				 
			
 
				 	/* init-time operations */
			
@@ -1071,6 +1135,12 @@ __init void lguest_init(void)
 
				 	pv_mmu_ops.set_pte = lguest_set_pte;
			
 
				 	pv_mmu_ops.set_pte_at = lguest_set_pte_at;
			
 
				 	pv_mmu_ops.set_pmd = lguest_set_pmd;
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	pv_mmu_ops.set_pte_atomic = lguest_set_pte_atomic;
			
 
				+	pv_mmu_ops.pte_clear = lguest_pte_clear;
			
 
				+	pv_mmu_ops.pmd_clear = lguest_pmd_clear;
			
 
				+	pv_mmu_ops.set_pud = lguest_set_pud;
			
 
				+#endif
			
 
				 	pv_mmu_ops.read_cr2 = lguest_read_cr2;
			
 
				 	pv_mmu_ops.read_cr3 = lguest_read_cr3;
			
 
				 	pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
			
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -46,10 +46,64 @@ ENTRY(lguest_entry)
 
				 	.globl lgstart_##name; .globl lgend_##name
			
 
				 
			
 
				 LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
			
 
				-LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
			
 
				-LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
			
 
				 LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
			
 
				-/*:*/
			
 
				+
			
 
				+/*G:033 But using those wrappers is inefficient (we'll see why that doesn't
			
 
				+ * matter for save_fl and irq_disable later).  If we write our routines
			
 
				+ * carefully in assembler, we can avoid clobbering any registers and avoid
			
 
				+ * jumping through the wrapper functions.
			
 
				+ *
			
 
				+ * I skipped over our first piece of assembler, but this one is worth studying
			
 
				+ * in a bit more detail so I'll describe in easy stages.  First, the routine
			
 
				+ * to enable interrupts: */
			
 
				+ENTRY(lg_irq_enable)
			
 
				+	/* The reverse of irq_disable, this sets lguest_data.irq_enabled to
			
 
				+	 * X86_EFLAGS_IF (ie. "Interrupts enabled"). */
			
 
				+	movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled
			
 
				+	/* But now we need to check if the Host wants to know: if interrupts
			
 
				+	 * were waiting to be delivered it will have set lguest_data.irq_pending
			
 
				+	 * to X86_EFLAGS_IF.  Compare with 0 (a "testl $0" ANDs with zero, so it
			
 
				+	 * could never be non-zero) and jump to send_interrupts if it is set. */
			
 
				+	cmpl $0, lguest_data+LGUEST_DATA_irq_pending
			
 
				+	jnz send_interrupts
			
 
				+	/* One cool thing about x86 is that you can do many things without using
			
 
				+	 * a register.  In this case, the normal path hasn't needed to save or
			
 
				+	 * restore any registers at all! */
			
 
				+	ret
			
 
				+send_interrupts:
			
 
				+	/* OK, now we need a register: eax is used for the hypercall number,
			
 
				+	 * which is LHCALL_SEND_INTERRUPTS.
			
 
				+	 *
			
 
				+	 * We used not to bother with this pending detection at all, which was
			
 
				+	 * much simpler.  Sooner or later the Host would realize it had to
			
 
				+	 * send us an interrupt.  But that turns out to make performance 7
			
 
				+	 * times worse on a simple tcp benchmark.  So now we do this the hard
			
 
				+	 * way. */
			
 
				+	pushl %eax
			
 
				+	movl $LHCALL_SEND_INTERRUPTS, %eax
			
 
				+	/* This is a vmcall instruction (same thing that KVM uses).  Older
			
 
				+	 * assembler versions might not know the "vmcall" instruction, so we
			
 
				+	 * create one manually here. */
			
 
				+	.byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
			
 
				+	popl %eax
			
 
				+	ret
			
 
				+
			
 
				+/* Finally, the "popf" or "restore flags" routine.  The %eax register holds the
			
 
				+ * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're
			
 
				+ * enabling interrupts again, if it's 0 we're leaving them off. */
			
 
				+ENTRY(lg_restore_fl)
			
 
				+	/* This is just "lguest_data.irq_enabled = flags;" */
			
 
				+	movl %eax, lguest_data+LGUEST_DATA_irq_enabled
			
 
				+	/* Now, if the %eax value has enabled interrupts and
			
 
				+	 * lguest_data.irq_pending is set, we want to tell the Host so it can
			
 
				+	 * deliver any outstanding interrupts.  Fortunately, both values will
			
 
				+	 * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl"
			
 
				+	 * instruction will AND them together for us.  If both are set, we
			
 
				+	 * jump to send_interrupts. */
			
 
				+	testl lguest_data+LGUEST_DATA_irq_pending, %eax
			
 
				+	jnz send_interrupts
			
 
				+	/* Again, the normal path has used no extra registers.  Clever, huh? */
			
 
				+	ret
			
 
				 
			
 
				 /* These demark the EIP range where host should never deliver interrupts. */
			
 
				 .global lguest_noirq_start
			
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
@@ -1,6 +1,6 @@
 
				 config LGUEST
			
 
				 	tristate "Linux hypervisor example code"
			
 
				-	depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX
			
 
				+	depends on X86_32 && EXPERIMENTAL && EVENTFD
			
 
				 	select HVC_DRIVER
			
 
				 	---help---
			
 
				 	  This is a very simple module which allows you to run
			
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -95,7 +95,7 @@ static __init int map_switcher(void)
 
				 	 * array of struct pages.  It increments that pointer, but we don't
			
 
				 	 * care. */
			
 
				 	pagep = switcher_page;
			
 
				-	err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep);
			
 
				+	err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
			
 
				 	if (err) {
			
 
				 		printk("lguest: map_vm_area failed: %i\n", err);
			
 
				 		goto free_vma;
			
@@ -188,6 +188,9 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 
				 {
			
 
				 	/* We stop running once the Guest is dead. */
			
 
				 	while (!cpu->lg->dead) {
			
 
				+		unsigned int irq;
			
 
				+		bool more;
			
 
				+
			
 
				 		/* First we run any hypercalls the Guest wants done. */
			
 
				 		if (cpu->hcall)
			
 
				 			do_hypercalls(cpu);
			
@@ -195,23 +198,23 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 
				 		/* It's possible the Guest did a NOTIFY hypercall to the
			
 
				 		 * Launcher, in which case we return from the read() now. */
			
 
				 		if (cpu->pending_notify) {
			
 
				-			if (put_user(cpu->pending_notify, user))
			
 
				-				return -EFAULT;
			
 
				-			return sizeof(cpu->pending_notify);
			
 
				+			if (!send_notify_to_eventfd(cpu)) {
			
 
				+				if (put_user(cpu->pending_notify, user))
			
 
				+					return -EFAULT;
			
 
				+				return sizeof(cpu->pending_notify);
			
 
				+			}
			
 
				 		}
			
 
				 
			
 
				 		/* Check for signals */
			
 
				 		if (signal_pending(current))
			
 
				 			return -ERESTARTSYS;
			
 
				 
			
 
				-		/* If Waker set break_out, return to Launcher. */
			
 
				-		if (cpu->break_out)
			
 
				-			return -EAGAIN;
			
 
				-
			
 
				 		/* Check if there are any interrupts which can be delivered now:
			
 
				 		 * if so, this sets up the hander to be executed when we next
			
 
				 		 * run the Guest. */
			
 
				-		maybe_do_interrupt(cpu);
			
 
				+		irq = interrupt_pending(cpu, &more);
			
 
				+		if (irq < LGUEST_IRQS)
			
 
				+			try_deliver_interrupt(cpu, irq, more);
			
 
				 
			
 
				 		/* All long-lived kernel loops need to check with this horrible
			
 
				 		 * thing called the freezer.  If the Host is trying to suspend,
			
@@ -224,10 +227,15 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 
				 			break;
			
 
				 
			
 
				 		/* If the Guest asked to be stopped, we sleep.  The Guest's
			
 
				-		 * clock timer or LHREQ_BREAK from the Waker will wake us. */
			
 
				+		 * clock timer will wake us. */
			
 
				 		if (cpu->halted) {
			
 
				 			set_current_state(TASK_INTERRUPTIBLE);
			
 
				-			schedule();
			
 
				+			/* Just before we sleep, make sure no interrupt snuck in
			
 
				+			 * which we should be doing. */
			
 
				+			if (interrupt_pending(cpu, &more) < LGUEST_IRQS)
			
 
				+				set_current_state(TASK_RUNNING);
			
 
				+			else
			
 
				+				schedule();
			
 
				 			continue;
			
 
				 		}
			
 
				 
			
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -37,6 +37,10 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
 
				 		/* This call does nothing, except by breaking out of the Guest
			
 
				 		 * it makes us process all the asynchronous hypercalls. */
			
 
				 		break;
			
 
				+	case LHCALL_SEND_INTERRUPTS:
			
 
				+		/* This call does nothing too, but by breaking out of the Guest
			
 
				+		 * it makes us process any pending interrupts. */
			
 
				+		break;
			
 
				 	case LHCALL_LGUEST_INIT:
			
 
				 		/* You can't get here unless you're already initialized.  Don't
			
 
				 		 * do that. */
			
@@ -73,11 +77,21 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
 
				 		guest_set_stack(cpu, args->arg1, args->arg2, args->arg3);
			
 
				 		break;
			
 
				 	case LHCALL_SET_PTE:
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+		guest_set_pte(cpu, args->arg1, args->arg2,
			
 
				+				__pte(args->arg3 | (u64)args->arg4 << 32));
			
 
				+#else
			
 
				 		guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3));
			
 
				+#endif
			
 
				+		break;
			
 
				+	case LHCALL_SET_PGD:
			
 
				+		guest_set_pgd(cpu->lg, args->arg1, args->arg2);
			
 
				 		break;
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				 	case LHCALL_SET_PMD:
			
 
				 		guest_set_pmd(cpu->lg, args->arg1, args->arg2);
			
 
				 		break;
			
 
				+#endif
			
 
				 	case LHCALL_SET_CLOCKEVENT:
			
 
				 		guest_set_clockevent(cpu, args->arg1);
			
 
				 		break;
			
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -128,30 +128,39 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
 
				 /*H:205
			
 
				  * Virtual Interrupts.
			
 
				  *
			
 
				- * maybe_do_interrupt() gets called before every entry to the Guest, to see if
			
 
				- * we should divert the Guest to running an interrupt handler. */
			
 
				-void maybe_do_interrupt(struct lg_cpu *cpu)
			
 
				+ * interrupt_pending() returns the first pending interrupt which isn't blocked
			
 
				+ * by the Guest.  It is called before every entry to the Guest, and just before
			
 
				+ * we go to sleep when the Guest has halted itself. */
			
 
				+unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more)
			
 
				 {
			
 
				 	unsigned int irq;
			
 
				 	DECLARE_BITMAP(blk, LGUEST_IRQS);
			
 
				-	struct desc_struct *idt;
			
 
				 
			
 
				 	/* If the Guest hasn't even initialized yet, we can do nothing. */
			
 
				 	if (!cpu->lg->lguest_data)
			
 
				-		return;
			
 
				+		return LGUEST_IRQS;
			
 
				 
			
 
				 	/* Take our "irqs_pending" array and remove any interrupts the Guest
			
 
				 	 * wants blocked: the result ends up in "blk". */
			
 
				 	if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts,
			
 
				 			   sizeof(blk)))
			
 
				-		return;
			
 
				+		return LGUEST_IRQS;
			
 
				 	bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS);
			
 
				 
			
 
				 	/* Find the first interrupt. */
			
 
				 	irq = find_first_bit(blk, LGUEST_IRQS);
			
 
				-	/* None?  Nothing to do */
			
 
				-	if (irq >= LGUEST_IRQS)
			
 
				-		return;
			
 
				+	/* find_next_bit() returns LGUEST_IRQS, not 0, when nothing is left. */
			
 
				+	*more = find_next_bit(blk, LGUEST_IRQS, irq+1) < LGUEST_IRQS;
			
 
				+	return irq;
			
 
				+}
			
 
				+
			
 
				+/* This actually diverts the Guest to running an interrupt handler, once an
			
 
				+ * interrupt has been identified by interrupt_pending(). */
			
 
				+void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
			
 
				+{
			
 
				+	struct desc_struct *idt;
			
 
				+
			
 
				+	BUG_ON(irq >= LGUEST_IRQS);
			
 
				 
			
 
				 	/* They may be in the middle of an iret, where they asked us never to
			
 
				 	 * deliver interrupts. */
			
@@ -170,8 +179,12 @@ void maybe_do_interrupt(struct lg_cpu *cpu)
 
				 		u32 irq_enabled;
			
 
				 		if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled))
			
 
				 			irq_enabled = 0;
			
 
				-		if (!irq_enabled)
			
 
				+		if (!irq_enabled) {
			
 
				+			/* Make sure they know an IRQ is pending. */
			
 
				+			put_user(X86_EFLAGS_IF,
			
 
				+				 &cpu->lg->lguest_data->irq_pending);
			
 
				 			return;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	/* Look at the IDT entry the Guest gave us for this interrupt.  The
			
@@ -194,6 +207,25 @@ void maybe_do_interrupt(struct lg_cpu *cpu)
 
				 	 * here is a compromise which means at least it gets updated every
			
 
				 	 * timer interrupt. */
			
 
				 	write_timestamp(cpu);
			
 
				+
			
 
				+	/* If there are no other interrupts we want to deliver, clear
			
 
				+	 * the pending flag. */
			
 
				+	if (!more)
			
 
				+		put_user(0, &cpu->lg->lguest_data->irq_pending);
			
 
				+}
			
 
				+
			
 
				+/* Queue interrupt "irq" for the Guest, then prod the task which runs it. */
			
 
				+void set_interrupt(struct lg_cpu *cpu, unsigned int irq)
			
 
				+{
			
 
				+	/* Record it as pending: the core code checks whether it can be
			
 
				+	 * delivered the next time the Guest runs. */
			
 
				+	set_bit(irq, cpu->irqs_pending);
			
 
				+
			
 
				+	/* If the task was asleep (eg. halted), waking it is all we need. */
			
 
				+	if (wake_up_process(cpu->tsk))
			
 
				+		return;
			
 
				+
			
 
				+	/* Otherwise it might be running the Guest right now:
			
 
				+	 * kick_process() will knock it out. */
			
 
				+	kick_process(cpu->tsk);
			
 
				 }
			
 
				 /*:*/
			
 
				 
			
@@ -510,10 +542,7 @@ static enum hrtimer_restart clockdev_fn(struct hrtimer *timer)
 
				 	struct lg_cpu *cpu = container_of(timer, struct lg_cpu, hrt);
			
 
				 
			
 
				 	/* Remember the first interrupt is the timer interrupt. */
			
 
				-	set_bit(0, cpu->irqs_pending);
			
 
				-	/* If the Guest is actually stopped, we need to wake it up. */
			
 
				-	if (cpu->halted)
			
 
				-		wake_up_process(cpu->tsk);
			
 
				+	set_interrupt(cpu, 0);
			
 
				 	return HRTIMER_NORESTART;
			
 
				 }
			
 
				 
			
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -49,7 +49,7 @@ struct lg_cpu {
 
				 	u32 cr2;
			
 
				 	int ts;
			
 
				 	u32 esp1;
			
 
				-	u8 ss1;
			
 
				+	u16 ss1;
			
 
				 
			
 
				 	/* Bitmap of what has changed: see CHANGED_* above. */
			
 
				 	int changed;
			
@@ -71,9 +71,7 @@ struct lg_cpu {
 
				 	/* Virtual clock device */
			
 
				 	struct hrtimer hrt;
			
 
				 
			
 
				-	/* Do we need to stop what we're doing and return to userspace? */
			
 
				-	int break_out;
			
 
				-	wait_queue_head_t break_wq;
			
 
				+	/* Did the Guest tell us to halt? */
			
 
				 	int halted;
			
 
				 
			
 
				 	/* Pending virtual interrupts */
			
@@ -82,6 +80,16 @@ struct lg_cpu {
 
				 	struct lg_cpu_arch arch;
			
 
				 };
			
 
				 
			
 
				+/* One registered notification: an address and the eventfd to signal for
			
 
				+ * it.  See send_notify_to_eventfd() and add_eventfd(). */
			
 
				+struct lg_eventfd {
			
 
				+	/* Compared against cpu->pending_notify to find a match. */
			
 
				+	unsigned long addr;
			
 
				+	/* File obtained from eventfd_fget(); passed to eventfd_signal(). */
			
 
				+	struct file *event;
			
 
				+};
			
 
				+
			
 
				+/* The set of registered eventfds, hung off lg->eventfds.  Readers walk it
			
 
				+ * under rcu_read_lock(); add_eventfd() never grows one in place, it
			
 
				+ * publishes a larger copy instead. */
			
 
				+struct lg_eventfd_map {
			
 
				+	/* Number of valid entries in map[]. */
			
 
				+	unsigned int num;
			
 
				+	/* Flexible array member, sized when the map is allocated. */
			
 
				+	struct lg_eventfd map[];
			
 
				+};
			
 
				+
			
 
				 /* The private info the thread maintains about the guest. */
			
 
				 struct lguest
			
 
				 {
			
@@ -102,6 +110,8 @@ struct lguest
 
				 	unsigned int stack_pages;
			
 
				 	u32 tsc_khz;
			
 
				 
			
 
				+	struct lg_eventfd_map *eventfds;
			
 
				+
			
 
				 	/* Dead? */
			
 
				 	const char *dead;
			
 
				 };
			
@@ -137,9 +147,13 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
 
				  * in the kernel. */
			
 
				 #define pgd_flags(x)	(pgd_val(x) & ~PAGE_MASK)
			
 
				 #define pgd_pfn(x)	(pgd_val(x) >> PAGE_SHIFT)
			
 
				+#define pmd_flags(x)    (pmd_val(x) & ~PAGE_MASK)
			
 
				+#define pmd_pfn(x)	(pmd_val(x) >> PAGE_SHIFT)
			
 
				 
			
 
				 /* interrupts_and_traps.c: */
			
 
				-void maybe_do_interrupt(struct lg_cpu *cpu);
			
 
				+unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more);
			
 
				+void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more);
			
 
				+void set_interrupt(struct lg_cpu *cpu, unsigned int irq);
			
 
				 bool deliver_trap(struct lg_cpu *cpu, unsigned int num);
			
 
				 void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i,
			
 
				 			  u32 low, u32 hi);
			
@@ -150,6 +164,7 @@ void setup_default_idt_entries(struct lguest_ro_state *state,
 
				 void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
			
 
				 		const unsigned long *def);
			
 
				 void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta);
			
 
				+bool send_notify_to_eventfd(struct lg_cpu *cpu);
			
 
				 void init_clockdev(struct lg_cpu *cpu);
			
 
				 bool check_syscall_vector(struct lguest *lg);
			
 
				 int init_interrupts(void);
			
@@ -168,7 +183,10 @@ void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt);
 
				 int init_guest_pagetable(struct lguest *lg);
			
 
				 void free_guest_pagetable(struct lguest *lg);
			
 
				 void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
			
 
				+void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 i);
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				 void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
			
 
				+#endif
			
 
				 void guest_pagetable_clear_all(struct lg_cpu *cpu);
			
 
				 void guest_pagetable_flush_user(struct lg_cpu *cpu);
			
 
				 void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
			
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -7,32 +7,83 @@
 
				 #include <linux/miscdevice.h>
			
 
				 #include <linux/fs.h>
			
 
				 #include <linux/sched.h>
			
 
				+#include <linux/eventfd.h>
			
 
				+#include <linux/file.h>
			
 
				 #include "lg.h"
			
 
				 
			
 
				-/*L:055 When something happens, the Waker process needs a way to stop the
			
 
				- * kernel running the Guest and return to the Launcher.  So the Waker writes
			
 
				- * LHREQ_BREAK and the value "1" to /dev/lguest to do this.  Once the Launcher
			
 
				- * has done whatever needs attention, it writes LHREQ_BREAK and "0" to release
			
 
				- * the Waker. */
			
 
				-static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input)
			
 
				+/* Signal the eventfd registered for the address held in cpu->pending_notify,
			
 
				+ * if there is one, and clear pending_notify once it has been signalled.
			
 
				+ * Returns true when pending_notify is zero on the way out. */
			
 
				+bool send_notify_to_eventfd(struct lg_cpu *cpu)
			
 
				 {
			
 
				-	unsigned long on;
			
 
				+	unsigned int i;
			
 
				+	struct lg_eventfd_map *map;
			
 
				+
			
 
				+	/* lg->eventfds is RCU-protected: add_eventfd() can publish a new
			
 
				+	 * map and free the old one, so only use "map" inside the read-side
			
 
				+	 * section. */
			
 
				+	rcu_read_lock();
			
 
				+	map = rcu_dereference(cpu->lg->eventfds);
			
 
				+	for (i = 0; i < map->num; i++) {
			
 
				+		if (map->map[i].addr == cpu->pending_notify) {
			
 
				+			eventfd_signal(map->map[i].event, 1);
			
 
				+			cpu->pending_notify = 0;
			
 
				+			/* Only the first matching entry is signalled. */
			
 
				+			break;
			
 
				+		}
			
 
				+	}
			
 
				+	rcu_read_unlock();
			
 
				+	return cpu->pending_notify == 0;
			
 
				+}
			
 
				 
			
 
				-	/* Fetch whether they're turning break on or off. */
			
 
				-	if (get_user(on, input) != 0)
			
 
				-		return -EFAULT;
			
 
				+/* Register eventfd "fd" to be signalled for notifications on "addr".
			
 
				+ *
			
 
				+ * Builds a copy of lg->eventfds with one extra entry, publishes it with
			
 
				+ * rcu_assign_pointer() and frees the old map after synchronize_rcu().
			
 
				+ * Returns 0 on success, -EINVAL for a zero addr, -ENOMEM if the new map
			
 
				+ * cannot be allocated, or the error reported by eventfd_fget().
			
 
				+ * attach_eventfd() calls this with lguest_lock held. */
			
 
				+static int add_eventfd(struct lguest *lg, unsigned long addr, int fd)
			
 
				+{
			
 
				+	struct lg_eventfd_map *new, *old = lg->eventfds;
			
 
				 
			
 
				-	if (on) {
			
 
				-		cpu->break_out = 1;
			
 
				-		/* Pop it out of the Guest (may be running on different CPU) */
			
 
				-		wake_up_process(cpu->tsk);
			
 
				-		/* Wait for them to reset it */
			
 
				-		return wait_event_interruptible(cpu->break_wq, !cpu->break_out);
			
 
				-	} else {
			
 
				-		cpu->break_out = 0;
			
 
				-		wake_up(&cpu->break_wq);
			
 
				-		return 0;
			
 
				+	if (!addr)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				+	/* Replace the old array with the new one, carefully: others can
			
 
				+	 * be accessing it at the same time */
			
 
				+	new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1),
			
 
				+		      GFP_KERNEL);
			
 
				+	if (!new)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	/* First make identical copy. */
			
 
				+	memcpy(new->map, old->map, sizeof(old->map[0]) * old->num);
			
 
				+	new->num = old->num;
			
 
				+
			
 
				+	/* Now append new entry. */
			
 
				+	new->map[new->num].addr = addr;
			
 
				+	new->map[new->num].event = eventfd_fget(fd);
			
 
				+	if (IS_ERR(new->map[new->num].event)) {
			
 
				+		/* Fetch the error code before freeing the memory it
			
 
				+		 * lives in. */
			
 
				+		int err = PTR_ERR(new->map[new->num].event);
			
 
				+
			
 
				+		kfree(new);
			
 
				+		return err;
			
 
				 	}
			
 
				+	new->num++;
			
 
				+
			
 
				+	/* Now put new one in place. */
			
 
				+	rcu_assign_pointer(lg->eventfds, new);
			
 
				+
			
 
				+	/* We're not in a big hurry.  Wait until no one's looking at old
			
 
				+	 * version, then delete it. */
			
 
				+	synchronize_rcu();
			
 
				+	kfree(old);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/* Handle LHREQ_EVENTFD: read an address and an eventfd descriptor from the
			
 
				+ * two words at "input" and register them with add_eventfd().  Returns
			
 
				+ * -EFAULT if either word cannot be read, otherwise whatever add_eventfd()
			
 
				+ * returned. */
			
 
				+static int attach_eventfd(struct lguest *lg, const unsigned long __user *input)
			
 
				+{
			
 
				+	unsigned long addr, fd;
			
 
				+	int err;
			
 
				+
			
 
				+	if (get_user(addr, input) != 0)
			
 
				+		return -EFAULT;
			
 
				+	input++;
			
 
				+	if (get_user(fd, input) != 0)
			
 
				+		return -EFAULT;
			
 
				+
			
 
				+	/* add_eventfd() replaces lg->eventfds, so serialize writers. */
			
 
				+	mutex_lock(&lguest_lock);
			
 
				+	err = add_eventfd(lg, addr, fd);
			
 
				+	mutex_unlock(&lguest_lock);
			
 
				+
			
 
				+	/* Pass failures (bad addr, bad fd, no memory) back to the caller
			
 
				+	 * rather than claiming success. */
			
 
				+	return err;
			
 
				 }
			
 
				 
			
 
				 /*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt
			
@@ -45,9 +96,8 @@ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input)
 
				 		return -EFAULT;
			
 
				 	if (irq >= LGUEST_IRQS)
			
 
				 		return -EINVAL;
			
 
				-	/* Next time the Guest runs, the core code will see if it can deliver
			
 
				-	 * this interrupt. */
			
 
				-	set_bit(irq, cpu->irqs_pending);
			
 
				+
			
 
				+	set_interrupt(cpu, irq);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -126,9 +176,6 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
 
				 	 * address. */
			
 
				 	lguest_arch_setup_regs(cpu, start_ip);
			
 
				 
			
 
				-	/* Initialize the queue for the Waker to wait on */
			
 
				-	init_waitqueue_head(&cpu->break_wq);
			
 
				-
			
 
				 	/* We keep a pointer to the Launcher task (ie. current task) for when
			
 
				 	 * other Guests want to wake this one (eg. console input). */
			
 
				 	cpu->tsk = current;
			
@@ -185,6 +232,13 @@ static int initialize(struct file *file, const unsigned long __user *input)
 
				 		goto unlock;
			
 
				 	}
			
 
				 
			
 
				+	lg->eventfds = kmalloc(sizeof(*lg->eventfds), GFP_KERNEL);
			
 
				+	if (!lg->eventfds) {
			
 
				+		err = -ENOMEM;
			
 
				+		goto free_lg;
			
 
				+	}
			
 
				+	lg->eventfds->num = 0;
			
 
				+
			
 
				 	/* Populate the easy fields of our "struct lguest" */
			
 
				 	lg->mem_base = (void __user *)args[0];
			
 
				 	lg->pfn_limit = args[1];
			
@@ -192,7 +246,7 @@ static int initialize(struct file *file, const unsigned long __user *input)
 
				 	/* This is the first cpu (cpu 0) and it will start booting at args[2] */
			
 
				 	err = lg_cpu_start(&lg->cpus[0], 0, args[2]);
			
 
				 	if (err)
			
 
				-		goto release_guest;
			
 
				+		goto free_eventfds;
			
 
				 
			
 
				 	/* Initialize the Guest's shadow page tables, using the toplevel
			
 
				 	 * address the Launcher gave us.  This allocates memory, so can fail. */
			
@@ -211,7 +265,9 @@ static int initialize(struct file *file, const unsigned long __user *input)
 
				 free_regs:
			
 
				 	/* FIXME: This should be in free_vcpu */
			
 
				 	free_page(lg->cpus[0].regs_page);
			
 
				-release_guest:
			
 
				+free_eventfds:
			
 
				+	kfree(lg->eventfds);
			
 
				+free_lg:
			
 
				 	kfree(lg);
			
 
				 unlock:
			
 
				 	mutex_unlock(&lguest_lock);
			
@@ -252,11 +308,6 @@ static ssize_t write(struct file *file, const char __user *in,
 
				 		/* Once the Guest is dead, you can only read() why it died. */
			
 
				 		if (lg->dead)
			
 
				 			return -ENOENT;
			
 
				-
			
 
				-		/* If you're not the task which owns the Guest, all you can do
			
 
				-		 * is break the Launcher out of running the Guest. */
			
 
				-		if (current != cpu->tsk && req != LHREQ_BREAK)
			
 
				-			return -EPERM;
			
 
				 	}
			
 
				 
			
 
				 	switch (req) {
			
@@ -264,8 +315,8 @@ static ssize_t write(struct file *file, const char __user *in,
 
				 		return initialize(file, input);
			
 
				 	case LHREQ_IRQ:
			
 
				 		return user_send_irq(cpu, input);
			
 
				-	case LHREQ_BREAK:
			
 
				-		return break_guest_out(cpu, input);
			
 
				+	case LHREQ_EVENTFD:
			
 
				+		return attach_eventfd(lg, input);
			
 
				 	default:
			
 
				 		return -EINVAL;
			
 
				 	}
			
@@ -303,6 +354,12 @@ static int close(struct inode *inode, struct file *file)
 
				 		 * the Launcher's memory management structure. */
			
 
				 		mmput(lg->cpus[i].mm);
			
 
				 	}
			
 
				+
			
 
				+	/* Release any eventfds they registered. */
			
 
				+	for (i = 0; i < lg->eventfds->num; i++)
			
 
				+		fput(lg->eventfds->map[i].event);
			
 
				+	kfree(lg->eventfds);
			
 
				+
			
 
				 	/* If lg->dead doesn't contain an error code it will be NULL or a
			
 
				 	 * kmalloc()ed string, either of which is ok to hand to kfree(). */
			
 
				 	if (!IS_ERR(lg->dead))
			
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -53,6 +53,17 @@
 
				  * page.  */
			
 
				 #define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1)
			
 
				 
			
 
				+/* For PAE we need the PMD index as well. We use the last 2MB, so we
			
 
				+ * will need the last pmd entry of the last pmd page.  */
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+#define SWITCHER_PMD_INDEX 	(PTRS_PER_PMD - 1)
			
 
				+#define RESERVE_MEM 		2U
			
 
				+#define CHECK_GPGD_MASK		_PAGE_PRESENT
			
 
				+#else
			
 
				+#define RESERVE_MEM 		4U
			
 
				+#define CHECK_GPGD_MASK		_PAGE_TABLE
			
 
				+#endif
			
 
				+
			
 
				 /* We actually need a separate PTE page for each CPU.  Remember that after the
			
 
				  * Switcher code itself comes two pages for each CPU, and we don't want this
			
 
				  * CPU's guest to see the pages of any other CPU. */
			
@@ -73,24 +84,59 @@ static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr)
 
				 {
			
 
				 	unsigned int index = pgd_index(vaddr);
			
 
				 
			
 
				+#ifndef CONFIG_X86_PAE
			
 
				 	/* We kill any Guest trying to touch the Switcher addresses. */
			
 
				 	if (index >= SWITCHER_PGD_INDEX) {
			
 
				 		kill_guest(cpu, "attempt to access switcher pages");
			
 
				 		index = 0;
			
 
				 	}
			
 
				+#endif
			
 
				 	/* Return a pointer index'th pgd entry for the i'th page table. */
			
 
				 	return &cpu->lg->pgdirs[i].pgdir[index];
			
 
				 }
			
 
				 
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+/* Given the shadow PGD entry from above (which holds the address of a PMD
			
 
				+ * page), return a pointer to the PMD entry in that page which covers
			
 
				+ * "vaddr". */
			
 
				+static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
			
 
				+{
			
 
				+	pmd_t *pmds;
			
 
				+	unsigned int slot = pmd_index(vaddr);
			
 
				+	bool in_switcher = pgd_index(vaddr) == SWITCHER_PGD_INDEX &&
			
 
				+			   slot >= SWITCHER_PMD_INDEX;
			
 
				+
			
 
				+	/* The Switcher owns the last PMD entry under the last PGD entry:
			
 
				+	 * a Guest reaching for it is killed and handed entry 0 instead. */
			
 
				+	if (in_switcher) {
			
 
				+		kill_guest(cpu, "attempt to access switcher pages");
			
 
				+		slot = 0;
			
 
				+	}
			
 
				+
			
 
				+	/* Callers must only pass a PGD entry which is present. */
			
 
				+	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
			
 
				+	pmds = __va(pgd_pfn(spgd) << PAGE_SHIFT);
			
 
				+
			
 
				+	return pmds + slot;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 /* This routine then takes the page directory entry returned above, which
			
 
				  * contains the address of the page table entry (PTE) page.  It then returns a
			
 
				  * pointer to the PTE entry for the given address. */
			
 
				-static pte_t *spte_addr(pgd_t spgd, unsigned long vaddr)
			
 
				+static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
			
 
				 {
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	/* PAE has an extra level: find the PMD entry first, it is what
			
 
				+	 * points at the PTE page. */
			
 
				+	pmd_t *pmd = spmd_addr(cpu, spgd, vaddr);
			
 
				+	pte_t *page = __va(pmd_pfn(*pmd) << PAGE_SHIFT);
			
 
				+
			
 
				+	/* You should never call this if the PMD entry wasn't valid */
			
 
				+	BUG_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT));
			
 
				+#else
			
 
				 	pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
			
 
				 	/* You should never call this if the PGD entry wasn't valid */
			
 
				 	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
			
 
				-	return &page[(vaddr >> PAGE_SHIFT) % PTRS_PER_PTE];
			
 
				+#endif
			
 
				+
			
 
				+	/* Either way "page" is now the PTE page: pick vaddr's slot in it. */
			
 
				+	return &page[pte_index(vaddr)];
			
 
				 }
			
 
				 
			
 
				 /* These two functions just like the above two, except they access the Guest
			
@@ -101,12 +147,32 @@ static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
 
				 	return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t);
			
 
				 }
			
 
				 
			
 
				-static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr)
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+/* Given a Guest PGD entry, work out the address of the Guest's PMD entry
			
 
				+ * for "vaddr".  The PGD entry must be present. */
			
 
				+static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr)
			
 
				+{
			
 
				+	unsigned long pmd_page, offset;
			
 
				+
			
 
				+	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
			
 
				+
			
 
				+	/* Start of the PMD page, plus the offset of vaddr's entry in it. */
			
 
				+	pmd_page = pgd_pfn(gpgd) << PAGE_SHIFT;
			
 
				+	offset = pmd_index(vaddr) * sizeof(pmd_t);
			
 
				+
			
 
				+	return pmd_page + offset;
			
 
				+}
			
 
				+
			
 
				+/* PAE flavour: given a Guest PMD entry, work out the address of the Guest's
			
 
				+ * PTE entry for "vaddr".  The PMD entry must be present; "cpu" is not used
			
 
				+ * in the body. */
			
 
				+static unsigned long gpte_addr(struct lg_cpu *cpu,
			
 
				+			       pmd_t gpmd, unsigned long vaddr)
			
 
				+{
			
 
				+	unsigned long pte_page, offset;
			
 
				+
			
 
				+	BUG_ON(!(pmd_flags(gpmd) & _PAGE_PRESENT));
			
 
				+
			
 
				+	/* Start of the PTE page, plus the offset of vaddr's entry in it. */
			
 
				+	pte_page = pmd_pfn(gpmd) << PAGE_SHIFT;
			
 
				+	offset = pte_index(vaddr) * sizeof(pte_t);
			
 
				+
			
 
				+	return pte_page + offset;
			
 
				+}
			
 
				+#else
			
 
				+/* Non-PAE flavour: the Guest PGD entry points straight at the PTE page, so
			
 
				+ * return the address of vaddr's PTE entry within it.  "cpu" is not used in
			
 
				+ * the body. */
			
 
				+static unsigned long gpte_addr(struct lg_cpu *cpu,
			
 
				+				pgd_t gpgd, unsigned long vaddr)
			
 
				 {
			
 
				 	unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
			
 
				+
			
 
				 	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
			
 
				-	return gpage + ((vaddr>>PAGE_SHIFT) % PTRS_PER_PTE) * sizeof(pte_t);
			
 
				+	return gpage + pte_index(vaddr) * sizeof(pte_t);
			
 
				 }
			
 
				+#endif
			
 
				 /*:*/
			
 
				 
			
 
				 /*M:014 get_pfn is slow: we could probably try to grab batches of pages here as
			
@@ -171,7 +237,7 @@ static void release_pte(pte_t pte)
 
				 	/* Remember that get_user_pages_fast() took a reference to the page, in
			
 
				 	 * get_pfn()?  We have to put it back now. */
			
 
				 	if (pte_flags(pte) & _PAGE_PRESENT)
			
 
				-		put_page(pfn_to_page(pte_pfn(pte)));
			
 
				+		put_page(pte_page(pte));
			
 
				 }
			
 
				 /*:*/
			
 
				 
			
@@ -184,11 +250,20 @@ static void check_gpte(struct lg_cpu *cpu, pte_t gpte)
 
				 
			
 
				+/* Kill the Guest if its PGD entry carries any flag outside CHECK_GPGD_MASK
			
 
				+ * (_PAGE_PRESENT with PAE, _PAGE_TABLE without) or names a page frame at or
			
 
				+ * beyond lg->pfn_limit. */
			
 
				 static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd)
			
 
				 {
			
 
				-	if ((pgd_flags(gpgd) & ~_PAGE_TABLE) ||
			
 
				+	if ((pgd_flags(gpgd) & ~CHECK_GPGD_MASK) ||
			
 
				 	   (pgd_pfn(gpgd) >= cpu->lg->pfn_limit))
			
 
				 		kill_guest(cpu, "bad page directory entry");
			
 
				 }
			
 
				 
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+/* Kill the Guest if its PMD entry carries any flag outside _PAGE_TABLE or
			
 
				+ * names a page frame at or beyond lg->pfn_limit. */
			
 
				+static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd)
			
 
				+{
			
 
				+	bool bad_flags = (pmd_flags(gpmd) & ~_PAGE_TABLE) != 0;
			
 
				+	bool bad_frame = pmd_pfn(gpmd) >= cpu->lg->pfn_limit;
			
 
				+
			
 
				+	if (bad_flags || bad_frame)
			
 
				+		kill_guest(cpu, "bad page middle directory entry");
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 /*H:330
			
 
				  * (i) Looking up a page table entry when the Guest faults.
			
 
				  *
			
@@ -207,6 +282,11 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 
				 	pte_t gpte;
			
 
				 	pte_t *spte;
			
 
				 
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	pmd_t *spmd;
			
 
				+	pmd_t gpmd;
			
 
				+#endif
			
 
				+
			
 
				 	/* First step: get the top-level Guest page table entry. */
			
 
				 	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
			
 
				 	/* Toplevel not present?  We can't map it in. */
			
@@ -228,12 +308,45 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 
				 		check_gpgd(cpu, gpgd);
			
 
				 		/* And we copy the flags to the shadow PGD entry.  The page
			
 
				 		 * number in the shadow PGD is the page we just allocated. */
			
 
				-		*spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd));
			
 
				+		set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd)));
			
 
				 	}
			
 
				 
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
			
 
				+	/* middle level not present?  We can't map it in. */
			
 
				+	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
			
 
				+		return false;
			
 
				+
			
 
				+	/* Now look at the matching shadow entry. */
			
 
				+	spmd = spmd_addr(cpu, *spgd, vaddr);
			
 
				+
			
 
				+	if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) {
			
 
				+		/* No shadow entry: allocate a new shadow PTE page. */
			
 
				+		unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
			
 
				+
			
 
				+		/* This is not really the Guest's fault, but killing it is
			
 
				+		* simple for this corner case. */
			
 
				+		if (!ptepage) {
			
 
				+			kill_guest(cpu, "out of memory allocating pte page");
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+		/* We check that the Guest pmd is OK. */
			
 
				+		check_gpmd(cpu, gpmd);
			
 
				+
			
 
				+		/* And we copy the flags to the shadow PMD entry.  The page
			
 
				+		 * number in the shadow PMD is the page we just allocated. */
			
 
				+		native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
			
 
				+	}
			
 
				+
			
 
				+	/* OK, now we look at the lower level in the Guest page table: keep its
			
 
				+	 * address, because we might update it later. */
			
 
				+	gpte_ptr = gpte_addr(cpu, gpmd, vaddr);
			
 
				+#else
			
 
				 	/* OK, now we look at the lower level in the Guest page table: keep its
			
 
				 	 * address, because we might update it later. */
			
 
				-	gpte_ptr = gpte_addr(gpgd, vaddr);
			
 
				+	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
			
 
				+#endif
			
 
				 	gpte = lgread(cpu, gpte_ptr, pte_t);
			
 
				 
			
 
				 	/* If this page isn't in the Guest page tables, we can't page it in. */
			
@@ -259,7 +372,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 
				 		gpte = pte_mkdirty(gpte);
			
 
				 
			
 
				 	/* Get the pointer to the shadow PTE entry we're going to set. */
			
 
				-	spte = spte_addr(*spgd, vaddr);
			
 
				+	spte = spte_addr(cpu, *spgd, vaddr);
			
 
				 	/* If there was a valid shadow PTE entry here before, we release it.
			
 
				 	 * This can happen with a write to a previously read-only entry. */
			
 
				 	release_pte(*spte);
			
@@ -273,7 +386,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 
				 		 * table entry, even if the Guest says it's writable.  That way
			
 
				 		 * we will come back here when a write does actually occur, so
			
 
				 		 * we can update the Guest's _PAGE_DIRTY flag. */
			
 
				-		*spte = gpte_to_spte(cpu, pte_wrprotect(gpte), 0);
			
 
				+		native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));
			
 
				 
			
 
				 	/* Finally, we write the Guest PTE entry back: we've set the
			
 
				 	 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
			
@@ -301,14 +414,23 @@ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr)
 
				 	pgd_t *spgd;
			
 
				 	unsigned long flags;
			
 
				 
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	pmd_t *spmd;
			
 
				+#endif
			
 
				 	/* Look at the current top level entry: is it present? */
			
 
				 	spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
			
 
				 	if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
			
 
				 		return false;
			
 
				 
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	spmd = spmd_addr(cpu, *spgd, vaddr);
			
 
				+	if (!(pmd_flags(*spmd) & _PAGE_PRESENT))
			
 
				+		return false;
			
 
				+#endif
			
 
				+
			
 
				 	/* Check the flags on the pte entry itself: it must be present and
			
 
				 	 * writable. */
			
 
				-	flags = pte_flags(*(spte_addr(*spgd, vaddr)));
			
 
				+	flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr)));
			
 
				 
			
 
				 	return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
			
 
				 }
			
@@ -322,8 +444,43 @@ void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
 
				 		kill_guest(cpu, "bad stack page %#lx", vaddr);
			
 
				 }
			
 
				 
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+/* Tear down one shadow PMD entry: release every PTE in the page it points
			
 
				+ * to, free that page, and clear the entry. */
			
 
				+static void release_pmd(pmd_t *spmd)
			
 
				+{
			
 
				+	pte_t *ptes;
			
 
				+	unsigned int n;
			
 
				+
			
 
				+	/* Nothing hangs off an entry which isn't present. */
			
 
				+	if (!(pmd_flags(*spmd) & _PAGE_PRESENT))
			
 
				+		return;
			
 
				+
			
 
				+	ptes = __va(pmd_pfn(*spmd) << PAGE_SHIFT);
			
 
				+
			
 
				+	/* Any entry in the PTE page might need releasing. */
			
 
				+	for (n = 0; n < PTRS_PER_PTE; n++)
			
 
				+		release_pte(ptes[n]);
			
 
				+
			
 
				+	/* The page of PTEs itself can go now... */
			
 
				+	free_page((long)ptes);
			
 
				+	/* ...and a zeroed PMD entry means we never release it twice. */
			
 
				+	native_set_pmd(spmd, __pmd(0));
			
 
				+}
			
 
				+
			
 
				+/* PAE flavour: tear down one shadow PGD entry by releasing every PMD entry
			
 
				+ * in the page it points to, freeing that page, and clearing the entry. */
			
 
				+static void release_pgd(pgd_t *spgd)
			
 
				+{
			
 
				+	pmd_t *pmds;
			
 
				+	unsigned int n;
			
 
				+
			
 
				+	/* Nothing hangs off an entry which isn't present. */
			
 
				+	if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
			
 
				+		return;
			
 
				+
			
 
				+	pmds = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
			
 
				+
			
 
				+	for (n = 0; n < PTRS_PER_PMD; n++)
			
 
				+		release_pmd(pmds + n);
			
 
				+
			
 
				+	/* The page of PMDs itself can go now... */
			
 
				+	free_page((long)pmds);
			
 
				+	/* ...and a zeroed PGD entry means we never release it twice. */
			
 
				+	set_pgd(spgd, __pgd(0));
			
 
				+}
			
 
				+
			
 
				+#else /* !CONFIG_X86_PAE */
			
 
				 /*H:450 If we chase down the release_pgd() code, it looks like this: */
			
 
				-static void release_pgd(struct lguest *lg, pgd_t *spgd)
			
 
				+static void release_pgd(pgd_t *spgd)
			
 
				 {
			
 
				 	/* If the entry's not present, there's nothing to release. */
			
 
				 	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
			
@@ -341,7 +498,7 @@ static void release_pgd(struct lguest *lg, pgd_t *spgd)
 
				 		*spgd = __pgd(0);
			
 
				 	}
			
 
				 }
			
 
				-
			
 
				+#endif
			
 
				 /*H:445 We saw flush_user_mappings() twice: once from the flush_user_mappings()
			
 
				  * hypercall and once in new_pgdir() when we re-used a top-level pgdir page.
			
 
				  * It simply releases every PTE page from 0 up to the Guest's kernel address. */
			
@@ -350,7 +507,7 @@ static void flush_user_mappings(struct lguest *lg, int idx)
 
				 	unsigned int i;
			
 
				 	/* Release every pgd entry up to the kernel's address. */
			
 
				 	for (i = 0; i < pgd_index(lg->kernel_address); i++)
			
 
				-		release_pgd(lg, lg->pgdirs[idx].pgdir + i);
			
 
				+		release_pgd(lg->pgdirs[idx].pgdir + i);
			
 
				 }
			
 
				 
			
 
				 /*H:440 (v) Flushing (throwing away) page tables,
			
@@ -369,7 +526,9 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 
				 {
			
 
				 	pgd_t gpgd;
			
 
				 	pte_t gpte;
			
 
				-
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	pmd_t gpmd;
			
 
				+#endif
			
 
				 	/* First step: get the top-level Guest page table entry. */
			
 
				 	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
			
 
				 	/* Toplevel not present?  We can't map it in. */
			
@@ -378,7 +537,14 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 
				 		return -1UL;
			
 
				 	}
			
 
				 
			
 
				-	gpte = lgread(cpu, gpte_addr(gpgd, vaddr), pte_t);
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
			
 
				+	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
			
 
				+		kill_guest(cpu, "Bad address %#lx", vaddr);
			
 
				+	gpte = lgread(cpu, gpte_addr(cpu, gpmd, vaddr), pte_t);
			
 
				+#else
			
 
				+	gpte = lgread(cpu, gpte_addr(cpu, gpgd, vaddr), pte_t);
			
 
				+#endif
			
 
				 	if (!(pte_flags(gpte) & _PAGE_PRESENT))
			
 
				 		kill_guest(cpu, "Bad address %#lx", vaddr);
			
 
				 
			
@@ -405,6 +571,9 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 
				 			      int *blank_pgdir)
			
 
				 {
			
 
				 	unsigned int next;
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	pmd_t *pmd_table;
			
 
				+#endif
			
 
				 
			
 
				 	/* We pick one entry at random to throw out.  Choosing the Least
			
 
				 	 * Recently Used might be better, but this is easy. */
			
@@ -416,10 +585,27 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 
				 		/* If the allocation fails, just keep using the one we have */
			
 
				 		if (!cpu->lg->pgdirs[next].pgdir)
			
 
				 			next = cpu->cpu_pgd;
			
 
				-		else
			
 
				-			/* This is a blank page, so there are no kernel
			
 
				-			 * mappings: caller must map the stack! */
			
 
				+		else {
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+			/* In PAE mode, allocate a pmd page and populate the
			
 
				+			 * last pgd entry. */
			
 
				+			pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL);
			
 
				+			if (!pmd_table) {
			
 
				+				free_page((long)cpu->lg->pgdirs[next].pgdir);
			
 
				+				set_pgd(cpu->lg->pgdirs[next].pgdir, __pgd(0));
			
 
				+				next = cpu->cpu_pgd;
			
 
				+			} else {
			
 
				+				set_pgd(cpu->lg->pgdirs[next].pgdir +
			
 
				+					SWITCHER_PGD_INDEX,
			
 
				+					__pgd(__pa(pmd_table) | _PAGE_PRESENT));
			
 
				+				/* This is a blank page, so there are no kernel
			
 
				+				 * mappings: caller must map the stack! */
			
 
				+				*blank_pgdir = 1;
			
 
				+			}
			
 
				+#else
			
 
				 			*blank_pgdir = 1;
			
 
				+#endif
			
 
				+		}
			
 
				 	}
			
 
				 	/* Record which Guest toplevel this shadows. */
			
 
				 	cpu->lg->pgdirs[next].gpgdir = gpgdir;
			
@@ -431,7 +617,7 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 
				 
			
 
				 /*H:430 (iv) Switching page tables
			
 
				  *
			
 
				- * Now we've seen all the page table setting and manipulation, let's see what
			
 
				+ * Now we've seen all the page table setting and manipulation, let's see
			
 
				  * what happens when the Guest changes page tables (ie. changes the top-level
			
 
				  * pgdir).  This occurs on almost every context switch. */
			
 
				 void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
			
@@ -460,10 +646,25 @@ static void release_all_pagetables(struct lguest *lg)
 
				 
			
 
				 	/* Every shadow pagetable this Guest has */
			
 
				 	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
			
 
				-		if (lg->pgdirs[i].pgdir)
			
 
				+		if (lg->pgdirs[i].pgdir) {
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+			pgd_t *spgd;
			
 
				+			pmd_t *pmdpage;
			
 
				+			unsigned int k;
			
 
				+
			
 
				+			/* Get the last pmd page. */
			
 
				+			spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX;
			
 
				+			pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
			
 
				+
			
 
				+			/* And release the pmd entries of that pmd page,
			
 
				+			 * except for the switcher pmd. */
			
 
				+			for (k = 0; k < SWITCHER_PMD_INDEX; k++)
			
 
				+				release_pmd(&pmdpage[k]);
			
 
				+#endif
			
 
				 			/* Every PGD entry except the Switcher at the top */
			
 
				 			for (j = 0; j < SWITCHER_PGD_INDEX; j++)
			
 
				-				release_pgd(lg, lg->pgdirs[i].pgdir + j);
			
 
				+				release_pgd(lg->pgdirs[i].pgdir + j);
			
 
				+		}
			
 
				 }
			
 
				 
			
 
				 /* We also throw away everything when a Guest tells us it's changed a kernel
			
@@ -504,24 +705,37 @@ static void do_set_pte(struct lg_cpu *cpu, int idx,
 
				 {
			
 
				 	/* Look up the matching shadow page directory entry. */
			
 
				 	pgd_t *spgd = spgd_addr(cpu, idx, vaddr);
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	pmd_t *spmd;
			
 
				+#endif
			
 
				 
			
 
				 	/* If the top level isn't present, there's no entry to update. */
			
 
				 	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
			
 
				-		/* Otherwise, we start by releasing the existing entry. */
			
 
				-		pte_t *spte = spte_addr(*spgd, vaddr);
			
 
				-		release_pte(*spte);
			
 
				-
			
 
				-		/* If they're setting this entry as dirty or accessed, we might
			
 
				-		 * as well put that entry they've given us in now.  This shaves
			
 
				-		 * 10% off a copy-on-write micro-benchmark. */
			
 
				-		if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
			
 
				-			check_gpte(cpu, gpte);
			
 
				-			*spte = gpte_to_spte(cpu, gpte,
			
 
				-					     pte_flags(gpte) & _PAGE_DIRTY);
			
 
				-		} else
			
 
				-			/* Otherwise kill it and we can demand_page() it in
			
 
				-			 * later. */
			
 
				-			*spte = __pte(0);
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+		spmd = spmd_addr(cpu, *spgd, vaddr);
			
 
				+		if (pmd_flags(*spmd) & _PAGE_PRESENT) {
			
 
				+#endif
			
 
				+			/* Otherwise, we start by releasing
			
 
				+			 * the existing entry. */
			
 
				+			pte_t *spte = spte_addr(cpu, *spgd, vaddr);
			
 
				+			release_pte(*spte);
			
 
				+
			
 
				+			/* If they're setting this entry as dirty or accessed,
			
 
				+			 * we might as well put that entry they've given us
			
 
				+			 * in now.  This shaves 10% off a
			
 
				+			 * copy-on-write micro-benchmark. */
			
 
				+			if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
			
 
				+				check_gpte(cpu, gpte);
			
 
				+				native_set_pte(spte,
			
 
				+						gpte_to_spte(cpu, gpte,
			
 
				+						pte_flags(gpte) & _PAGE_DIRTY));
			
 
				+			} else
			
 
				+				/* Otherwise kill it and we can demand_page()
			
 
				+				 * it in later. */
			
 
				+				native_set_pte(spte, __pte(0));
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+		}
			
 
				+#endif
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -568,12 +782,10 @@ void guest_set_pte(struct lg_cpu *cpu,
 
				  *
			
 
				  * So with that in mind here's our code to to update a (top-level) PGD entry:
			
 
				  */
			
 
				-void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
			
 
				+void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
			
 
				 {
			
 
				 	int pgdir;
			
 
				 
			
 
				-	/* The kernel seems to try to initialize this early on: we ignore its
			
 
				-	 * attempts to map over the Switcher. */
			
 
				 	if (idx >= SWITCHER_PGD_INDEX)
			
 
				 		return;
			
 
				 
			
@@ -581,8 +793,14 @@ void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 
				 	pgdir = find_pgdir(lg, gpgdir);
			
 
				 	if (pgdir < ARRAY_SIZE(lg->pgdirs))
			
 
				 		/* ... throw it away. */
			
 
				-		release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
			
 
				+		release_pgd(lg->pgdirs[pgdir].pgdir + idx);
			
 
				 }
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
			
 
				+{
			
 
				+	guest_pagetable_clear_all(&lg->cpus[0]);
			
 
				+}
			
 
				+#endif
			
 
				 
			
 
				 /* Once we know how much memory we have we can construct simple identity
			
 
				  * (which set virtual == physical) and linear mappings
			
@@ -596,8 +814,16 @@ static unsigned long setup_pagetables(struct lguest *lg,
 
				 {
			
 
				 	pgd_t __user *pgdir;
			
 
				 	pte_t __user *linear;
			
 
				-	unsigned int mapped_pages, i, linear_pages, phys_linear;
			
 
				 	unsigned long mem_base = (unsigned long)lg->mem_base;
			
 
				+	unsigned int mapped_pages, i, linear_pages;
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	pmd_t __user *pmds;
			
 
				+	unsigned int j;
			
 
				+	pgd_t pgd;
			
 
				+	pmd_t pmd;
			
 
				+#else
			
 
				+	unsigned int phys_linear;
			
 
				+#endif
			
 
				 
			
 
				 	/* We have mapped_pages frames to map, so we need
			
 
				 	 * linear_pages page tables to map them. */
			
@@ -610,6 +836,9 @@ static unsigned long setup_pagetables(struct lguest *lg,
 
				 	/* Now we use the next linear_pages pages as pte pages */
			
 
				 	linear = (void *)pgdir - linear_pages * PAGE_SIZE;
			
 
				 
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	pmds = (void *)linear - PAGE_SIZE;
			
 
				+#endif
			
 
				 	/* Linear mapping is easy: put every page's address into the
			
 
				 	 * mapping in order. */
			
 
				 	for (i = 0; i < mapped_pages; i++) {
			
@@ -621,6 +850,22 @@ static unsigned long setup_pagetables(struct lguest *lg,
 
				 
			
 
				 	/* The top level points to the linear page table pages above.
			
 
				 	 * We setup the identity and linear mappings here. */
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD;
			
 
				+	     i += PTRS_PER_PTE, j++) {
			
 
				+		native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i)
			
 
				+		- mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
			
 
				+
			
 
				+		if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0)
			
 
				+			return -EFAULT;
			
 
				+	}
			
 
				+
			
 
				+	set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT));
			
 
				+	if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
			
 
				+		return -EFAULT;
			
 
				+	if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0)
			
 
				+		return -EFAULT;
			
 
				+#else
			
 
				 	phys_linear = (unsigned long)linear - mem_base;
			
 
				 	for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) {
			
 
				 		pgd_t pgd;
			
@@ -633,6 +878,7 @@ static unsigned long setup_pagetables(struct lguest *lg,
 
				 				    &pgd, sizeof(pgd)))
			
 
				 			return -EFAULT;
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	/* We return the top level (guest-physical) address: remember where
			
 
				 	 * this is. */
			
@@ -648,7 +894,10 @@ int init_guest_pagetable(struct lguest *lg)
 
				 	u64 mem;
			
 
				 	u32 initrd_size;
			
 
				 	struct boot_params __user *boot = (struct boot_params *)lg->mem_base;
			
 
				-
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	pgd_t *pgd;
			
 
				+	pmd_t *pmd_table;
			
 
				+#endif
			
 
				 	/* Get the Guest memory size and the ramdisk size from the boot header
			
 
				 	 * located at lg->mem_base (Guest address 0). */
			
 
				 	if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem))
			
@@ -663,6 +912,15 @@ int init_guest_pagetable(struct lguest *lg)
 
				 	lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
			
 
				 	if (!lg->pgdirs[0].pgdir)
			
 
				 		return -ENOMEM;
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	pgd = lg->pgdirs[0].pgdir;
			
 
				+	pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL);
			
 
				+	if (!pmd_table)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	set_pgd(pgd + SWITCHER_PGD_INDEX,
			
 
				+		__pgd(__pa(pmd_table) | _PAGE_PRESENT));
			
 
				+#endif
			
 
				 	lg->cpus[0].cpu_pgd = 0;
			
 
				 	return 0;
			
 
				 }
			
@@ -672,17 +930,24 @@ void page_table_guest_data_init(struct lg_cpu *cpu)
 
				 {
			
 
				 	/* We get the kernel address: above this is all kernel memory. */
			
 
				 	if (get_user(cpu->lg->kernel_address,
			
 
				-		     &cpu->lg->lguest_data->kernel_address)
			
 
				-	    /* We tell the Guest that it can't use the top 4MB of virtual
			
 
				-	     * addresses used by the Switcher. */
			
 
				-	    || put_user(4U*1024*1024, &cpu->lg->lguest_data->reserve_mem)
			
 
				-	    || put_user(cpu->lg->pgdirs[0].gpgdir, &cpu->lg->lguest_data->pgdir))
			
 
				+		&cpu->lg->lguest_data->kernel_address)
			
 
				+		/* We tell the Guest that it can't use the top 2 or 4 MB
			
 
				+		 * of virtual addresses used by the Switcher. */
			
 
				+		|| put_user(RESERVE_MEM * 1024 * 1024,
			
 
				+			&cpu->lg->lguest_data->reserve_mem)
			
 
				+		|| put_user(cpu->lg->pgdirs[0].gpgdir,
			
 
				+			&cpu->lg->lguest_data->pgdir))
			
 
				 		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
			
 
				 
			
 
				 	/* In flush_user_mappings() we loop from 0 to
			
 
				 	 * "pgd_index(lg->kernel_address)".  This assumes it won't hit the
			
 
				 	 * Switcher mappings, so check that now. */
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX &&
			
 
				+		pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX)
			
 
				+#else
			
 
				 	if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX)
			
 
				+#endif
			
 
				 		kill_guest(cpu, "bad kernel address %#lx",
			
 
				 				 cpu->lg->kernel_address);
			
 
				 }
			
@@ -708,16 +973,30 @@ void free_guest_pagetable(struct lguest *lg)
 
				 void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
			
 
				 {
			
 
				 	pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
			
 
				-	pgd_t switcher_pgd;
			
 
				 	pte_t regs_pte;
			
 
				 	unsigned long pfn;
			
 
				 
			
 
				+#ifdef CONFIG_X86_PAE
			
 
				+	pmd_t switcher_pmd;
			
 
				+	pmd_t *pmd_table;
			
 
				+
			
 
				+	native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >>
			
 
				+		       PAGE_SHIFT, PAGE_KERNEL_EXEC));
			
 
				+
			
 
				+	pmd_table = __va(pgd_pfn(cpu->lg->
			
 
				+			pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX])
			
 
				+								<< PAGE_SHIFT);
			
 
				+	native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
			
 
				+#else
			
 
				+	pgd_t switcher_pgd;
			
 
				+
			
 
				 	/* Make the last PGD entry for this Guest point to the Switcher's PTE
			
 
				 	 * page for this CPU (with appropriate flags). */
			
 
				-	switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL);
			
 
				+	switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC);
			
 
				 
			
 
				 	cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
			
 
				 
			
 
				+#endif
			
 
				 	/* We also change the Switcher PTE page.  When we're running the Guest,
			
 
				 	 * we want the Guest's "regs" page to appear where the first Switcher
			
 
				 	 * page for this CPU is.  This is an optimization: when the Switcher
			
@@ -726,8 +1005,9 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 
				 	 * page is already mapped there, we don't have to copy them out
			
 
				 	 * again. */
			
 
				 	pfn = __pa(cpu->regs_page) >> PAGE_SHIFT;
			
 
				-	regs_pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL));
			
 
				-	switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte;
			
 
				+	native_set_pte(&regs_pte, pfn_pte(pfn, PAGE_KERNEL));
			
 
				+	native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)],
			
 
				+			regs_pte);
			
 
				 }
			
 
				 /*:*/
			
 
				 
			
@@ -752,21 +1032,21 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
 
				 
			
 
				 	/* The first entries are easy: they map the Switcher code. */
			
 
				 	for (i = 0; i < pages; i++) {
			
 
				-		pte[i] = mk_pte(switcher_page[i],
			
 
				-				__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED));
			
 
				+		native_set_pte(&pte[i], mk_pte(switcher_page[i],
			
 
				+				__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
			
 
				 	}
			
 
				 
			
 
				 	/* The only other thing we map is this CPU's pair of pages. */
			
 
				 	i = pages + cpu*2;
			
 
				 
			
 
				 	/* First page (Guest registers) is writable from the Guest */
			
 
				-	pte[i] = pfn_pte(page_to_pfn(switcher_page[i]),
			
 
				-			 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW));
			
 
				+	native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
			
 
				+			 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)));
			
 
				 
			
 
				 	/* The second page contains the "struct lguest_ro_state", and is
			
 
				 	 * read-only. */
			
 
				-	pte[i+1] = pfn_pte(page_to_pfn(switcher_page[i+1]),
			
 
				-			   __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED));
			
 
				+	native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
			
 
				+			   __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
			
 
				 }
			
 
				 
			
 
				 /* We've made it through the page table code.  Perhaps our tired brains are
			
--- a/drivers/lguest/segments.c
+++ b/drivers/lguest/segments.c
@@ -150,7 +150,7 @@ void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi)
 
				 {
			
 
				 	/* We assume the Guest has the same number of GDT entries as the
			
 
				 	 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */
			
 
				-	if (num > ARRAY_SIZE(cpu->arch.gdt))
			
 
				+	if (num >= ARRAY_SIZE(cpu->arch.gdt))
			
 
				 		kill_guest(cpu, "too many gdt entries %i", num);
			
 
				 
			
 
				 	/* Set it up, then fix it. */
			
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -16,6 +16,7 @@
 
				 #include <linux/anon_inodes.h>
			
 
				 #include <linux/eventfd.h>
			
 
				 #include <linux/syscalls.h>
			
 
				+#include <linux/module.h>
			
 
				 
			
 
				 struct eventfd_ctx {
			
 
				 	wait_queue_head_t wqh;
			
@@ -56,6 +57,7 @@ int eventfd_signal(struct file *file, int n)
 
				 
			
 
				 	return n;
			
 
				 }
			
 
				+EXPORT_SYMBOL_GPL(eventfd_signal);
			
 
				 
			
 
				 static int eventfd_release(struct inode *inode, struct file *file)
			
 
				 {
			
@@ -197,6 +199,7 @@ struct file *eventfd_fget(int fd)
 
				 
			
 
				 	return file;
			
 
				 }
			
 
				+EXPORT_SYMBOL_GPL(eventfd_fget);
			
 
				 
			
 
				 SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
			
 
				 {
			
--- a/include/linux/lguest.h
+++ b/include/linux/lguest.h
@@ -30,6 +30,10 @@ struct lguest_data
 
				 	/* Wallclock time set by the Host. */
			
 
				 	struct timespec time;
			
 
				 
			
 
				+	/* Interrupt pending set by the Host.  The Guest should do a hypercall
			
 
				+	 * if it re-enables interrupts and sees this set (to X86_EFLAGS_IF). */
			
 
				+	int irq_pending;
			
 
				+
			
 
				 	/* Async hypercall ring.  Instead of directly making hypercalls, we can
			
 
				 	 * place them in here for processing the next time the Host wants.
			
 
				 	 * This batching can be quite efficient. */
			
--- a/include/linux/lguest_launcher.h
+++ b/include/linux/lguest_launcher.h
@@ -57,7 +57,8 @@ enum lguest_req
 
				 	LHREQ_INITIALIZE, /* + base, pfnlimit, start */
			
 
				 	LHREQ_GETDMA, /* No longer used */
			
 
				 	LHREQ_IRQ, /* + irq */
			
 
				-	LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */
			
 
				+	LHREQ_BREAK, /* No longer used */
			
 
				+	LHREQ_EVENTFD, /* + address, fd. */
			
 
				 };
			
 
				 
			
 
				 /* The alignment to use between consumer and producer parts of vring.
			
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2192,6 +2192,7 @@ void kick_process(struct task_struct *p)
 
				 		smp_send_reschedule(cpu);
			
 
				 	preempt_enable();
			
 
				 }
			
 
				+EXPORT_SYMBOL_GPL(kick_process);
			
 
				 
			
 
				 /*
			
 
				  * Return a low guess at the load of a migration-source cpu weighted