Browse Source

Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus

* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus:
  lguest: stop using KVM hypercall mechanism
  lguest: workaround cmpxchg8b_emu by ignoring cli in the guest.
Linus Torvalds 15 years ago
parent
commit
7223b91542

+ 23 - 6
arch/x86/include/asm/lguest_hcall.h

@@ -28,22 +28,39 @@
 
 #ifndef __ASSEMBLY__
 #include <asm/hw_irq.h>
-#include <asm/kvm_para.h>
 
 /*G:030
  * But first, how does our Guest contact the Host to ask for privileged
  * operations?  There are two ways: the direct way is to make a "hypercall",
  * to make requests of the Host Itself.
  *
- * We use the KVM hypercall mechanism, though completely different hypercall
- * numbers. Seventeen hypercalls are available: the hypercall number is put in
- * the %eax register, and the arguments (when required) are placed in %ebx,
- * %ecx, %edx and %esi.  If a return value makes sense, it's returned in %eax.
+ * Our hypercall mechanism uses the highest unused trap code (traps 32 and
+ * above are used by real hardware interrupts).  Seventeen hypercalls are
+ * available: the hypercall number is put in the %eax register, and the
+ * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
+ * If a return value makes sense, it's returned in %eax.
  *
  * Grossly invalid calls result in Sudden Death at the hands of the vengeful
  * Host, rather than returning failure.  This reflects Winston Churchill's
  * definition of a gentleman: "someone who is only rude intentionally".
-:*/
+ */
+static inline unsigned long
+hcall(unsigned long call,
+      unsigned long arg1, unsigned long arg2, unsigned long arg3,
+      unsigned long arg4)
+{
+	/* "int" is the Intel instruction to trigger a trap. */
+	asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
+		     /* The call in %eax (aka "a") might be overwritten */
+		     : "=a"(call)
+		       /* The arguments are in %eax, %ebx, %ecx, %edx & %esi */
+		     : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4)
+		       /* "memory" means this might write somewhere in memory.
+			* This isn't true for all calls, but it's safe to tell
+			* gcc that it might happen so it doesn't get clever. */
+		     : "memory");
+	return call;
+}
 
 /* Can't use our min() macro here: needs to be a constant */
 #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32)

+ 30 - 31
arch/x86/lguest/boot.c

@@ -115,7 +115,7 @@ static void async_hcall(unsigned long call, unsigned long arg1,
 	local_irq_save(flags);
 	if (lguest_data.hcall_status[next_call] != 0xFF) {
 		/* Table full, so do normal hcall which will flush table. */
-		kvm_hypercall4(call, arg1, arg2, arg3, arg4);
+		hcall(call, arg1, arg2, arg3, arg4);
 	} else {
 		lguest_data.hcalls[next_call].arg0 = call;
 		lguest_data.hcalls[next_call].arg1 = arg1;
@@ -145,46 +145,45 @@ static void async_hcall(unsigned long call, unsigned long arg1,
  * So, when we're in lazy mode, we call async_hcall() to store the call for
  * future processing:
  */
-static void lazy_hcall1(unsigned long call,
-		       unsigned long arg1)
+static void lazy_hcall1(unsigned long call, unsigned long arg1)
 {
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
-		kvm_hypercall1(call, arg1);
+		hcall(call, arg1, 0, 0, 0);
 	else
 		async_hcall(call, arg1, 0, 0, 0);
 }
 
 /* You can imagine what lazy_hcall2, 3 and 4 look like. :*/
 static void lazy_hcall2(unsigned long call,
-		       unsigned long arg1,
-		       unsigned long arg2)
+			unsigned long arg1,
+			unsigned long arg2)
 {
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
-		kvm_hypercall2(call, arg1, arg2);
+		hcall(call, arg1, arg2, 0, 0);
 	else
 		async_hcall(call, arg1, arg2, 0, 0);
 }
 
 static void lazy_hcall3(unsigned long call,
-		       unsigned long arg1,
-		       unsigned long arg2,
-		       unsigned long arg3)
+			unsigned long arg1,
+			unsigned long arg2,
+			unsigned long arg3)
 {
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
-		kvm_hypercall3(call, arg1, arg2, arg3);
+		hcall(call, arg1, arg2, arg3, 0);
 	else
 		async_hcall(call, arg1, arg2, arg3, 0);
 }
 
 #ifdef CONFIG_X86_PAE
 static void lazy_hcall4(unsigned long call,
-		       unsigned long arg1,
-		       unsigned long arg2,
-		       unsigned long arg3,
-		       unsigned long arg4)
+			unsigned long arg1,
+			unsigned long arg2,
+			unsigned long arg3,
+			unsigned long arg4)
 {
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
-		kvm_hypercall4(call, arg1, arg2, arg3, arg4);
+		hcall(call, arg1, arg2, arg3, arg4);
 	else
 		async_hcall(call, arg1, arg2, arg3, arg4);
 }
@@ -196,13 +195,13 @@ static void lazy_hcall4(unsigned long call,
 :*/
 static void lguest_leave_lazy_mmu_mode(void)
 {
-	kvm_hypercall0(LHCALL_FLUSH_ASYNC);
+	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
 	paravirt_leave_lazy_mmu();
 }
 
 static void lguest_end_context_switch(struct task_struct *next)
 {
-	kvm_hypercall0(LHCALL_FLUSH_ASYNC);
+	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
 	paravirt_end_context_switch(next);
 }
 
@@ -286,7 +285,7 @@ static void lguest_write_idt_entry(gate_desc *dt,
 	/* Keep the local copy up to date. */
 	native_write_idt_entry(dt, entrynum, g);
 	/* Tell Host about this new entry. */
-	kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]);
+	hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1], 0);
 }
 
 /*
@@ -300,7 +299,7 @@ static void lguest_load_idt(const struct desc_ptr *desc)
 	struct desc_struct *idt = (void *)desc->address;
 
 	for (i = 0; i < (desc->size+1)/8; i++)
-		kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b);
+		hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b, 0);
 }
 
 /*
@@ -321,7 +320,7 @@ static void lguest_load_gdt(const struct desc_ptr *desc)
 	struct desc_struct *gdt = (void *)desc->address;
 
 	for (i = 0; i < (desc->size+1)/8; i++)
-		kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b);
+		hcall(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b, 0);
 }
 
 /*
@@ -334,8 +333,8 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
 {
 	native_write_gdt_entry(dt, entrynum, desc, type);
 	/* Tell Host about this new entry. */
-	kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum,
-		       dt[entrynum].a, dt[entrynum].b);
+	hcall(LHCALL_LOAD_GDT_ENTRY, entrynum,
+	      dt[entrynum].a, dt[entrynum].b, 0);
 }
 
 /*
@@ -931,7 +930,7 @@ static int lguest_clockevent_set_next_event(unsigned long delta,
 	}
 
 	/* Please wake us this far in the future. */
-	kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta);
+	hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0, 0);
 	return 0;
 }
 
@@ -942,7 +941,7 @@ static void lguest_clockevent_set_mode(enum clock_event_mode mode,
 	case CLOCK_EVT_MODE_UNUSED:
 	case CLOCK_EVT_MODE_SHUTDOWN:
 		/* A 0 argument shuts the clock down. */
-		kvm_hypercall0(LHCALL_SET_CLOCKEVENT);
+		hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0, 0);
 		break;
 	case CLOCK_EVT_MODE_ONESHOT:
 		/* This is what we expect. */
@@ -1100,7 +1099,7 @@ static void set_lguest_basic_apic_ops(void)
 /* STOP!  Until an interrupt comes in. */
 static void lguest_safe_halt(void)
 {
-	kvm_hypercall0(LHCALL_HALT);
+	hcall(LHCALL_HALT, 0, 0, 0, 0);
 }
 
 /*
@@ -1112,8 +1111,8 @@ static void lguest_safe_halt(void)
  */
 static void lguest_power_off(void)
 {
-	kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"),
-					LGUEST_SHUTDOWN_POWEROFF);
+	hcall(LHCALL_SHUTDOWN, __pa("Power down"),
+	      LGUEST_SHUTDOWN_POWEROFF, 0, 0);
 }
 
 /*
@@ -1123,7 +1122,7 @@ static void lguest_power_off(void)
  */
 static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
 {
-	kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF);
+	hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0, 0);
 	/* The hcall won't return, but to keep gcc happy, we're "done". */
 	return NOTIFY_DONE;
 }
@@ -1162,7 +1161,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
 		len = sizeof(scratch) - 1;
 	scratch[len] = '\0';
 	memcpy(scratch, buf, len);
-	kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch));
+	hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0, 0);
 
 	/* This routine returns the number of bytes actually written. */
 	return len;
@@ -1174,7 +1173,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
  */
 static void lguest_restart(char *reason)
 {
-	kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART);
+	hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0);
 }
 
 /*G:050

+ 1 - 1
arch/x86/lguest/i386_head.S

@@ -32,7 +32,7 @@ ENTRY(lguest_entry)
 	 */
 	movl $LHCALL_LGUEST_INIT, %eax
 	movl $lguest_data - __PAGE_OFFSET, %ebx
-	.byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
+	int $LGUEST_TRAP_ENTRY
 
 	/* Set up the initial stack so we can run C code. */
 	movl $(init_thread_union+THREAD_SIZE),%esp

+ 2 - 2
drivers/lguest/lguest_device.c

@@ -178,7 +178,7 @@ static void set_status(struct virtio_device *vdev, u8 status)
 
 	/* We set the status. */
 	to_lgdev(vdev)->desc->status = status;
-	kvm_hypercall1(LHCALL_NOTIFY, (max_pfn << PAGE_SHIFT) + offset);
+	hcall(LHCALL_NOTIFY, (max_pfn << PAGE_SHIFT) + offset, 0, 0, 0);
 }
 
 static void lg_set_status(struct virtio_device *vdev, u8 status)
@@ -229,7 +229,7 @@ static void lg_notify(struct virtqueue *vq)
 	 */
 	struct lguest_vq_info *lvq = vq->priv;
 
-	kvm_hypercall1(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT);
+	hcall(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT, 0, 0, 0);
 }
 
 /* An extern declaration inside a C file is bad form.  Don't do it. */

+ 12 - 0
drivers/lguest/x86/core.c

@@ -287,6 +287,18 @@ static int emulate_insn(struct lg_cpu *cpu)
 	/* Decoding x86 instructions is icky. */
 	insn = lgread(cpu, physaddr, u8);
 
+	/*
+	 * Around 2.6.33, the kernel started using an emulation for the
+	 * cmpxchg8b instruction in early boot on many configurations.  This
+	 * code isn't paravirtualized, and it tries to disable interrupts.
+	 * Ignore it, which will Mostly Work.
+	 */
+	if (insn == 0xfa) {
+		/* "cli", or Clear Interrupt Enable instruction.  Skip it. */
+		cpu->regs->eip++;
+		return 1;
+	}
+
 	/*
 	 * 0x66 is an "operand prefix".  It means it's using the upper 16 bits
 	 * of the eax register.