Browse Source

xen: support sysenter/sysexit if hypervisor does

64-bit Xen supports sysenter for 32-bit guests, so use it when the
hypervisor offers it.  (sysenter is faster than int $0x80 for a 32-bit guest on 64-bit Xen.)

sysexit still cannot be used in a PV guest, so we fake it up using iret.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Jeremy Fitzhardinge 17 years ago
parent
commit
e2a81baf66

+ 17 - 1
arch/x86/kernel/entry_32.S

@@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper)
 ENDPROC(kernel_thread_helper)
 ENDPROC(kernel_thread_helper)
 
 
 #ifdef CONFIG_XEN
 #ifdef CONFIG_XEN
+/* Xen doesn't set %esp to be precisely what the normal sysenter
+   entrypoint expects, so fix it up before using the normal path. */
+ENTRY(xen_sysenter_target)
+	RING0_INT_FRAME
+	addl $5*4, %esp		/* remove xen-provided frame */
+	jmp sysenter_past_esp
+
 ENTRY(xen_hypervisor_callback)
 ENTRY(xen_hypervisor_callback)
 	CFI_STARTPROC
 	CFI_STARTPROC
 	pushl $0
 	pushl $0
@@ -1036,8 +1043,17 @@ ENTRY(xen_hypervisor_callback)
 	jae  1f
 	jae  1f
 
 
 	call xen_iret_crit_fixup
 	call xen_iret_crit_fixup
+	jmp  2f
+
+1:	cmpl $xen_sysexit_start_crit,%eax
+	jb   2f
+	cmpl $xen_sysexit_end_crit,%eax
+	jae  2f
+
+	jmp xen_sysexit_crit_fixup
 
 
-1:	mov %esp, %eax
+ENTRY(xen_do_upcall)
+2:	mov %esp, %eax
 	call xen_evtchn_do_upcall
 	call xen_evtchn_do_upcall
 	jmp  ret_from_intr
 	jmp  ret_from_intr
 	CFI_ENDPROC
 	CFI_ENDPROC

+ 1 - 2
arch/x86/xen/enlighten.c

@@ -155,7 +155,6 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 	if (*ax == 1)
 	if (*ax == 1)
 		maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
 		maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
 			    (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
 			    (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
-			    (1 << X86_FEATURE_SEP)  |  /* disable SEP */
 			    (1 << X86_FEATURE_ACC));   /* thermal monitoring */
 			    (1 << X86_FEATURE_ACC));   /* thermal monitoring */
 
 
 	asm(XEN_EMULATE_PREFIX "cpuid"
 	asm(XEN_EMULATE_PREFIX "cpuid"
@@ -994,7 +993,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.read_pmc = native_read_pmc,
 	.read_pmc = native_read_pmc,
 
 
 	.iret = xen_iret,
 	.iret = xen_iret,
-	.irq_enable_syscall_ret = NULL,  /* never called */
+	.irq_enable_syscall_ret = xen_sysexit,
 
 
 	.load_tr_desc = paravirt_nop,
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
 	.set_ldt = xen_set_ldt,

+ 21 - 0
arch/x86/xen/setup.c

@@ -16,6 +16,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypercall.h>
 
 
+#include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
 #include <xen/features.h>
 
 
@@ -68,6 +69,24 @@ static void __init fiddle_vdso(void)
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 }
 }
 
 
+void xen_enable_sysenter(void)
+{
+	int cpu = smp_processor_id();
+	extern void xen_sysenter_target(void);
+	/* Mask events on entry, even though they get enabled immediately */
+	static struct callback_register sysenter = {
+		.type = CALLBACKTYPE_sysenter,
+		.address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
+		.flags = CALLBACKF_mask_events,
+	};
+
+	if (!boot_cpu_has(X86_FEATURE_SEP) ||
+	    HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) {
+		clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
+	}
+}
+
 void __init xen_arch_setup(void)
 void __init xen_arch_setup(void)
 {
 {
 	struct physdev_set_iopl set_iopl;
 	struct physdev_set_iopl set_iopl;
@@ -82,6 +101,8 @@ void __init xen_arch_setup(void)
 	HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
 	HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
 				 __KERNEL_CS, (unsigned long)xen_failsafe_callback);
 				 __KERNEL_CS, (unsigned long)xen_failsafe_callback);
 
 
+	xen_enable_sysenter();
+
 	set_iopl.iopl = 1;
 	set_iopl.iopl = 1;
 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
 	if (rc != 0)
 	if (rc != 0)

+ 1 - 0
arch/x86/xen/smp.c

@@ -72,6 +72,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
 	int cpu = smp_processor_id();
 	int cpu = smp_processor_id();
 
 
 	cpu_init();
 	cpu_init();
+	xen_enable_sysenter();
 
 
 	preempt_disable();
 	preempt_disable();
 	per_cpu(cpu_state, cpu) = CPU_ONLINE;
 	per_cpu(cpu_state, cpu) = CPU_ONLINE;

+ 56 - 0
arch/x86/xen/xen-asm.S

@@ -280,6 +280,62 @@ ENTRY(xen_iret_crit_fixup)
 2:	ret
 2:	ret
 
 
 
 
+ENTRY(xen_sysexit)
+	/* Store vcpu_info pointer for easy access.  Do it this
+	   way to avoid having to reload %fs */
+#ifdef CONFIG_SMP
+	GET_THREAD_INFO(%eax)
+	movl TI_cpu(%eax),%eax
+	movl __per_cpu_offset(,%eax,4),%eax
+	mov per_cpu__xen_vcpu(%eax),%eax
+#else
+	movl per_cpu__xen_vcpu, %eax
+#endif
+
+	/* We can't actually use sysexit in a pv guest,
+	   so fake it up with iret */
+	pushl $__USER_DS		/* user stack segment */
+	pushl %ecx			/* user esp */
+	pushl PT_EFLAGS+2*4(%esp)	/* user eflags */
+	pushl $__USER_CS		/* user code segment */
+	pushl %edx			/* user eip */
+
+xen_sysexit_start_crit:
+	/* Unmask events... */
+	movb $0, XEN_vcpu_info_mask(%eax)
+	/* ...and test for pending.
+	   There's a preempt window here, but it doesn't
+	   matter because we're within the critical section. */
+	testb $0xff, XEN_vcpu_info_pending(%eax)
+
+	/* If there's something pending, mask events again so we
+	   can directly inject it back into the kernel. */
+	jnz   1f
+
+	movl PT_EAX+5*4(%esp),%eax
+2:	iret
+1:	movb $1, XEN_vcpu_info_mask(%eax)
+xen_sysexit_end_crit:
+	addl $5*4, %esp		/* remove iret frame */
+	/* no need to re-save regs, but need to restore kernel %fs */
+	mov $__KERNEL_PERCPU, %eax
+	mov %eax, %fs
+	jmp xen_do_upcall
+.section __ex_table,"a"
+	.align 4
+	.long 2b,iret_exc
+.previous
+
+	.globl xen_sysexit_start_crit, xen_sysexit_end_crit
+/*
+	sysexit fixup is easy, since the old frame is still sitting there
+	on the stack.  We just need to remove the new recursive
+	interrupt frame and continue into the upcall handler.
+ */
+ENTRY(xen_sysexit_crit_fixup)
+	addl $PT_OLDESP+5*4, %esp		/* remove frame+iret */
+	jmp xen_do_upcall
+
 /*
 /*
 	Force an event check by making a hypercall,
 	Force an event check by making a hypercall,
 	but preserve regs before making the call.
 	but preserve regs before making the call.

+ 3 - 0
arch/x86/xen/xen-ops.h

@@ -19,6 +19,7 @@ extern struct shared_info *HYPERVISOR_shared_info;
 char * __init xen_memory_setup(void);
 char * __init xen_memory_setup(void);
 void __init xen_arch_setup(void);
 void __init xen_arch_setup(void);
 void __init xen_init_IRQ(void);
 void __init xen_init_IRQ(void);
+void xen_enable_sysenter(void);
 
 
 void xen_setup_timer(int cpu);
 void xen_setup_timer(int cpu);
 void xen_setup_cpu_clockevents(void);
 void xen_setup_cpu_clockevents(void);
@@ -64,4 +65,6 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void);
 DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 
 
 void xen_iret(void);
 void xen_iret(void);
+void xen_sysexit(void);
+
 #endif /* XEN_OPS_H */
 #endif /* XEN_OPS_H */