Browse Source

x86-64: Move kernelstack from PDA to per-cpu.

Also clean up PER_CPU_VAR usage in xen-asm_64.S

tj: * remove now unused stack_thread_info()
    * s/kernelstack/kernel_stack/
    * added FIXME comment in xen-asm_64.S

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Brian Gerst 16 years ago
parent
commit
9af45651f1

+ 4 - 4
arch/x86/ia32/ia32entry.S

@@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target)
 	CFI_DEF_CFA	rsp,0
 	CFI_REGISTER	rsp,rbp
 	SWAPGS_UNSAFE_STACK
-	movq	%gs:pda_kernelstack, %rsp
-	addq	$(PDA_STACKOFFSET),%rsp	
+	movq	PER_CPU_VAR(kernel_stack), %rsp
+	addq	$(KERNEL_STACK_OFFSET),%rsp
 	/*
 	 * No need to follow this irqs on/off section: the syscall
 	 * disabled irqs, here we enable it straight after entry:
@@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target)
 ENTRY(ia32_cstar_target)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
+	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	SWAPGS_UNSAFE_STACK
 	movl	%esp,%r8d
 	CFI_REGISTER	rsp,r8
-	movq	%gs:pda_kernelstack,%rsp
+	movq	PER_CPU_VAR(kernel_stack),%rsp
 	/*
 	 * No need to follow this irqs on/off section: the syscall
 	 * disabled irqs and here we enable it straight after entry:

+ 1 - 3
arch/x86/include/asm/pda.h

@@ -13,7 +13,7 @@
 struct x8664_pda {
 	unsigned long unused1;
 	unsigned long unused2;
-	unsigned long kernelstack;	/* 16 top of kernel stack for current */
+	unsigned long unused3;
 	unsigned long oldrsp;		/* 24 user rsp for system call */
 	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
 	unsigned int unused6;		/* 36 was cpunumber */
@@ -44,6 +44,4 @@ extern void pda_init(int);
 
 #endif
 
-#define PDA_STACKOFFSET (5*8)
-
 #endif /* _ASM_X86_PDA_H */

+ 8 - 12
arch/x86/include/asm/thread_info.h

@@ -194,25 +194,21 @@ static inline struct thread_info *current_thread_info(void)
 
 #else /* X86_32 */
 
-#include <asm/pda.h>
+#include <asm/percpu.h>
+#define KERNEL_STACK_OFFSET (5*8)
 
 /*
  * macros/functions for gaining access to the thread information structure
  * preempt_count needs to be 1 initially, until the scheduler is functional.
  */
 #ifndef __ASSEMBLY__
-static inline struct thread_info *current_thread_info(void)
-{
-	struct thread_info *ti;
-	ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
-	return ti;
-}
+DECLARE_PER_CPU(unsigned long, kernel_stack);
 
-/* do not use in interrupt context */
-static inline struct thread_info *stack_thread_info(void)
+static inline struct thread_info *current_thread_info(void)
 {
 	struct thread_info *ti;
-	asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
+	ti = (void *)(percpu_read(kernel_stack) +
+		      KERNEL_STACK_OFFSET - THREAD_SIZE);
 	return ti;
 }
 
@@ -220,8 +216,8 @@ static inline struct thread_info *stack_thread_info(void)
 
 /* how to get the thread information struct from ASM */
 #define GET_THREAD_INFO(reg) \
-	movq %gs:pda_kernelstack,reg ; \
-	subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
+	movq PER_CPU_VAR(kernel_stack),reg ; \
+	subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
 
 #endif
 

+ 0 - 1
arch/x86/kernel/asm-offsets_64.c

@@ -49,7 +49,6 @@ int main(void)
 	BLANK();
 #undef ENTRY
 #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
-	ENTRY(kernelstack); 
 	ENTRY(oldrsp); 
 	ENTRY(irqcount);
 	DEFINE(pda_size, sizeof(struct x8664_pda));

+ 4 - 2
arch/x86/kernel/cpu/common.c

@@ -889,6 +889,10 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
 	per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
 	per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
 #endif
 #endif
 
 
+DEFINE_PER_CPU(unsigned long, kernel_stack) =
+	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(kernel_stack);
+
 void __cpuinit pda_init(int cpu)
 void __cpuinit pda_init(int cpu)
 {
 {
 	struct x8664_pda *pda = cpu_pda(cpu);
 	struct x8664_pda *pda = cpu_pda(cpu);
@@ -900,8 +904,6 @@ void __cpuinit pda_init(int cpu)
 	load_pda_offset(cpu);
 	load_pda_offset(cpu);
 
 
 	pda->irqcount = -1;
 	pda->irqcount = -1;
-	pda->kernelstack = (unsigned long)stack_thread_info() -
-				 PDA_STACKOFFSET + THREAD_SIZE;
 
 
 	if (cpu != 0) {
 	if (cpu != 0) {
 		if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
 		if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)

+ 2 - 2
arch/x86/kernel/entry_64.S

@@ -468,7 +468,7 @@ END(ret_from_fork)
 ENTRY(system_call)
 	CFI_STARTPROC	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
+	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	SWAPGS_UNSAFE_STACK
@@ -480,7 +480,7 @@ ENTRY(system_call_after_swapgs)
 
 	movq	%rsp,%gs:pda_oldrsp
-	movq	%gs:pda_kernelstack,%rsp
+	movq	PER_CPU_VAR(kernel_stack),%rsp
 	/*
 	 * No need to follow this irqs off/on section - it's straight
 	 * and short:

+ 2 - 2
arch/x86/kernel/process_64.c

@@ -620,9 +620,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	write_pda(oldrsp, next->usersp);
 	percpu_write(current_task, next_p);
 
-	write_pda(kernelstack,
+	percpu_write(kernel_stack,
 		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - PDA_STACKOFFSET);
+		  THREAD_SIZE - KERNEL_STACK_OFFSET);
 #ifdef CONFIG_CC_STACKPROTECTOR
 	write_pda(stack_canary, next_p->stack_canary);
 	/*

+ 3 - 0
arch/x86/kernel/smpboot.c

@@ -798,6 +798,9 @@ do_rest:
 #else
 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
+	per_cpu(kernel_stack, cpu) =
+		(unsigned long)task_stack_page(c_idle.idle) -
+		KERNEL_STACK_OFFSET + THREAD_SIZE;
 #endif
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;

+ 11 - 12
arch/x86/xen/xen-asm_64.S

@@ -17,6 +17,7 @@
 #include <asm/processor-flags.h>
 #include <asm/errno.h>
 #include <asm/segment.h>
+#include <asm/percpu.h>
 
 #include <xen/interface/xen.h>
 
@@ -28,12 +29,10 @@
 
 #if 1
 /*
-	x86-64 does not yet support direct access to percpu variables
-	via a segment override, so we just need to make sure this code
-	never gets used
+	FIXME: x86_64 now can support direct access to percpu variables
+	via a segment override.  Update xen accordingly.
 */
 #define BUG			ud2a
-#define PER_CPU_VAR(var, off)	0xdeadbeef
 #endif
 
 /*
@@ -45,14 +44,14 @@ ENTRY(xen_irq_enable_direct)
 	BUG
 
 	/* Unmask events */
-	movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+	movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 
 	/* Preempt here doesn't matter because that will deal with
 	   any pending interrupts.  The pending check may end up being
 	   run on the wrong CPU, but that doesn't hurt. */
 
 	/* Test for pending */
-	testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
 	jz 1f
 
 2:	call check_events
@@ -69,7 +68,7 @@ ENDPATCH(xen_irq_enable_direct)
 ENTRY(xen_irq_disable_direct)
 	BUG
 
-	movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+	movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
ENDPATCH(xen_irq_disable_direct)
 	ret
 	ENDPROC(xen_irq_disable_direct)
@@ -87,7 +86,7 @@ ENDPATCH(xen_irq_disable_direct)
 ENTRY(xen_save_fl_direct)
 	BUG
 
-	testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 	setz %ah
 	addb %ah,%ah
ENDPATCH(xen_save_fl_direct)
@@ -107,13 +106,13 @@ ENTRY(xen_restore_fl_direct)
 	BUG
 
 	testb $X86_EFLAGS_IF>>8, %ah
-	setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+	setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 	/* Preempt here doesn't matter because that will deal with
 	   any pending interrupts.  The pending check may end up being
 	   run on the wrong CPU, but that doesn't hurt. */
 
 	/* check for unmasked and pending */
-	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
+	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
 	jz 1f
 2:	call check_events
 1:
@@ -196,7 +195,7 @@ ENTRY(xen_sysret64)
 	/* We're already on the usermode stack at this point, but still
 	   with the kernel gs, so we can easily switch back */
 	movq %rsp, %gs:pda_oldrsp
-	movq %gs:pda_kernelstack,%rsp
+	movq PER_CPU_VAR(kernel_stack),%rsp
 
 	pushq $__USER_DS
 	pushq %gs:pda_oldrsp
@@ -213,7 +212,7 @@ ENTRY(xen_sysret32)
 	/* We're already on the usermode stack at this point, but still
 	   with the kernel gs, so we can easily switch back */
 	movq %rsp, %gs:pda_oldrsp
-	movq %gs:pda_kernelstack, %rsp
+	movq PER_CPU_VAR(kernel_stack), %rsp
 
 	pushq $__USER32_DS
 	pushq %gs:pda_oldrsp