
Merge branch 'tj-percpu' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into core/percpu

Ingo Molnar, 16 years ago
Parent
Commit
99937d6455
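
The series merged here retires most of the x86_64 PDA fields (pcurrent, kernelstack, oldrsp, irqcount, irqstackptr, the irq counters, mmu_state/active_mm, nodenumber) and replaces them with ordinary per-cpu variables, so 32-bit and 64-bit share one code path. A rough before/after sketch of the accessor pattern, using names from the diffs below (the wrapper function itself is illustrative, not part of the commit):

```c
#include <asm/current.h>	/* DECLARE_PER_CPU(struct task_struct *, current_task) */
#include <asm/percpu.h>		/* percpu_read()/percpu_write() helpers */

/* Illustrative helper, not taken from the commit. */
static inline struct task_struct *get_current_sketch(void)
{
	/* Old 64-bit-only form: return read_pda(pcurrent); */

	/* New form: a single segment-relative load of a per-cpu variable,
	 * the same construct on 32-bit (%fs) and 64-bit (%gs). */
	return percpu_read(current_task);
}
```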

+ 4 - 4
arch/x86/ia32/ia32entry.S

@@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target)
 	CFI_DEF_CFA	rsp,0
 	CFI_REGISTER	rsp,rbp
 	SWAPGS_UNSAFE_STACK
-	movq	%gs:pda_kernelstack, %rsp
-	addq	$(PDA_STACKOFFSET),%rsp	
+	movq	PER_CPU_VAR(kernel_stack), %rsp
+	addq	$(KERNEL_STACK_OFFSET),%rsp
 	/*
 	 * No need to follow this irqs on/off section: the syscall
 	 * disabled irqs, here we enable it straight after entry:
@@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target)
 ENTRY(ia32_cstar_target)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
+	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	SWAPGS_UNSAFE_STACK
 	movl	%esp,%r8d
 	CFI_REGISTER	rsp,r8
-	movq	%gs:pda_kernelstack,%rsp
+	movq	PER_CPU_VAR(kernel_stack),%rsp
 	/*
 	 * No need to follow this irqs on/off section: the syscall
 	 * disabled irqs and here we enable it straight after entry:

+ 3 - 21
arch/x86/include/asm/current.h

@@ -1,39 +1,21 @@
 #ifndef _ASM_X86_CURRENT_H
 #define _ASM_X86_CURRENT_H
 
-#ifdef CONFIG_X86_32
 #include <linux/compiler.h>
 #include <asm/percpu.h>
 
+#ifndef __ASSEMBLY__
 struct task_struct;
 
 DECLARE_PER_CPU(struct task_struct *, current_task);
-static __always_inline struct task_struct *get_current(void)
-{
-	return percpu_read(current_task);
-}
-
-#else /* X86_32 */
-
-#ifndef __ASSEMBLY__
-#include <asm/pda.h>
-
-struct task_struct;
 
 static __always_inline struct task_struct *get_current(void)
 {
-	return read_pda(pcurrent);
+	return percpu_read(current_task);
 }
 
-#else /* __ASSEMBLY__ */
-
-#include <asm/asm-offsets.h>
-#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
+#define current get_current()
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* X86_32 */
-
-#define current get_current()
-
 #endif /* _ASM_X86_CURRENT_H */

+ 19 - 5
arch/x86/include/asm/hardirq_64.h

@@ -3,22 +3,36 @@
 
 #include <linux/threads.h>
 #include <linux/irq.h>
-#include <asm/pda.h>
 #include <asm/apic.h>
 
+typedef struct {
+	unsigned int __softirq_pending;
+	unsigned int __nmi_count;	/* arch dependent */
+	unsigned int apic_timer_irqs;	/* arch dependent */
+	unsigned int irq0_irqs;
+	unsigned int irq_resched_count;
+	unsigned int irq_call_count;
+	unsigned int irq_tlb_count;
+	unsigned int irq_thermal_count;
+	unsigned int irq_spurious_count;
+	unsigned int irq_threshold_count;
+} ____cacheline_aligned irq_cpustat_t;
+
+DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
+
 /* We can have at most NR_VECTORS irqs routed to a cpu at a time */
 #define MAX_HARDIRQS_PER_CPU NR_VECTORS
 
 #define __ARCH_IRQ_STAT 1
 
-#define inc_irq_stat(member)	add_pda(member, 1)
+#define inc_irq_stat(member)	percpu_add(irq_stat.member, 1)
 
-#define local_softirq_pending() read_pda(__softirq_pending)
+#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
 
 #define __ARCH_SET_SOFTIRQ_PENDING 1
 
-#define set_softirq_pending(x) write_pda(__softirq_pending, (x))
-#define or_softirq_pending(x)  or_pda(__softirq_pending, (x))
+#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
+#define or_softirq_pending(x)  percpu_or(irq_stat.__softirq_pending, (x))
 
 extern void ack_bad_irq(unsigned int irq);
 

+ 7 - 9
arch/x86/include/asm/mmu_context_64.h

@@ -1,13 +1,11 @@
 #ifndef _ASM_X86_MMU_CONTEXT_64_H
 #define _ASM_X86_MMU_CONTEXT_64_H
 
-#include <asm/pda.h>
-
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
 #ifdef CONFIG_SMP
-	if (read_pda(mmu_state) == TLBSTATE_OK)
-		write_pda(mmu_state, TLBSTATE_LAZY);
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
 #endif
 }
 
@@ -19,8 +17,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		/* stop flush ipis for the previous mm */
 		cpu_clear(cpu, prev->cpu_vm_mask);
 #ifdef CONFIG_SMP
-		write_pda(mmu_state, TLBSTATE_OK);
-		write_pda(active_mm, next);
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		percpu_write(cpu_tlbstate.active_mm, next);
 #endif
 		cpu_set(cpu, next->cpu_vm_mask);
 		load_cr3(next->pgd);
@@ -30,9 +28,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	}
 #ifdef CONFIG_SMP
 	else {
-		write_pda(mmu_state, TLBSTATE_OK);
-		if (read_pda(active_mm) != next)
-			BUG();
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
+
 		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
 			/* We were in lazy tlb mode and leave_mm disabled
 			 * tlb flush IPI delivery. We must reload CR3

+ 2 - 2
arch/x86/include/asm/page_64.h

@@ -13,8 +13,8 @@
 #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
 #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
 
-#define IRQSTACK_ORDER 2
-#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
+#define IRQ_STACK_ORDER 2
+#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
 
 #define STACKFAULT_STACK 1
 #define DOUBLEFAULT_STACK 2

+ 6 - 23
arch/x86/include/asm/pda.h

@@ -11,33 +11,18 @@
 
 /* Per processor datastructure. %gs points to it while the kernel runs */
 struct x8664_pda {
-	struct task_struct *pcurrent;	/* 0  Current process */
-	unsigned long dummy;
-	unsigned long kernelstack;	/* 16 top of kernel stack for current */
-	unsigned long oldrsp;		/* 24 user rsp for system call */
-	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
-	unsigned int cpunumber;		/* 36 Logical CPU number */
+	unsigned long unused1;
+	unsigned long unused2;
+	unsigned long unused3;
+	unsigned long unused4;
+	int unused5;
+	unsigned int unused6;		/* 36 was cpunumber */
 #ifdef CONFIG_CC_STACKPROTECTOR
 	unsigned long stack_canary;	/* 40 stack canary value */
 					/* gcc-ABI: this canary MUST be at
 					   offset 40!!! */
 #endif
-	char *irqstackptr;
-	short nodenumber;		/* number of current node (32k max) */
 	short in_bootmem;		/* pda lives in bootmem */
-	unsigned int __softirq_pending;
-	unsigned int __nmi_count;	/* number of NMI on this CPUs */
-	short mmu_state;
-	short isidle;
-	struct mm_struct *active_mm;
-	unsigned apic_timer_irqs;
-	unsigned irq0_irqs;
-	unsigned irq_resched_count;
-	unsigned irq_call_count;
-	unsigned irq_tlb_count;
-	unsigned irq_thermal_count;
-	unsigned irq_threshold_count;
-	unsigned irq_spurious_count;
 } ____cacheline_aligned_in_smp;
 } ____cacheline_aligned_in_smp;
 
 DECLARE_PER_CPU(struct x8664_pda, __pda);
@@ -57,6 +42,4 @@ extern void pda_init(int);
 
 #endif
 
-#define PDA_STACKOFFSET (5*8)
-
 #endif /* _ASM_X86_PDA_H */

+ 13 - 13
arch/x86/include/asm/percpu.h

@@ -39,10 +39,10 @@
 #include <linux/stringify.h>
 
 #ifdef CONFIG_SMP
-#define __percpu_seg_str	"%%"__stringify(__percpu_seg)":"
+#define __percpu_arg(x)		"%%"__stringify(__percpu_seg)":%P" #x
 #define __my_cpu_offset		percpu_read(this_cpu_off)
 #else
-#define __percpu_seg_str
+#define __percpu_arg(x)		"%" #x
 #endif
 
 /* For arch-specific code, we can use direct single-insn ops (they
@@ -58,22 +58,22 @@ do {							\
 	}						\
 	switch (sizeof(var)) {				\
 	case 1:						\
-		asm(op "b %1,"__percpu_seg_str"%0"	\
+		asm(op "b %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
 		    : "ri" ((T__)val));			\
 		break;					\
 	case 2:						\
-		asm(op "w %1,"__percpu_seg_str"%0"	\
+		asm(op "w %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
 		    : "ri" ((T__)val));			\
 		break;					\
 	case 4:						\
-		asm(op "l %1,"__percpu_seg_str"%0"	\
+		asm(op "l %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
 		    : "ri" ((T__)val));			\
 		break;					\
 	case 8:						\
-		asm(op "q %1,"__percpu_seg_str"%0"	\
+		asm(op "q %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
 		    : "r" ((T__)val));			\
 		break;					\
@@ -86,22 +86,22 @@ do {							\
 	typeof(var) ret__;				\
 	switch (sizeof(var)) {				\
 	case 1:						\
-		asm(op "b "__percpu_seg_str"%1,%0"	\
+		asm(op "b "__percpu_arg(1)",%0"		\
 		    : "=r" (ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 2:						\
-		asm(op "w "__percpu_seg_str"%1,%0"	\
+		asm(op "w "__percpu_arg(1)",%0"		\
 		    : "=r" (ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 4:						\
-		asm(op "l "__percpu_seg_str"%1,%0"	\
+		asm(op "l "__percpu_arg(1)",%0"		\
 		    : "=r" (ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 8:						\
-		asm(op "q "__percpu_seg_str"%1,%0"	\
+		asm(op "q "__percpu_arg(1)",%0"		\
 		    : "=r" (ret__)			\
 		    : "m" (var));			\
 		break;					\
@@ -122,9 +122,9 @@ do {							\
 #define x86_test_and_clear_bit_percpu(bit, var)				\
 ({									\
 	int old__;							\
-	asm volatile("btr %1,"__percpu_seg_str"%c2\n\tsbbl %0,%0"	\
-		     : "=r" (old__)					\
-		     : "dIr" (bit), "i" (&per_cpu__##var) : "memory");	\
+	asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0"		\
+		     : "=r" (old__), "+m" (per_cpu__##var)		\
+		     : "dIr" (bit));					\
 	old__;								\
 })
 
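
Aside: the last hunk above also reworks x86_test_and_clear_bit_percpu() so the per-cpu variable is a proper "+m" operand rather than a pasted symbol address; arch/x86/kernel/process_64.c further down this page uses it on the new is_idle flag. A hedged usage sketch (the function wrapper is illustrative only):

```c
/* Illustrative only: how the rewritten macro is called on a per-cpu flag
 * such as the is_idle variable introduced later in this series. */
static void __exit_idle_sketch(void)
{
	/* btr on the segment-relative per-cpu byte clears bit 0 and returns
	 * the previous value, so idle-end is only signalled once. */
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
		return;
	/* atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); */
}
```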

+ 3 - 0
arch/x86/include/asm/processor.h

@@ -378,6 +378,9 @@ union thread_xstate {
 
 #ifdef CONFIG_X86_64
 DECLARE_PER_CPU(struct orig_ist, orig_ist);
+
+DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack);
+DECLARE_PER_CPU(char *, irq_stack_ptr);
 #endif
 
 extern void print_cpu_info(struct cpuinfo_x86 *);

+ 1 - 3
arch/x86/include/asm/smp.h

@@ -25,9 +25,7 @@ extern unsigned int num_processors;
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 DECLARE_PER_CPU(u16, cpu_llc_id);
-#ifdef CONFIG_X86_32
 DECLARE_PER_CPU(int, cpu_number);
-#endif
 
 static inline struct cpumask *cpu_sibling_mask(int cpu)
 {
@@ -164,7 +162,7 @@ extern unsigned disabled_cpus __cpuinitdata;
 extern int safe_smp_processor_id(void);
 
 #elif defined(CONFIG_X86_64_SMP)
-#define raw_smp_processor_id()	read_pda(cpunumber)
+#define raw_smp_processor_id() (percpu_read(cpu_number))
 
 #define stack_smp_processor_id()					\
 ({								\

+ 2 - 2
arch/x86/include/asm/system.h

@@ -94,7 +94,7 @@ do {									\
 	     "call __switch_to\n\t"					  \
 	     ".globl thread_return\n"					  \
 	     "thread_return:\n\t"					  \
-	     "movq %%gs:%P[pda_pcurrent],%%rsi\n\t"			  \
+	     "movq "__percpu_arg([current_task])",%%rsi\n\t"		  \
 	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
 	     LOCK_PREFIX "btr  %[tif_fork],%P[ti_flags](%%r8)\n\t"	  \
 	     "movq %%rax,%%rdi\n\t" 					  \
@@ -106,7 +106,7 @@ do {									\
 	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
 	       [tif_fork] "i" (TIF_FORK),			  	  \
 	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
-	       [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent))  \
+	       [current_task] "m" (per_cpu_var(current_task))		  \
 	     : "memory", "cc" __EXTRA_CLOBBER)
 #endif
 

+ 8 - 12
arch/x86/include/asm/thread_info.h

@@ -194,25 +194,21 @@ static inline struct thread_info *current_thread_info(void)
 
 #else /* X86_32 */
 
-#include <asm/pda.h>
+#include <asm/percpu.h>
+#define KERNEL_STACK_OFFSET (5*8)
 
 /*
  * macros/functions for gaining access to the thread information structure
  * preempt_count needs to be 1 initially, until the scheduler is functional.
  */
 #ifndef __ASSEMBLY__
-static inline struct thread_info *current_thread_info(void)
-{
-	struct thread_info *ti;
-	ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
-	return ti;
-}
+DECLARE_PER_CPU(unsigned long, kernel_stack);
 
-/* do not use in interrupt context */
-static inline struct thread_info *stack_thread_info(void)
+static inline struct thread_info *current_thread_info(void)
 {
 	struct thread_info *ti;
-	asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
+	ti = (void *)(percpu_read(kernel_stack) +
+		      KERNEL_STACK_OFFSET - THREAD_SIZE);
 	return ti;
 }
 
@@ -220,8 +216,8 @@ static inline struct thread_info *stack_thread_info(void)
 
 /* how to get the thread information struct from ASM */
 #define GET_THREAD_INFO(reg) \
-	movq %gs:pda_kernelstack,reg ; \
-	subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
+	movq PER_CPU_VAR(kernel_stack),reg ; \
+	subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
 
 #endif
 

+ 2 - 5
arch/x86/include/asm/tlbflush.h

@@ -148,20 +148,17 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
 
-#ifdef CONFIG_X86_32
 struct tlb_state {
 	struct mm_struct *active_mm;
 	int state;
-	char __cacheline_padding[L1_CACHE_BYTES-8];
 };
 DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
 
-void reset_lazy_tlbstate(void);
-#else
 static inline void reset_lazy_tlbstate(void)
 {
+	percpu_write(cpu_tlbstate.state, 0);
+	percpu_write(cpu_tlbstate.active_mm, &init_mm);
 }
-#endif
 
 #endif	/* SMP */
 

+ 2 - 1
arch/x86/include/asm/topology.h

@@ -83,7 +83,8 @@ extern cpumask_t *node_to_cpumask_map;
 DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
 
 /* Returns the number of the current Node. */
-#define numa_node_id()		read_pda(nodenumber)
+DECLARE_PER_CPU(int, node_number);
+#define numa_node_id()		percpu_read(node_number)
 
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 extern int cpu_to_node(int cpu);

+ 0 - 6
arch/x86/kernel/asm-offsets_64.c

@@ -49,12 +49,6 @@ int main(void)
 	BLANK();
 #undef ENTRY
 #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
-	ENTRY(kernelstack); 
-	ENTRY(oldrsp); 
-	ENTRY(pcurrent); 
-	ENTRY(irqcount);
-	ENTRY(cpunumber);
-	ENTRY(irqstackptr);
 	DEFINE(pda_size, sizeof(struct x8664_pda));
 	BLANK();
 #undef ENTRY

+ 27 - 44
arch/x86/kernel/cpu/common.c

@@ -881,47 +881,32 @@ __setup("clearcpuid=", setup_disablecpuid);
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
-static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
+DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(char *, irq_stack_ptr);	/* will be set during per cpu init */
+#else
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+	per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
+#endif
+
+DEFINE_PER_CPU(unsigned long, kernel_stack) =
+	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(kernel_stack);
+
+DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
 
 void __cpuinit pda_init(int cpu)
 {
-
 	/* Setup up data that may be needed in __get_free_pages early */
 	/* Setup up data that may be needed in __get_free_pages early */
 	loadsegment(fs, 0);
 	loadsegment(gs, 0);
 
 	load_pda_offset(cpu);
-	pda->cpunumber = cpu;
-	pda->irqcount = -1;
-	pda->kernelstack = (unsigned long)stack_thread_info() -
-				 PDA_STACKOFFSET + THREAD_SIZE;
-	pda->active_mm = &init_mm;
-	pda->mmu_state = 0;
-
-	if (cpu == 0) {
-		/* others are initialized in smpboot.c */
-		pda->pcurrent = &init_task;
-		pda->irqstackptr = boot_cpu_stack;
-		pda->irqstackptr += IRQSTACKSIZE - 64;
-	} else {
-		if (!pda->irqstackptr) {
-			pda->irqstackptr = (char *)
-				__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
-			if (!pda->irqstackptr)
-				panic("cannot allocate irqstack for cpu %d",
-				      cpu);
-			pda->irqstackptr += IRQSTACKSIZE - 64;
-		}
-
-		if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
-			pda->nodenumber = cpu_to_node(cpu);
-	}
 }
 
-static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
-				  DEBUG_STKSZ] __page_aligned_bss;
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
+	__aligned(PAGE_SIZE);
 
 extern asmlinkage void ignore_sysret(void);
 
@@ -979,15 +964,18 @@ void __cpuinit cpu_init(void)
 	struct tss_struct *t = &per_cpu(init_tss, cpu);
 	struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
 	unsigned long v;
-	char *estacks = NULL;
 	struct task_struct *me;
 	int i;
 
 	/* CPU 0 is initialised in head64.c */
 	if (cpu != 0)
 		pda_init(cpu);
-	else
-		estacks = boot_exception_stacks;
+
+#ifdef CONFIG_NUMA
+	if (cpu != 0 && percpu_read(node_number) == 0 &&
+	    cpu_to_node(cpu) != NUMA_NO_NODE)
+		percpu_write(node_number, cpu_to_node(cpu));
+#endif
 
 	me = current;
 
@@ -1021,18 +1009,13 @@ void __cpuinit cpu_init(void)
 	 * set up and load the per-CPU TSS
 	 */
 	if (!orig_ist->ist[0]) {
-		static const unsigned int order[N_EXCEPTION_STACKS] = {
-		  [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
-		  [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+		static const unsigned int sizes[N_EXCEPTION_STACKS] = {
+		  [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+		  [DEBUG_STACK - 1] = DEBUG_STKSZ
 		};
+		char *estacks = per_cpu(exception_stacks, cpu);
 		for (v = 0; v < N_EXCEPTION_STACKS; v++) {
-			if (cpu) {
-				estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
-				if (!estacks)
-					panic("Cannot allocate exception "
-					      "stack %ld %d\n", v, cpu);
-			}
-			estacks += PAGE_SIZE << order[v];
+			estacks += sizes[v];
 			orig_ist->ist[v] = t->x86_tss.ist[v] =
 					(unsigned long)estacks;
 		}

+ 18 - 17
arch/x86/kernel/dumpstack_64.c

@@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		const struct stacktrace_ops *ops, void *data)
 {
 	const unsigned cpu = get_cpu();
-	unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
+	unsigned long *irq_stack_end =
+		(unsigned long *)per_cpu(irq_stack_ptr, cpu);
 	unsigned used = 0;
 	struct thread_info *tinfo;
 	int graph = 0;
@@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 			stack = (unsigned long *) estack_end[-2];
 			continue;
 		}
-		if (irqstack_end) {
-			unsigned long *irqstack;
-			irqstack = irqstack_end -
-				(IRQSTACKSIZE - 64) / sizeof(*irqstack);
+		if (irq_stack_end) {
+			unsigned long *irq_stack;
+			irq_stack = irq_stack_end -
+				(IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
 
-			if (stack >= irqstack && stack < irqstack_end) {
+			if (stack >= irq_stack && stack < irq_stack_end) {
 				if (ops->stack(data, "IRQ") < 0)
 					break;
 				bp = print_context_stack(tinfo, stack, bp,
-					ops, data, irqstack_end, &graph);
+					ops, data, irq_stack_end, &graph);
 				/*
 				 * We link to the next stack (which would be
 				 * the process stack normally) the last
 				 * pointer (index -1 to end) in the IRQ stack:
 				 */
-				stack = (unsigned long *) (irqstack_end[-1]);
-				irqstack_end = NULL;
+				stack = (unsigned long *) (irq_stack_end[-1]);
+				irq_stack_end = NULL;
 				ops->stack(data, "EOI");
 				continue;
 			}
@@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	unsigned long *stack;
 	int i;
 	const int cpu = smp_processor_id();
-	unsigned long *irqstack_end =
-		(unsigned long *) (cpu_pda(cpu)->irqstackptr);
-	unsigned long *irqstack =
-		(unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+	unsigned long *irq_stack_end =
+		(unsigned long *)(per_cpu(irq_stack_ptr, cpu));
+	unsigned long *irq_stack =
+		(unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
 
 	/*
 	 * debugging aid: "show_stack(NULL, NULL);" prints the
@@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
 	stack = sp;
 	for (i = 0; i < kstack_depth_to_print; i++) {
-		if (stack >= irqstack && stack <= irqstack_end) {
-			if (stack == irqstack_end) {
-				stack = (unsigned long *) (irqstack_end[-1]);
+		if (stack >= irq_stack && stack <= irq_stack_end) {
+			if (stack == irq_stack_end) {
+				stack = (unsigned long *) (irq_stack_end[-1]);
 				printk(" <EOI> ");
 			}
 		} else {
@@ -241,7 +242,7 @@ void show_registers(struct pt_regs *regs)
 	int i;
 	unsigned long sp;
 	const int cpu = smp_processor_id();
-	struct task_struct *cur = cpu_pda(cpu)->pcurrent;
+	struct task_struct *cur = current;
 
 	sp = regs->sp;
 	printk("CPU %d ", cpu);

+ 17 - 17
arch/x86/kernel/entry_64.S

@@ -210,7 +210,7 @@ ENTRY(native_usergs_sysret64)
 
 	/* %rsp:at FRAMEEND */
 	.macro FIXUP_TOP_OF_STACK tmp offset=0
-	movq %gs:pda_oldrsp,\tmp
+	movq PER_CPU_VAR(old_rsp),\tmp
 	movq \tmp,RSP+\offset(%rsp)
 	movq $__USER_DS,SS+\offset(%rsp)
 	movq $__USER_CS,CS+\offset(%rsp)
@@ -221,7 +221,7 @@ ENTRY(native_usergs_sysret64)
 
 	.macro RESTORE_TOP_OF_STACK tmp offset=0
 	movq RSP+\offset(%rsp),\tmp
-	movq \tmp,%gs:pda_oldrsp
+	movq \tmp,PER_CPU_VAR(old_rsp)
 	movq EFLAGS+\offset(%rsp),\tmp
 	movq \tmp,R11+\offset(%rsp)
 	.endm
@@ -337,15 +337,15 @@ ENTRY(save_args)
 	je 1f
 	SWAPGS
 	/*
-	 * irqcount is used to check if a CPU is already on an interrupt stack
+	 * irq_count is used to check if a CPU is already on an interrupt stack
 	 * or not. While this is essentially redundant with preempt_count it is
 	 * a little cheaper to use a separate counter in the PDA (short of
 	 * moving irq_enter into assembly, which would be too much work)
 	 */
-1:	incl %gs:pda_irqcount
+1:	incl PER_CPU_VAR(irq_count)
 	jne 2f
 	popq_cfi %rax			/* move return address... */
-	mov %gs:pda_irqstackptr,%rsp
+	mov PER_CPU_VAR(irq_stack_ptr),%rsp
 	EMPTY_FRAME 0
 	pushq_cfi %rax			/* ... to the new stack */
 	/*
@@ -468,7 +468,7 @@ END(ret_from_fork)
 ENTRY(system_call)
 	CFI_STARTPROC	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
+	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	SWAPGS_UNSAFE_STACK
@@ -479,8 +479,8 @@ ENTRY(system_call)
 	 */
 ENTRY(system_call_after_swapgs)
 
-	movq	%rsp,%gs:pda_oldrsp
-	movq	%gs:pda_kernelstack,%rsp
+	movq	%rsp,PER_CPU_VAR(old_rsp)
+	movq	PER_CPU_VAR(kernel_stack),%rsp
 	/*
 	 * No need to follow this irqs off/on section - it's straight
 	 * and short:
@@ -523,7 +523,7 @@ sysret_check:
 	CFI_REGISTER	rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
 	/*CFI_REGISTER	rflags,r11*/
-	movq	%gs:pda_oldrsp, %rsp
+	movq	PER_CPU_VAR(old_rsp), %rsp
 	USERGS_SYSRET64
 
 	CFI_RESTORE_STATE
@@ -833,11 +833,11 @@ common_interrupt:
 	XCPT_FRAME
 	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
 	interrupt do_IRQ
-	/* 0(%rsp): oldrsp-ARGOFFSET */
+	/* 0(%rsp): old_rsp-ARGOFFSET */
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	decl %gs:pda_irqcount
+	decl PER_CPU_VAR(irq_count)
 	leaveq
 	CFI_DEF_CFA_REGISTER	rsp
 	CFI_ADJUST_CFA_OFFSET	-8
@@ -1260,14 +1260,14 @@ ENTRY(call_softirq)
 	CFI_REL_OFFSET rbp,0
 	mov  %rsp,%rbp
 	CFI_DEF_CFA_REGISTER rbp
-	incl %gs:pda_irqcount
-	cmove %gs:pda_irqstackptr,%rsp
+	incl PER_CPU_VAR(irq_count)
+	cmove PER_CPU_VAR(irq_stack_ptr),%rsp
 	push  %rbp			# backlink for old unwinder
 	call __do_softirq
 	leaveq
 	CFI_DEF_CFA_REGISTER	rsp
 	CFI_ADJUST_CFA_OFFSET   -8
-	decl %gs:pda_irqcount
+	decl PER_CPU_VAR(irq_count)
 	ret
 	CFI_ENDPROC
 END(call_softirq)
@@ -1297,15 +1297,15 @@ ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
 	movq %rdi, %rsp            # we don't return, adjust the stack frame
 	CFI_ENDPROC
 	DEFAULT_FRAME
-11:	incl %gs:pda_irqcount
+11:	incl PER_CPU_VAR(irq_count)
 	movq %rsp,%rbp
 	CFI_DEF_CFA_REGISTER rbp
-	cmovzq %gs:pda_irqstackptr,%rsp
+	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
 	pushq %rbp			# backlink for old unwinder
 	call xen_evtchn_do_upcall
 	popq %rsp
 	CFI_DEF_CFA_REGISTER rsp
-	decl %gs:pda_irqcount
+	decl PER_CPU_VAR(irq_count)
 	jmp  error_exit
 	CFI_ENDPROC
 END(do_hypervisor_callback)
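
For readers following the save_args hunk above: the incl/decl pairs work because the new per-cpu irq_count starts at -1 (see the DEFINE_PER_CPU in cpu/common.c earlier on this page), so only the outermost interrupt switches %rsp to irq_stack_ptr. A small C model of that logic (illustrative, not kernel code):

```c
/* Illustrative C model of the irq_count/irq_stack_ptr dance above. */
static int irq_count = -1;		/* a per-cpu variable in the real kernel */

static void irq_entry_model(unsigned long *sp, unsigned long irq_stack_top)
{
	if (++irq_count == 0)		/* outermost interrupt? (incl; jne)      */
		*sp = irq_stack_top;	/* switch to the per-cpu IRQ stack        */
	/* nested interrupts stay on the stack they are already using           */
}

static void irq_exit_model(void)
{
	--irq_count;			/* matches the decl on the exit path       */
}
```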

+ 1 - 5
arch/x86/kernel/irq.c

@@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq)
 #endif
 }
 
-#ifdef CONFIG_X86_32
-# define irq_stats(x)		(&per_cpu(irq_stat, x))
-#else
-# define irq_stats(x)		cpu_pda(x)
-#endif
+#define irq_stats(x)		(&per_cpu(irq_stat, x))
 /*
  * /proc/interrupts printing:
  */

+ 3 - 0
arch/x86/kernel/irq_64.c

@@ -19,6 +19,9 @@
 #include <asm/io_apic.h>
 #include <asm/idle.h>
 
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
 /*
  * Probabilistic stack overflow check:
  *

+ 1 - 9
arch/x86/kernel/nmi.c

@@ -61,11 +61,7 @@ static int endflag __initdata;
 
 static inline unsigned int get_nmi_count(int cpu)
 {
-#ifdef CONFIG_X86_64
-	return cpu_pda(cpu)->__nmi_count;
-#else
-	return nmi_count(cpu);
-#endif
+	return per_cpu(irq_stat, cpu).__nmi_count;
 }
 
 static inline int mce_in_progress(void)
@@ -82,12 +78,8 @@ static inline int mce_in_progress(void)
  */
 static inline unsigned int get_timer_irqs(int cpu)
 {
-#ifdef CONFIG_X86_64
-	return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
-#else
 	return per_cpu(irq_stat, cpu).apic_timer_irqs +
 		per_cpu(irq_stat, cpu).irq0_irqs;
-#endif
 }
 
 #ifdef CONFIG_SMP

+ 0 - 3
arch/x86/kernel/process_32.c

@@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
 
-DEFINE_PER_CPU(int, cpu_number);
-EXPORT_PER_CPU_SYMBOL(cpu_number);
-
 /*
  * Return saved PC of a blocked thread.
  */

+ 14 - 8
arch/x86/kernel/process_64.c

@@ -57,6 +57,12 @@
 
 asmlinkage extern void ret_from_fork(void);
 
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
+DEFINE_PER_CPU(unsigned long, old_rsp);
+static DEFINE_PER_CPU(unsigned char, is_idle);
+
 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
 
 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -75,13 +81,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
 
 void enter_idle(void)
 {
-	write_pda(isidle, 1);
+	percpu_write(is_idle, 1);
 	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
 }
 
 static void __exit_idle(void)
 {
-	if (test_and_clear_bit_pda(0, isidle) == 0)
+	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
 		return;
 	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
 }
@@ -392,7 +398,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	load_gs_index(0);
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
-	write_pda(oldrsp, new_sp);
+	percpu_write(old_rsp, new_sp);
 	regs->cs		= __USER_CS;
 	regs->ss		= __USER_DS;
 	regs->flags		= 0x200;
@@ -613,13 +619,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
-	prev->usersp = read_pda(oldrsp);
-	write_pda(oldrsp, next->usersp);
-	write_pda(pcurrent, next_p);
+	prev->usersp = percpu_read(old_rsp);
+	percpu_write(old_rsp, next->usersp);
+	percpu_write(current_task, next_p);
 
-	write_pda(kernelstack,
+	percpu_write(kernel_stack,
 		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - PDA_STACKOFFSET);
+		  THREAD_SIZE - KERNEL_STACK_OFFSET);
 #ifdef CONFIG_CC_STACKPROTECTOR
 	write_pda(stack_canary, next_p->stack_canary);
 	/*

+ 16 - 2
arch/x86/kernel/setup_percpu.c

@@ -22,6 +22,15 @@
 # define DBG(x...)
 #endif
 
+/*
+ * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but
+ * voyager wants cpu_number too.
+ */
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(int, cpu_number);
+EXPORT_PER_CPU_SYMBOL(cpu_number);
+#endif
+
 #ifdef CONFIG_X86_LOCAL_APIC
 unsigned int num_processors;
 unsigned disabled_cpus __cpuinitdata;
@@ -44,6 +53,8 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 #define	X86_64_NUMA	1	/* (used later) */
+DEFINE_PER_CPU(int, node_number) = 0;
+EXPORT_PER_CPU_SYMBOL(node_number);
 
 /*
  * Map cpu index to node index
@@ -192,7 +203,11 @@ void __init setup_per_cpu_areas(void)
 
 		memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
+		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
+		per_cpu(cpu_number, cpu) = cpu;
 #ifdef CONFIG_X86_64
+		per_cpu(irq_stack_ptr, cpu) =
+			(char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
 		/*
 		 * CPU0 modified pda in the init data area, reload pda
 		 * offset for CPU0 and clear the area for others.
@@ -202,7 +217,6 @@ void __init setup_per_cpu_areas(void)
 		else
 			memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
 #endif
-		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
 
 		DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
 	}
@@ -271,7 +285,7 @@ void __cpuinit numa_set_node(int cpu, int node)
 	per_cpu(x86_cpu_to_node_map, cpu) = node;
 
 	if (node != NUMA_NO_NODE)
-		cpu_pda(cpu)->nodenumber = node;
+		per_cpu(node_number, cpu) = node;
 }
 
 void __cpuinit numa_clear_node(int cpu)

+ 4 - 2
arch/x86/kernel/smpboot.c

@@ -790,15 +790,17 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 
 	set_idle_for_cpu(cpu, c_idle.idle);
 do_rest:
-#ifdef CONFIG_X86_32
 	per_cpu(current_task, cpu) = c_idle.idle;
+#ifdef CONFIG_X86_32
 	init_gdt(cpu);
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
 #else
-	cpu_pda(cpu)->pcurrent = c_idle.idle;
 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
+	per_cpu(kernel_stack, cpu) =
+		(unsigned long)task_stack_page(c_idle.idle) -
+		KERNEL_STACK_OFFSET + THREAD_SIZE;
 #endif
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;

+ 0 - 2
arch/x86/kernel/smpcommon.c

@@ -28,7 +28,5 @@ __cpuinit void init_gdt(int cpu)
 
 	write_gdt_entry(get_cpu_gdt_table(cpu),
 			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
-
-	per_cpu(cpu_number, cpu) = cpu;
 }
 #endif

+ 2 - 10
arch/x86/kernel/tlb_32.c

@@ -4,8 +4,8 @@
 
 #include <asm/tlbflush.h>
 
-DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
-			____cacheline_aligned = { &init_mm, 0, };
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
+			= { &init_mm, 0, };
 
 /* must come after the send_IPI functions above for inlining */
 #include <mach_ipi.h>
@@ -231,14 +231,6 @@ void flush_tlb_all(void)
 	on_each_cpu(do_flush_tlb_all, NULL, 1);
 }
 
-void reset_lazy_tlbstate(void)
-{
-	int cpu = raw_smp_processor_id();
-
-	per_cpu(cpu_tlbstate, cpu).state = 0;
-	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
-}
-
 static int init_flush_cpumask(void)
 {
 	alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);

+ 8 - 5
arch/x86/kernel/tlb_64.c

@@ -18,6 +18,9 @@
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_bau.h>
 
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
+			= { &init_mm, 0, };
+
 #include <mach_ipi.h>
 /*
  *	Smarter SMP flushing macros.
@@ -62,9 +65,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state);
  */
 void leave_mm(int cpu)
 {
-	if (read_pda(mmu_state) == TLBSTATE_OK)
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
 		BUG();
-	cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
+	cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
 	load_cr3(swapper_pg_dir);
 }
 EXPORT_SYMBOL_GPL(leave_mm);
@@ -142,8 +145,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 		 * BUG();
 		 */
 
-	if (f->flush_mm == read_pda(active_mm)) {
-		if (read_pda(mmu_state) == TLBSTATE_OK) {
+	if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
+		if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
 			if (f->flush_va == TLB_FLUSH_ALL)
 				local_flush_tlb();
 			else
@@ -281,7 +284,7 @@ static void do_flush_tlb_all(void *info)
 	unsigned long cpu = smp_processor_id();
 
 	__flush_tlb_all();
-	if (read_pda(mmu_state) == TLBSTATE_LAZY)
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
 		leave_mm(cpu);
 }
 

+ 1 - 5
arch/x86/xen/mmu.c

@@ -1063,11 +1063,7 @@ static void drop_other_mm_ref(void *info)
 	struct mm_struct *mm = info;
 	struct mm_struct *active_mm;
 
-#ifdef CONFIG_X86_64
-	active_mm = read_pda(active_mm);
-#else
-	active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
-#endif
+	active_mm = percpu_read(cpu_tlbstate.active_mm);
 
 	if (active_mm == mm)
 		leave_mm(smp_processor_id());

+ 4 - 17
arch/x86/xen/smp.c

@@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
  */
 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
 {
-#ifdef CONFIG_X86_32
-	__get_cpu_var(irq_stat).irq_resched_count++;
-#else
-	add_pda(irq_resched_count, 1);
-#endif
+	inc_irq_stat(irq_resched_count);
 
 	return IRQ_HANDLED;
 }
@@ -283,12 +279,11 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 	struct task_struct *idle = idle_task(cpu);
 	int rc;
 
+	per_cpu(current_task, cpu) = idle;
 #ifdef CONFIG_X86_32
 	init_gdt(cpu);
-	per_cpu(current_task, cpu) = idle;
 	irq_ctx_init(cpu);
 #else
-	cpu_pda(cpu)->pcurrent = idle;
 	clear_tsk_thread_flag(idle, TIF_FORK);
 #endif
 	xen_setup_timer(cpu);
@@ -435,11 +430,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 {
 	irq_enter();
 	generic_smp_call_function_interrupt();
-#ifdef CONFIG_X86_32
-	__get_cpu_var(irq_stat).irq_call_count++;
-#else
-	add_pda(irq_call_count, 1);
-#endif
+	inc_irq_stat(irq_call_count);
 	irq_exit();
 
 	return IRQ_HANDLED;
@@ -449,11 +440,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 {
 	irq_enter();
 	generic_smp_call_function_single_interrupt();
-#ifdef CONFIG_X86_32
-	__get_cpu_var(irq_stat).irq_call_count++;
-#else
-	add_pda(irq_call_count, 1);
-#endif
+	inc_irq_stat(irq_call_count);
 	irq_exit();
 
 	return IRQ_HANDLED;

+ 15 - 16
arch/x86/xen/xen-asm_64.S

@@ -17,6 +17,7 @@
 #include <asm/processor-flags.h>
 #include <asm/errno.h>
 #include <asm/segment.h>
+#include <asm/percpu.h>
 
 #include <xen/interface/xen.h>
 
@@ -28,12 +29,10 @@
 
 #if 1
 /*
-	x86-64 does not yet support direct access to percpu variables
-	via a segment override, so we just need to make sure this code
-	never gets used
+	FIXME: x86_64 now can support direct access to percpu variables
+	via a segment override.  Update xen accordingly.
  */
  */
 #define BUG			ud2a
 #endif
 #endif
 
 /*
 	BUG
 	BUG
 
 	/* Unmask events */
+	movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 
 
 	/* Preempt here doesn't matter because that will deal with
 	   any pending interrupts.  The pending check may end up being
 	   run on the wrong CPU, but that doesn't hurt. */
 
 	/* Test for pending */
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
 	jz 1f
 	jz 1f
 
 2:	call check_events
 ENTRY(xen_irq_disable_direct)
 ENTRY(xen_irq_disable_direct)
 	BUG
 
+	movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 ENDPATCH(xen_irq_disable_direct)
 ENDPATCH(xen_irq_disable_direct)
 	ret
 	ENDPROC(xen_irq_disable_direct)
 ENTRY(xen_save_fl_direct)
 ENTRY(xen_save_fl_direct)
 	BUG
 
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 	setz %ah
 	setz %ah
 	addb %ah,%ah
 ENDPATCH(xen_save_fl_direct)
 	BUG
 	BUG
 
 	testb $X86_EFLAGS_IF>>8, %ah
+	setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 	/* Preempt here doesn't matter because that will deal with
 	/* Preempt here doesn't matter because that will deal with
 	   any pending interrupts.  The pending check may end up being
 	   run on the wrong CPU, but that doesn't hurt. */
 
 	/* check for unmasked and pending */
+	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
 	jz 1f
 	jz 1f
 2:	call check_events
 1:
 ENTRY(xen_sysret64)
 ENTRY(xen_sysret64)
 	/* We're already on the usermode stack at this point, but still
 	   with the kernel gs, so we can easily switch back */
-	movq %gs:pda_kernelstack,%rsp
+	movq %rsp, PER_CPU_VAR(old_rsp)
+	movq PER_CPU_VAR(kernel_stack),%rsp
 
 
 	pushq $__USER_DS
+	pushq PER_CPU_VAR(old_rsp)
 	pushq %r11
 	pushq %r11
 	pushq $__USER_CS
 	pushq %rcx
 ENTRY(xen_sysret32)
 ENTRY(xen_sysret32)
 	/* We're already on the usermode stack at this point, but still
 	   with the kernel gs, so we can easily switch back */
-	movq %gs:pda_kernelstack, %rsp
+	movq %rsp, PER_CPU_VAR(old_rsp)
+	movq PER_CPU_VAR(kernel_stack), %rsp
 
 
 	pushq $__USER32_DS
+	pushq PER_CPU_VAR(old_rsp)
 	pushq %r11
 	pushq %r11
 	pushq $__USER32_CS
 	pushq %rcx