
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu: update comments to reflect that percpu allocations are always zero-filled
  percpu: Optimize __get_cpu_var()
  x86, percpu: Optimize this_cpu_ptr
  percpu: clear memory allocated with the km allocator
  percpu: fix build breakage on s390 and cleanup build configuration tests
  percpu: use percpu allocator on UP too
  percpu: reduce PCPU_MIN_UNIT_SIZE to 32k
  vmalloc: pcpu_get/free_vm_areas() aren't needed on UP

Fixed up trivial conflicts in include/linux/percpu.h
Linus Torvalds 14 years ago
parent
commit
0fc0531e0a
10 changed files with 281 additions and 236 deletions
  1. arch/x86/include/asm/percpu.h (+14, -0)
  2. include/asm-generic/percpu.h (+9, -5)
  3. include/linux/percpu.h (+6, -25)
  4. include/linux/vmalloc.h (+2, -0)
  5. mm/Kconfig (+8, -0)
  6. mm/Makefile (+1, -6)
  7. mm/percpu-km.c (+6, -2)
  8. mm/percpu.c (+233, -168)
  9. mm/percpu_up.c (+0, -30)
  10. mm/vmalloc.c (+2, -0)

+ 14 - 0
arch/x86/include/asm/percpu.h

@@ -47,6 +47,20 @@
 #ifdef CONFIG_SMP
 #define __percpu_arg(x)		"%%"__stringify(__percpu_seg)":%P" #x
 #define __my_cpu_offset		percpu_read(this_cpu_off)
+
+/*
+ * Compared to the generic __my_cpu_offset version, the following
+ * saves one instruction and avoids clobbering a temp register.
+ */
+#define __this_cpu_ptr(ptr)				\
+({							\
+	unsigned long tcp_ptr__;			\
+	__verify_pcpu_ptr(ptr);				\
+	asm volatile("add " __percpu_arg(1) ", %0"	\
+		     : "=r" (tcp_ptr__)			\
+		     : "m" (this_cpu_off), "0" (ptr));	\
+	(typeof(*(ptr)) __kernel __force *)tcp_ptr__;	\
+})
 #else
 #define __percpu_arg(x)		"%P" #x
 #endif
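The new x86 __this_cpu_ptr() folds the per-cpu offset addition into a single add against the %gs-based this_cpu_off, instead of first loading the offset into a temporary register as the generic SHIFT_PERCPU_PTR() path does. A minimal usage sketch follows; the per-cpu variable and helper names are hypothetical and not part of this patch:

/* illustrative only: a per-cpu counter bumped via __this_cpu_ptr() */
DEFINE_PER_CPU(unsigned long, rx_packets);

static void account_rx_packet(void)
{
	unsigned long *cnt = __this_cpu_ptr(&rx_packets);

	(*cnt)++;	/* raw accessor: caller is expected to have preemption disabled */
}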

+ 9 - 5
include/asm-generic/percpu.h

@@ -55,14 +55,18 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
  */
 #define per_cpu(var, cpu) \
 	(*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu)))
-#define __get_cpu_var(var) \
-	(*SHIFT_PERCPU_PTR(&(var), my_cpu_offset))
-#define __raw_get_cpu_var(var) \
-	(*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset))
 
-#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
+#ifndef __this_cpu_ptr
 #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
+#endif
+#ifdef CONFIG_DEBUG_PREEMPT
+#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
+#else
+#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr)
+#endif
 
+#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
+#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var)))
 
 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void setup_per_cpu_areas(void);
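After this reshuffle __get_cpu_var(var) is simply *this_cpu_ptr(&(var)) and __raw_get_cpu_var(var) is *__this_cpu_ptr(&(var)), so an arch override of __this_cpu_ptr() (as x86 now provides) benefits both, while the CONFIG_DEBUG_PREEMPT build keeps the checked my_cpu_offset path for this_cpu_ptr(). A short sketch of the two layers; the per-cpu variable is hypothetical:

/* illustrative: the same store through the checked and raw layers */
DEFINE_PER_CPU(int, work_pending);	/* hypothetical */

static void mark_work(void)
{
	/* checked form: may warn under CONFIG_DEBUG_PREEMPT if preemptible */
	__get_cpu_var(work_pending) = 1;
}

static void mark_work_raw(void)
{
	/* raw form: no preemption check, for paths that handle it themselves */
	__raw_get_cpu_var(work_pending) = 1;
}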

+ 6 - 25
include/linux/percpu.h

@@ -48,10 +48,8 @@
 	preempt_enable();				\
 } while (0)
 
-#ifdef CONFIG_SMP
-
 /* minimum unit size, also is the maximum supported allocation size */
-#define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(64 << 10)
+#define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(32 << 10)
 
 /*
  * Percpu allocator can serve percpu allocations before slab is
@@ -146,37 +144,20 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
  * dynamically allocated. Non-atomic access to the current CPU's
  * version should probably be combined with get_cpu()/put_cpu().
  */
+#ifdef CONFIG_SMP
 #define per_cpu_ptr(ptr, cpu)	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
+#else
+#define per_cpu_ptr(ptr, cpu)	({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); })
+#endif
 
 extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
 extern bool is_kernel_percpu_address(unsigned long addr);
 
-#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
+#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
 extern void __init setup_per_cpu_areas(void);
 #endif
 extern void __init percpu_init_late(void);
 
-#else /* CONFIG_SMP */
-
-#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); })
-
-/* can't distinguish from other static vars, always false */
-static inline bool is_kernel_percpu_address(unsigned long addr)
-{
-	return false;
-}
-
-static inline void __init setup_per_cpu_areas(void) { }
-
-static inline void __init percpu_init_late(void) { }
-
-static inline void *pcpu_lpage_remapped(void *kaddr)
-{
-	return NULL;
-}
-
-#endif /* CONFIG_SMP */
-
 extern void __percpu *__alloc_percpu(size_t size, size_t align);
 extern void free_percpu(void __percpu *__pdata);
 extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
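With the SMP/UP split removed, per_cpu_ptr(), __alloc_percpu() and friends are declared unconditionally; on UP, per_cpu_ptr() just verifies and returns the pointer and for_each_possible_cpu() iterates once. A sketch of the common iteration pattern that now compiles identically on both configurations; the statistics type and names are hypothetical:

struct my_stats {			/* hypothetical example type */
	unsigned long events;
};

static struct my_stats __percpu *stats;

/* sums the counter from every possible CPU */
static unsigned long collect_events(void)
{
	unsigned long total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu_ptr(stats, cpu)->events;

	return total;
}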

+ 2 - 0
include/linux/vmalloc.h

@@ -117,10 +117,12 @@ extern rwlock_t vmlist_lock;
 extern struct vm_struct *vmlist;
 extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
 
+#ifdef CONFIG_SMP
 struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 				     const size_t *sizes, int nr_vms,
 				     size_t align, gfp_t gfp_mask);
 
 void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms);
+#endif
 
 #endif /* _LINUX_VMALLOC_H */

+ 8 - 0
mm/Kconfig

@@ -301,3 +301,11 @@ config NOMMU_INITIAL_TRIM_EXCESS
 	  of 1 says that all excess pages should be trimmed.
 
 	  See Documentation/nommu-mmap.txt for more information.
+
+#
+# UP and nommu archs use km based percpu allocator
+#
+config NEED_PER_CPU_KM
+	depends on !SMP
+	bool
+	default y

+ 1 - 6
mm/Makefile

@@ -11,7 +11,7 @@ obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   maccess.o page_alloc.o page-writeback.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-			   page_isolation.o mm_init.o mmu_context.o \
+			   page_isolation.o mm_init.o mmu_context.o percpu.o \
 			   $(mmu-y)
 obj-y += init-mm.o
 
@@ -36,11 +36,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
-ifdef CONFIG_SMP
-obj-y += percpu.o
-else
-obj-y += percpu_up.o
-endif
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
 obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o

+ 6 - 2
mm/percpu-km.c

@@ -27,7 +27,7 @@
  *   chunk size is not aligned.  percpu-km code will whine about it.
  */
 
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+#if defined(CONFIG_SMP) && defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
 #error "contiguous percpu allocation is incompatible with paged first chunk"
 #endif
 
@@ -35,7 +35,11 @@
 
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
 {
-	/* noop */
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu)
+		memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
+
 	return 0;
 }
 

+ 233 - 168
mm/percpu.c

@@ -76,6 +76,7 @@
 #define PCPU_SLOT_BASE_SHIFT		5	/* 1-31 shares the same slot */
 #define PCPU_DFL_MAP_ALLOC		16	/* start a map with 16 ents */
 
+#ifdef CONFIG_SMP
 /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
 #ifndef __addr_to_pcpu_ptr
 #define __addr_to_pcpu_ptr(addr)					\
@@ -89,6 +90,11 @@
 			 (unsigned long)pcpu_base_addr -		\
 			 (unsigned long)__per_cpu_start)
 #endif
+#else	/* CONFIG_SMP */
+/* on UP, it's always identity mapped */
+#define __addr_to_pcpu_ptr(addr)	(void __percpu *)(addr)
+#define __pcpu_ptr_to_addr(ptr)		(void __force *)(ptr)
+#endif	/* CONFIG_SMP */
 
 struct pcpu_chunk {
 	struct list_head	list;		/* linked to pcpu_slot lists */
@@ -820,8 +826,8 @@ fail_unlock_mutex:
  * @size: size of area to allocate in bytes
  * @align: alignment of area (max PAGE_SIZE)
  *
- * Allocate percpu area of @size bytes aligned at @align.  Might
- * sleep.  Might trigger writeouts.
+ * Allocate zero-filled percpu area of @size bytes aligned at @align.
+ * Might sleep.  Might trigger writeouts.
  *
  * CONTEXT:
  * Does GFP_KERNEL allocation.
@@ -840,9 +846,10 @@ EXPORT_SYMBOL_GPL(__alloc_percpu);
  * @size: size of area to allocate in bytes
  * @align: alignment of area (max PAGE_SIZE)
  *
- * Allocate percpu area of @size bytes aligned at @align from reserved
- * percpu area if arch has set it up; otherwise, allocation is served
- * from the same dynamic area.  Might sleep.  Might trigger writeouts.
+ * Allocate zero-filled percpu area of @size bytes aligned at @align
+ * from reserved percpu area if arch has set it up; otherwise,
+ * allocation is served from the same dynamic area.  Might sleep.
+ * Might trigger writeouts.
  *
  * CONTEXT:
  * Does GFP_KERNEL allocation.
@@ -949,6 +956,7 @@ EXPORT_SYMBOL_GPL(free_percpu);
  */
 bool is_kernel_percpu_address(unsigned long addr)
 {
+#ifdef CONFIG_SMP
 	const size_t static_size = __per_cpu_end - __per_cpu_start;
 	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
 	unsigned int cpu;
@@ -959,6 +967,8 @@ bool is_kernel_percpu_address(unsigned long addr)
 		if ((void *)addr >= start && (void *)addr < start + static_size)
 			return true;
         }
+#endif
+	/* on UP, can't distinguish from other static vars, always false */
 	return false;
 }
 
@@ -1066,161 +1076,6 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 	free_bootmem(__pa(ai), ai->__ai_size);
 }
 
-/**
- * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
- * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: minimum free size for dynamic allocation in bytes
- * @atom_size: allocation atom size
- * @cpu_distance_fn: callback to determine distance between cpus, optional
- *
- * This function determines grouping of units, their mappings to cpus
- * and other parameters considering needed percpu size, allocation
- * atom size and distances between CPUs.
- *
- * Groups are always mutliples of atom size and CPUs which are of
- * LOCAL_DISTANCE both ways are grouped together and share space for
- * units in the same group.  The returned configuration is guaranteed
- * to have CPUs on different nodes on different groups and >=75% usage
- * of allocated virtual address space.
- *
- * RETURNS:
- * On success, pointer to the new allocation_info is returned.  On
- * failure, ERR_PTR value is returned.
- */
-static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-				size_t reserved_size, size_t dyn_size,
-				size_t atom_size,
-				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
-{
-	static int group_map[NR_CPUS] __initdata;
-	static int group_cnt[NR_CPUS] __initdata;
-	const size_t static_size = __per_cpu_end - __per_cpu_start;
-	int nr_groups = 1, nr_units = 0;
-	size_t size_sum, min_unit_size, alloc_size;
-	int upa, max_upa, uninitialized_var(best_upa);	/* units_per_alloc */
-	int last_allocs, group, unit;
-	unsigned int cpu, tcpu;
-	struct pcpu_alloc_info *ai;
-	unsigned int *cpu_map;
-
-	/* this function may be called multiple times */
-	memset(group_map, 0, sizeof(group_map));
-	memset(group_cnt, 0, sizeof(group_cnt));
-
-	/* calculate size_sum and ensure dyn_size is enough for early alloc */
-	size_sum = PFN_ALIGN(static_size + reserved_size +
-			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
-	dyn_size = size_sum - static_size - reserved_size;
-
-	/*
-	 * Determine min_unit_size, alloc_size and max_upa such that
-	 * alloc_size is multiple of atom_size and is the smallest
-	 * which can accomodate 4k aligned segments which are equal to
-	 * or larger than min_unit_size.
-	 */
-	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
-
-	alloc_size = roundup(min_unit_size, atom_size);
-	upa = alloc_size / min_unit_size;
-	while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
-		upa--;
-	max_upa = upa;
-
-	/* group cpus according to their proximity */
-	for_each_possible_cpu(cpu) {
-		group = 0;
-	next_group:
-		for_each_possible_cpu(tcpu) {
-			if (cpu == tcpu)
-				break;
-			if (group_map[tcpu] == group && cpu_distance_fn &&
-			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
-			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
-				group++;
-				nr_groups = max(nr_groups, group + 1);
-				goto next_group;
-			}
-		}
-		group_map[cpu] = group;
-		group_cnt[group]++;
-	}
-
-	/*
-	 * Expand unit size until address space usage goes over 75%
-	 * and then as much as possible without using more address
-	 * space.
-	 */
-	last_allocs = INT_MAX;
-	for (upa = max_upa; upa; upa--) {
-		int allocs = 0, wasted = 0;
-
-		if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
-			continue;
-
-		for (group = 0; group < nr_groups; group++) {
-			int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
-			allocs += this_allocs;
-			wasted += this_allocs * upa - group_cnt[group];
-		}
-
-		/*
-		 * Don't accept if wastage is over 1/3.  The
-		 * greater-than comparison ensures upa==1 always
-		 * passes the following check.
-		 */
-		if (wasted > num_possible_cpus() / 3)
-			continue;
-
-		/* and then don't consume more memory */
-		if (allocs > last_allocs)
-			break;
-		last_allocs = allocs;
-		best_upa = upa;
-	}
-	upa = best_upa;
-
-	/* allocate and fill alloc_info */
-	for (group = 0; group < nr_groups; group++)
-		nr_units += roundup(group_cnt[group], upa);
-
-	ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
-	if (!ai)
-		return ERR_PTR(-ENOMEM);
-	cpu_map = ai->groups[0].cpu_map;
-
-	for (group = 0; group < nr_groups; group++) {
-		ai->groups[group].cpu_map = cpu_map;
-		cpu_map += roundup(group_cnt[group], upa);
-	}
-
-	ai->static_size = static_size;
-	ai->reserved_size = reserved_size;
-	ai->dyn_size = dyn_size;
-	ai->unit_size = alloc_size / upa;
-	ai->atom_size = atom_size;
-	ai->alloc_size = alloc_size;
-
-	for (group = 0, unit = 0; group_cnt[group]; group++) {
-		struct pcpu_group_info *gi = &ai->groups[group];
-
-		/*
-		 * Initialize base_offset as if all groups are located
-		 * back-to-back.  The caller should update this to
-		 * reflect actual allocation.
-		 */
-		gi->base_offset = unit * ai->unit_size;
-
-		for_each_possible_cpu(cpu)
-			if (group_map[cpu] == group)
-				gi->cpu_map[gi->nr_units++] = cpu;
-		gi->nr_units = roundup(gi->nr_units, upa);
-		unit += gi->nr_units;
-	}
-	BUG_ON(unit != nr_units);
-
-	return ai;
-}
-
 /**
  * pcpu_dump_alloc_info - print out information about pcpu_alloc_info
  * @lvl: loglevel
@@ -1363,7 +1218,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 
 	/* sanity checks */
 	PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
+#ifdef CONFIG_SMP
 	PCPU_SETUP_BUG_ON(!ai->static_size);
+#endif
 	PCPU_SETUP_BUG_ON(!base_addr);
 	PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
 	PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
@@ -1488,6 +1345,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	return 0;
 }
 
+#ifdef CONFIG_SMP
+
 const char *pcpu_fc_names[PCPU_FC_NR] __initdata = {
 	[PCPU_FC_AUTO]	= "auto",
 	[PCPU_FC_EMBED]	= "embed",
@@ -1515,8 +1374,180 @@ static int __init percpu_alloc_setup(char *str)
 }
 early_param("percpu_alloc", percpu_alloc_setup);
 
+/*
+ * pcpu_embed_first_chunk() is used by the generic percpu setup.
+ * Build it if needed by the arch config or the generic setup is going
+ * to be used.
+ */
 #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
 	!defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
+#define BUILD_EMBED_FIRST_CHUNK
+#endif
+
+/* build pcpu_page_first_chunk() iff needed by the arch config */
+#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
+#define BUILD_PAGE_FIRST_CHUNK
+#endif
+
+/* pcpu_build_alloc_info() is used by both embed and page first chunk */
+#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
+/**
+ * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
+ * @reserved_size: the size of reserved percpu area in bytes
+ * @dyn_size: minimum free size for dynamic allocation in bytes
+ * @atom_size: allocation atom size
+ * @cpu_distance_fn: callback to determine distance between cpus, optional
+ *
+ * This function determines grouping of units, their mappings to cpus
+ * and other parameters considering needed percpu size, allocation
+ * atom size and distances between CPUs.
+ *
+ * Groups are always mutliples of atom size and CPUs which are of
+ * LOCAL_DISTANCE both ways are grouped together and share space for
+ * units in the same group.  The returned configuration is guaranteed
+ * to have CPUs on different nodes on different groups and >=75% usage
+ * of allocated virtual address space.
+ *
+ * RETURNS:
+ * On success, pointer to the new allocation_info is returned.  On
+ * failure, ERR_PTR value is returned.
+ */
+static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+				size_t reserved_size, size_t dyn_size,
+				size_t atom_size,
+				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
+{
+	static int group_map[NR_CPUS] __initdata;
+	static int group_cnt[NR_CPUS] __initdata;
+	const size_t static_size = __per_cpu_end - __per_cpu_start;
+	int nr_groups = 1, nr_units = 0;
+	size_t size_sum, min_unit_size, alloc_size;
+	int upa, max_upa, uninitialized_var(best_upa);	/* units_per_alloc */
+	int last_allocs, group, unit;
+	unsigned int cpu, tcpu;
+	struct pcpu_alloc_info *ai;
+	unsigned int *cpu_map;
+
+	/* this function may be called multiple times */
+	memset(group_map, 0, sizeof(group_map));
+	memset(group_cnt, 0, sizeof(group_cnt));
+
+	/* calculate size_sum and ensure dyn_size is enough for early alloc */
+	size_sum = PFN_ALIGN(static_size + reserved_size +
+			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+	dyn_size = size_sum - static_size - reserved_size;
+
+	/*
+	 * Determine min_unit_size, alloc_size and max_upa such that
+	 * alloc_size is multiple of atom_size and is the smallest
+	 * which can accomodate 4k aligned segments which are equal to
+	 * or larger than min_unit_size.
+	 */
+	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
+
+	alloc_size = roundup(min_unit_size, atom_size);
+	upa = alloc_size / min_unit_size;
+	while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+		upa--;
+	max_upa = upa;
+
+	/* group cpus according to their proximity */
+	for_each_possible_cpu(cpu) {
+		group = 0;
+	next_group:
+		for_each_possible_cpu(tcpu) {
+			if (cpu == tcpu)
+				break;
+			if (group_map[tcpu] == group && cpu_distance_fn &&
+			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
+			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
+				group++;
+				nr_groups = max(nr_groups, group + 1);
+				goto next_group;
+			}
+		}
+		group_map[cpu] = group;
+		group_cnt[group]++;
+	}
+
+	/*
+	 * Expand unit size until address space usage goes over 75%
+	 * and then as much as possible without using more address
+	 * space.
+	 */
+	last_allocs = INT_MAX;
+	for (upa = max_upa; upa; upa--) {
+		int allocs = 0, wasted = 0;
+
+		if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+			continue;
+
+		for (group = 0; group < nr_groups; group++) {
+			int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
+			allocs += this_allocs;
+			wasted += this_allocs * upa - group_cnt[group];
+		}
+
+		/*
+		 * Don't accept if wastage is over 1/3.  The
+		 * greater-than comparison ensures upa==1 always
+		 * passes the following check.
+		 */
+		if (wasted > num_possible_cpus() / 3)
+			continue;
+
+		/* and then don't consume more memory */
+		if (allocs > last_allocs)
+			break;
+		last_allocs = allocs;
+		best_upa = upa;
+	}
+	upa = best_upa;
+
+	/* allocate and fill alloc_info */
+	for (group = 0; group < nr_groups; group++)
+		nr_units += roundup(group_cnt[group], upa);
+
+	ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
+	if (!ai)
+		return ERR_PTR(-ENOMEM);
+	cpu_map = ai->groups[0].cpu_map;
+
+	for (group = 0; group < nr_groups; group++) {
+		ai->groups[group].cpu_map = cpu_map;
+		cpu_map += roundup(group_cnt[group], upa);
+	}
+
+	ai->static_size = static_size;
+	ai->reserved_size = reserved_size;
+	ai->dyn_size = dyn_size;
+	ai->unit_size = alloc_size / upa;
+	ai->atom_size = atom_size;
+	ai->alloc_size = alloc_size;
+
+	for (group = 0, unit = 0; group_cnt[group]; group++) {
+		struct pcpu_group_info *gi = &ai->groups[group];
+
+		/*
+		 * Initialize base_offset as if all groups are located
+		 * back-to-back.  The caller should update this to
+		 * reflect actual allocation.
+		 */
+		gi->base_offset = unit * ai->unit_size;
+
+		for_each_possible_cpu(cpu)
+			if (group_map[cpu] == group)
+				gi->cpu_map[gi->nr_units++] = cpu;
+		gi->nr_units = roundup(gi->nr_units, upa);
+		unit += gi->nr_units;
+	}
+	BUG_ON(unit != nr_units);
+
+	return ai;
+}
+#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
+
+#if defined(BUILD_EMBED_FIRST_CHUNK)
 /**
  * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
  * @reserved_size: the size of reserved percpu area in bytes
@@ -1645,10 +1676,9 @@ out_free:
 		free_bootmem(__pa(areas), areas_size);
 	return rc;
 }
-#endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK ||
-	  !CONFIG_HAVE_SETUP_PER_CPU_AREA */
+#endif /* BUILD_EMBED_FIRST_CHUNK */
 
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+#ifdef BUILD_PAGE_FIRST_CHUNK
 /**
  * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
  * @reserved_size: the size of reserved percpu area in bytes
@@ -1756,10 +1786,11 @@ out_free_ar:
 	pcpu_free_alloc_info(ai);
 	return rc;
 }
-#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */
+#endif /* BUILD_PAGE_FIRST_CHUNK */
 
+#ifndef	CONFIG_HAVE_SETUP_PER_CPU_AREA
 /*
- * Generic percpu area setup.
+ * Generic SMP percpu area setup.
  *
  * The embedding helper is used because its behavior closely resembles
  * the original non-dynamic generic percpu area setup.  This is
@@ -1770,7 +1801,6 @@ out_free_ar:
  * on the physical linear memory mapping which uses large page
  * mappings on applicable archs.
  */
-#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
@@ -1799,13 +1829,48 @@ void __init setup_per_cpu_areas(void)
 				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
 				    pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
 	if (rc < 0)
-		panic("Failed to initialized percpu areas.");
+		panic("Failed to initialize percpu areas.");
 
 	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
 	for_each_possible_cpu(cpu)
 		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
 }
-#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
+#endif	/* CONFIG_HAVE_SETUP_PER_CPU_AREA */
+
+#else	/* CONFIG_SMP */
+
+/*
+ * UP percpu area setup.
+ *
+ * UP always uses km-based percpu allocator with identity mapping.
+ * Static percpu variables are indistinguishable from the usual static
+ * variables and don't require any special preparation.
+ */
+void __init setup_per_cpu_areas(void)
+{
+	const size_t unit_size =
+		roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE,
+					 PERCPU_DYNAMIC_RESERVE));
+	struct pcpu_alloc_info *ai;
+	void *fc;
+
+	ai = pcpu_alloc_alloc_info(1, 1);
+	fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+	if (!ai || !fc)
+		panic("Failed to allocate memory for percpu areas.");
+
+	ai->dyn_size = unit_size;
+	ai->unit_size = unit_size;
+	ai->atom_size = unit_size;
+	ai->alloc_size = unit_size;
+	ai->groups[0].nr_units = 1;
+	ai->groups[0].cpu_map[0] = 0;
+
+	if (pcpu_setup_first_chunk(ai, fc) < 0)
+		panic("Failed to initialize percpu areas.");
+}
+
+#endif	/* CONFIG_SMP */
 
 /*
  * First and reserved chunks are initialized with temporary allocation
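The comment updates above make the zero-fill behaviour an explicit part of the allocator's contract, and together with the memset added to percpu-km and the new UP setup_per_cpu_areas() the guarantee now holds on UP builds as well. A minimal sketch of a caller relying on it; the counter structure and init function are hypothetical:

struct hit_counter {			/* hypothetical example */
	unsigned long hits;
	unsigned long misses;
};

static struct hit_counter __percpu *counters;

static int __init hit_counters_init(void)
{
	counters = alloc_percpu(struct hit_counter);
	if (!counters)
		return -ENOMEM;

	/* no per-CPU memset needed: percpu allocations are zero-filled */
	return 0;
}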

+ 0 - 30
mm/percpu_up.c

@@ -1,30 +0,0 @@
-/*
- * mm/percpu_up.c - dummy percpu memory allocator implementation for UP
- */
-
-#include <linux/module.h>
-#include <linux/percpu.h>
-#include <linux/slab.h>
-
-void __percpu *__alloc_percpu(size_t size, size_t align)
-{
-	/*
-	 * Can't easily make larger alignment work with kmalloc.  WARN
-	 * on it.  Larger alignment should only be used for module
-	 * percpu sections on SMP for which this path isn't used.
-	 */
-	WARN_ON_ONCE(align > SMP_CACHE_BYTES);
-	return (void __percpu __force *)kzalloc(size, GFP_KERNEL);
-}
-EXPORT_SYMBOL_GPL(__alloc_percpu);
-
-void free_percpu(void __percpu *p)
-{
-	kfree(this_cpu_ptr(p));
-}
-EXPORT_SYMBOL_GPL(free_percpu);
-
-phys_addr_t per_cpu_ptr_to_phys(void *addr)
-{
-	return __pa(addr);
-}

+ 2 - 0
mm/vmalloc.c

@@ -2065,6 +2065,7 @@ void free_vm_area(struct vm_struct *area)
 }
 EXPORT_SYMBOL_GPL(free_vm_area);
 
+#ifdef CONFIG_SMP
 static struct vmap_area *node_to_va(struct rb_node *n)
 {
 	return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
@@ -2345,6 +2346,7 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 		free_vm_area(vms[i]);
 	kfree(vms);
 }
+#endif	/* CONFIG_SMP */
 
 #ifdef CONFIG_PROC_FS
 static void *s_start(struct seq_file *m, loff_t *pos)