@@ -1075,165 +1075,6 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 	free_bootmem(__pa(ai), ai->__ai_size);
 }
 
-#if defined(CONFIG_SMP) && (defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
-			    defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK))
-/**
- * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
- * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: minimum free size for dynamic allocation in bytes
- * @atom_size: allocation atom size
- * @cpu_distance_fn: callback to determine distance between cpus, optional
- *
- * This function determines grouping of units, their mappings to cpus
- * and other parameters considering needed percpu size, allocation
- * atom size and distances between CPUs.
- *
- * Groups are always mutliples of atom size and CPUs which are of
- * LOCAL_DISTANCE both ways are grouped together and share space for
- * units in the same group. The returned configuration is guaranteed
- * to have CPUs on different nodes on different groups and >=75% usage
- * of allocated virtual address space.
- *
- * RETURNS:
- * On success, pointer to the new allocation_info is returned. On
- * failure, ERR_PTR value is returned.
- */
-static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-				size_t reserved_size, size_t dyn_size,
-				size_t atom_size,
-				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
-{
-	static int group_map[NR_CPUS] __initdata;
-	static int group_cnt[NR_CPUS] __initdata;
-	const size_t static_size = __per_cpu_end - __per_cpu_start;
-	int nr_groups = 1, nr_units = 0;
-	size_t size_sum, min_unit_size, alloc_size;
-	int upa, max_upa, uninitialized_var(best_upa);	/* units_per_alloc */
-	int last_allocs, group, unit;
-	unsigned int cpu, tcpu;
-	struct pcpu_alloc_info *ai;
-	unsigned int *cpu_map;
-
-	/* this function may be called multiple times */
-	memset(group_map, 0, sizeof(group_map));
-	memset(group_cnt, 0, sizeof(group_cnt));
-
-	/* calculate size_sum and ensure dyn_size is enough for early alloc */
-	size_sum = PFN_ALIGN(static_size + reserved_size +
-			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
-	dyn_size = size_sum - static_size - reserved_size;
-
-	/*
-	 * Determine min_unit_size, alloc_size and max_upa such that
-	 * alloc_size is multiple of atom_size and is the smallest
-	 * which can accomodate 4k aligned segments which are equal to
-	 * or larger than min_unit_size.
-	 */
-	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
-
-	alloc_size = roundup(min_unit_size, atom_size);
-	upa = alloc_size / min_unit_size;
-	while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
-		upa--;
-	max_upa = upa;
-
-	/* group cpus according to their proximity */
-	for_each_possible_cpu(cpu) {
-		group = 0;
-	next_group:
-		for_each_possible_cpu(tcpu) {
-			if (cpu == tcpu)
-				break;
-			if (group_map[tcpu] == group && cpu_distance_fn &&
-			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
-			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
-				group++;
-				nr_groups = max(nr_groups, group + 1);
-				goto next_group;
-			}
-		}
-		group_map[cpu] = group;
-		group_cnt[group]++;
-	}
-
-	/*
-	 * Expand unit size until address space usage goes over 75%
-	 * and then as much as possible without using more address
-	 * space.
-	 */
-	last_allocs = INT_MAX;
-	for (upa = max_upa; upa; upa--) {
-		int allocs = 0, wasted = 0;
-
-		if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
-			continue;
-
-		for (group = 0; group < nr_groups; group++) {
-			int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
-			allocs += this_allocs;
-			wasted += this_allocs * upa - group_cnt[group];
-		}
-
-		/*
-		 * Don't accept if wastage is over 1/3. The
-		 * greater-than comparison ensures upa==1 always
-		 * passes the following check.
-		 */
-		if (wasted > num_possible_cpus() / 3)
-			continue;
-
-		/* and then don't consume more memory */
-		if (allocs > last_allocs)
-			break;
-		last_allocs = allocs;
-		best_upa = upa;
-	}
-	upa = best_upa;
-
-	/* allocate and fill alloc_info */
-	for (group = 0; group < nr_groups; group++)
-		nr_units += roundup(group_cnt[group], upa);
-
-	ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
-	if (!ai)
-		return ERR_PTR(-ENOMEM);
-	cpu_map = ai->groups[0].cpu_map;
-
-	for (group = 0; group < nr_groups; group++) {
-		ai->groups[group].cpu_map = cpu_map;
-		cpu_map += roundup(group_cnt[group], upa);
-	}
-
-	ai->static_size = static_size;
-	ai->reserved_size = reserved_size;
-	ai->dyn_size = dyn_size;
-	ai->unit_size = alloc_size / upa;
-	ai->atom_size = atom_size;
-	ai->alloc_size = alloc_size;
-
-	for (group = 0, unit = 0; group_cnt[group]; group++) {
-		struct pcpu_group_info *gi = &ai->groups[group];
-
-		/*
-		 * Initialize base_offset as if all groups are located
-		 * back-to-back. The caller should update this to
-		 * reflect actual allocation.
-		 */
-		gi->base_offset = unit * ai->unit_size;
-
-		for_each_possible_cpu(cpu)
-			if (group_map[cpu] == group)
-				gi->cpu_map[gi->nr_units++] = cpu;
-		gi->nr_units = roundup(gi->nr_units, upa);
-		unit += gi->nr_units;
-	}
-	BUG_ON(unit != nr_units);
-
-	return ai;
-}
-#endif /* CONFIG_SMP && (CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK ||
-	   CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) */
-
 /**
  * pcpu_dump_alloc_info - print out information about pcpu_alloc_info
  * @lvl: loglevel
@@ -1532,8 +1373,180 @@ static int __init percpu_alloc_setup(char *str)
 }
 early_param("percpu_alloc", percpu_alloc_setup);
 
+/*
+ * pcpu_embed_first_chunk() is used by the generic percpu setup.
+ * Build it if needed by the arch config or the generic setup is going
+ * to be used.
+ */
 #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
 	!defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
+#define BUILD_EMBED_FIRST_CHUNK
+#endif
+
+/* build pcpu_page_first_chunk() iff needed by the arch config */
+#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
+#define BUILD_PAGE_FIRST_CHUNK
+#endif
+
+/* pcpu_build_alloc_info() is used by both embed and page first chunk */
+#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
+/**
+ * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
+ * @reserved_size: the size of reserved percpu area in bytes
+ * @dyn_size: minimum free size for dynamic allocation in bytes
+ * @atom_size: allocation atom size
+ * @cpu_distance_fn: callback to determine distance between cpus, optional
+ *
+ * This function determines grouping of units, their mappings to cpus
+ * and other parameters considering needed percpu size, allocation
+ * atom size and distances between CPUs.
+ *
+ * Groups are always mutliples of atom size and CPUs which are of
+ * LOCAL_DISTANCE both ways are grouped together and share space for
+ * units in the same group. The returned configuration is guaranteed
+ * to have CPUs on different nodes on different groups and >=75% usage
+ * of allocated virtual address space.
+ *
+ * RETURNS:
+ * On success, pointer to the new allocation_info is returned. On
+ * failure, ERR_PTR value is returned.
+ */
+static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+				size_t reserved_size, size_t dyn_size,
+				size_t atom_size,
+				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
+{
+	static int group_map[NR_CPUS] __initdata;
+	static int group_cnt[NR_CPUS] __initdata;
+	const size_t static_size = __per_cpu_end - __per_cpu_start;
+	int nr_groups = 1, nr_units = 0;
+	size_t size_sum, min_unit_size, alloc_size;
+	int upa, max_upa, uninitialized_var(best_upa);	/* units_per_alloc */
+	int last_allocs, group, unit;
+	unsigned int cpu, tcpu;
+	struct pcpu_alloc_info *ai;
+	unsigned int *cpu_map;
+
+	/* this function may be called multiple times */
+	memset(group_map, 0, sizeof(group_map));
+	memset(group_cnt, 0, sizeof(group_cnt));
+
+	/* calculate size_sum and ensure dyn_size is enough for early alloc */
+	size_sum = PFN_ALIGN(static_size + reserved_size +
+			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+	dyn_size = size_sum - static_size - reserved_size;
+
+	/*
+	 * Determine min_unit_size, alloc_size and max_upa such that
+	 * alloc_size is multiple of atom_size and is the smallest
+	 * which can accomodate 4k aligned segments which are equal to
+	 * or larger than min_unit_size.
+	 */
+	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
+
+	alloc_size = roundup(min_unit_size, atom_size);
+	upa = alloc_size / min_unit_size;
+	while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+		upa--;
+	max_upa = upa;
+
+	/* group cpus according to their proximity */
+	for_each_possible_cpu(cpu) {
+		group = 0;
+	next_group:
+		for_each_possible_cpu(tcpu) {
+			if (cpu == tcpu)
+				break;
+			if (group_map[tcpu] == group && cpu_distance_fn &&
+			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
+			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
+				group++;
+				nr_groups = max(nr_groups, group + 1);
+				goto next_group;
+			}
+		}
+		group_map[cpu] = group;
+		group_cnt[group]++;
+	}
+
+	/*
+	 * Expand unit size until address space usage goes over 75%
+	 * and then as much as possible without using more address
+	 * space.
+	 */
+	last_allocs = INT_MAX;
+	for (upa = max_upa; upa; upa--) {
+		int allocs = 0, wasted = 0;
+
+		if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+			continue;
+
+		for (group = 0; group < nr_groups; group++) {
+			int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
+			allocs += this_allocs;
+			wasted += this_allocs * upa - group_cnt[group];
+		}
+
+		/*
+		 * Don't accept if wastage is over 1/3. The
+		 * greater-than comparison ensures upa==1 always
+		 * passes the following check.
+		 */
+		if (wasted > num_possible_cpus() / 3)
+			continue;
+
+		/* and then don't consume more memory */
+		if (allocs > last_allocs)
+			break;
+		last_allocs = allocs;
+		best_upa = upa;
+	}
+	upa = best_upa;
+
+	/* allocate and fill alloc_info */
+	for (group = 0; group < nr_groups; group++)
+		nr_units += roundup(group_cnt[group], upa);
+
+	ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
+	if (!ai)
+		return ERR_PTR(-ENOMEM);
+	cpu_map = ai->groups[0].cpu_map;
+
+	for (group = 0; group < nr_groups; group++) {
+		ai->groups[group].cpu_map = cpu_map;
+		cpu_map += roundup(group_cnt[group], upa);
+	}
+
+	ai->static_size = static_size;
+	ai->reserved_size = reserved_size;
+	ai->dyn_size = dyn_size;
+	ai->unit_size = alloc_size / upa;
+	ai->atom_size = atom_size;
+	ai->alloc_size = alloc_size;
+
+	for (group = 0, unit = 0; group_cnt[group]; group++) {
+		struct pcpu_group_info *gi = &ai->groups[group];
+
+		/*
+		 * Initialize base_offset as if all groups are located
+		 * back-to-back. The caller should update this to
+		 * reflect actual allocation.
+		 */
+		gi->base_offset = unit * ai->unit_size;
+
+		for_each_possible_cpu(cpu)
+			if (group_map[cpu] == group)
+				gi->cpu_map[gi->nr_units++] = cpu;
+		gi->nr_units = roundup(gi->nr_units, upa);
+		unit += gi->nr_units;
+	}
+	BUG_ON(unit != nr_units);
+
+	return ai;
+}
+#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
+
+#if defined(BUILD_EMBED_FIRST_CHUNK)
 /**
  * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
  * @reserved_size: the size of reserved percpu area in bytes
@@ -1662,10 +1675,9 @@ out_free:
 		free_bootmem(__pa(areas), areas_size);
 	return rc;
 }
-#endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK ||
-	   !CONFIG_HAVE_SETUP_PER_CPU_AREA */
+#endif /* BUILD_EMBED_FIRST_CHUNK */
 
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+#ifdef BUILD_PAGE_FIRST_CHUNK
 /**
  * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
  * @reserved_size: the size of reserved percpu area in bytes
@@ -1773,7 +1785,7 @@ out_free_ar:
 	pcpu_free_alloc_info(ai);
 	return rc;
 }
-#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */
+#endif /* BUILD_PAGE_FIRST_CHUNK */
 
 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 /*