@@ -48,8 +48,8 @@
  * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
  *   regular address to percpu pointer and back
  *
- * - use pcpu_setup_static() during percpu area initialization to
- *   setup kernel static percpu area
+ * - use pcpu_setup_first_chunk() during percpu area initialization to
+ *   setup the first chunk containing the kernel static percpu area
  */
 
 #include <linux/bitmap.h>
@@ -67,7 +67,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-#define PCPU_MIN_UNIT_PAGES	16	/* max alloc size in pages */
 #define PCPU_SLOT_BASE_SHIFT	5	/* 1-31 shares the same slot */
 #define PCPU_DFL_MAP_ALLOC	16	/* start a map with 16 ents */
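Note: PCPU_MIN_UNIT_PAGES is removed here, and __alloc_percpu() below
switches to a byte-based PCPU_MIN_UNIT_SIZE that is not defined anywhere
in this file. Presumably it moves to include/linux/percpu.h keeping the
same 16-page value, along these lines (an assumption, not shown in this
diff):

	#define PCPU_MIN_UNIT_SIZE	(16UL << PAGE_SHIFT)	/* assumed */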
@@ -80,6 +79,7 @@ struct pcpu_chunk {
 	int			map_used;	/* # of map entries used */
 	int			map_alloc;	/* # of map entries allocated */
 	int			*map;		/* allocation map */
+	bool			immutable;	/* no [de]population allowed */
 	struct page		*page[];	/* #cpus * UNIT_PAGES */
 };
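For orientation, the chunk descriptor after this hunk looks roughly as
follows. This is a sketch reconstructed only from fields visible in this
patch (list, rb_node, vm, free_size and contig_hint all appear in later
hunks); ordering and any members not referenced here are best-effort:

	struct pcpu_chunk {
		struct list_head	list;		/* linked to pcpu_slot lists */
		struct rb_node		rb_node;	/* keyed by chunk->vm->addr */
		struct vm_struct	*vm;		/* mapped vmalloc region */
		int			free_size;	/* free bytes in the chunk */
		int			contig_hint;	/* max contiguous size hint */
		int			map_used;	/* # of map entries used */
		int			map_alloc;	/* # of map entries allocated */
		int			*map;		/* allocation map */
		bool			immutable;	/* no [de]population allowed */
		struct page		*page[];	/* #cpus * UNIT_PAGES */
	};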
@@ -521,6 +521,9 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
 	unsigned int last = num_possible_cpus() - 1;
 	unsigned int cpu;
 
+	/* unmap must not be done on immutable chunk */
+	WARN_ON(chunk->immutable);
+
 	/*
 	 * Each flushing trial can be very expensive, issue flush on
 	 * the whole region at once rather than doing it for each cpu.
@@ -602,6 +605,9 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end)
 	unsigned int cpu;
 	int err;
 
+	/* map must not be done on immutable chunk */
+	WARN_ON(chunk->immutable);
+
 	for_each_possible_cpu(cpu) {
 		err = map_kernel_range_noflush(
 				pcpu_chunk_addr(chunk, cpu, page_start),
@@ -727,8 +733,7 @@ void *__alloc_percpu(size_t size, size_t align)
 	struct pcpu_chunk *chunk;
 	int slot, off;
 
-	if (unlikely(!size || size > PCPU_MIN_UNIT_PAGES * PAGE_SIZE ||
-		     align > PAGE_SIZE)) {
+	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
 		WARN(true, "illegal size (%zu) or align (%zu) for "
 		     "percpu allocation\n", size, align);
 		return NULL;
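The effect of the reworked limit check, illustrated with hypothetical
call sites (struct my_stats is a stand-in, not from this patch):

	/* accepted: size within PCPU_MIN_UNIT_SIZE, alignment within a page */
	struct my_stats *stats = __alloc_percpu(sizeof(struct my_stats),
						__alignof__(struct my_stats));

	/* rejected: triggers the WARN above and returns NULL */
	void *bad = __alloc_percpu(64, 2 * PAGE_SIZE);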
@@ -776,6 +781,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu);
 
 static void pcpu_kill_chunk(struct pcpu_chunk *chunk)
 {
+	WARN_ON(chunk->immutable);
 	pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false);
 	list_del(&chunk->list);
 	rb_erase(&chunk->rb_node, &pcpu_addr_root);
@@ -821,33 +827,73 @@ void free_percpu(void *ptr)
 EXPORT_SYMBOL_GPL(free_percpu);
 
 /**
- * pcpu_setup_static - initialize kernel static percpu area
- * @populate_pte_fn: callback to allocate pagetable
- * @pages: num_possible_cpus() * PFN_UP(cpu_size) pages
- * @cpu_size: the size of static percpu area in bytes
- *
- * Initialize kernel static percpu area.  The caller should allocate
- * all the necessary pages and pass them in @pages.
- * @populate_pte_fn() is called on each page to be used for percpu
- * mapping and is responsible for making sure all the necessary page
- * tables for the page is allocated.
+ * pcpu_setup_first_chunk - initialize the first percpu chunk
+ * @get_page_fn: callback to fetch page pointer
+ * @static_size: the size of static percpu area in bytes
+ * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, 0 for auto
+ * @free_size: free size in bytes, 0 for auto
+ * @base_addr: mapped address, NULL for auto
+ * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary
+ *
+ * Initialize the first percpu chunk which contains the kernel static
+ * percpu area.  This function is to be called from arch percpu area
+ * setup path.  The first two parameters are mandatory.  The rest are
+ * optional.
+ *
+ * @get_page_fn() should return pointer to percpu page given cpu
+ * number and page number.  It should at least return enough pages to
+ * cover the static area.  The returned pages for static area should
+ * have been initialized with valid data.  If @unit_size is specified,
+ * it can also return pages after the static area.  NULL return
+ * indicates end of pages for the cpu.  Note that @get_page_fn() must
+ * return the same number of pages for all cpus.
+ *
+ * @unit_size, if non-zero, determines unit size and must be aligned
+ * to PAGE_SIZE and equal to or larger than @static_size + @free_size.
+ *
+ * @free_size determines the number of free bytes after the static
+ * area in the first chunk.  If zero, whatever is left over is
+ * available.  Specifying a non-zero value makes percpu leave the
+ * area after @static_size + @free_size alone.
+ *
+ * Non-NULL @base_addr means that the caller has already allocated the
+ * virtual region for the first chunk and mapped it.  percpu must not
+ * mess with the chunk.  Note that @base_addr with zero @unit_size or
+ * non-NULL @populate_pte_fn doesn't make any sense.
+ *
+ * @populate_pte_fn is used to populate the pagetable.  NULL means the
+ * caller already populated the pagetable.
 *
 * RETURNS:
 * The determined pcpu_unit_size which can be used to initialize
 * percpu access.
 */
-size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn,
-				struct page **pages, size_t cpu_size)
+size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
+				     size_t static_size, size_t unit_size,
+				     size_t free_size, void *base_addr,
+				     pcpu_populate_pte_fn_t populate_pte_fn)
 {
 	static struct vm_struct static_vm;
 	struct pcpu_chunk *static_chunk;
-	int nr_cpu_pages = DIV_ROUND_UP(cpu_size, PAGE_SIZE);
 	unsigned int cpu;
+	int nr_pages;
 	int err, i;
 
-	pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_PAGES, PFN_UP(cpu_size));
+	/* sanity checks */
+	BUG_ON(!static_size);
+	BUG_ON(!unit_size && free_size);
+	BUG_ON(unit_size && unit_size < static_size + free_size);
+	BUG_ON(unit_size & ~PAGE_MASK);
+	BUG_ON(base_addr && !unit_size);
+	BUG_ON(base_addr && populate_pte_fn);
 
-	pcpu_static_size = cpu_size;
+	if (unit_size)
+		pcpu_unit_pages = unit_size >> PAGE_SHIFT;
+	else
+		pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT,
+					PFN_UP(static_size));
+
+	pcpu_static_size = static_size;
 	pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
 	pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
 	pcpu_chunk_struct_size = sizeof(struct pcpu_chunk)
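As a usage sketch of the new entry point under the simplest convention,
with everything left on auto. All pcpur_* names and the
populate_extra_pte() pagetable helper are hypothetical arch-side
stand-ins, not part of this patch:

	/* hypothetical arch code: percpu picks the unit size, allocates
	 * the vm area and performs the mapping itself */
	static struct page *pcpur_get_page(unsigned int cpu, int pageno)
	{
		/* hand back pre-allocated static-area pages; NULL ends the list */
		if (pageno >= PFN_UP(__per_cpu_end - __per_cpu_start))
			return NULL;
		return pcpur_pages[cpu][pageno];	/* assumed page array */
	}

	static void __init pcpur_populate_pte(unsigned long addr)
	{
		populate_extra_pte(addr);	/* assumed pagetable helper */
	}

	void __init setup_per_cpu_areas(void)
	{
		size_t static_size = __per_cpu_end - __per_cpu_start;
		size_t unit_size;

		unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size,
						   0, 0, NULL,
						   pcpur_populate_pte);
		/* unit_size then feeds per-cpu offset initialization */
	}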
@@ -862,29 +908,66 @@ size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn,
 	for (i = 0; i < pcpu_nr_slots; i++)
 		INIT_LIST_HEAD(&pcpu_slot[i]);
 
-	/* init and register vm area */
-	static_vm.flags = VM_ALLOC;
-	static_vm.size = pcpu_chunk_size;
-	vm_area_register_early(&static_vm, PAGE_SIZE);
-
 	/* init static_chunk */
 	static_chunk = alloc_bootmem(pcpu_chunk_struct_size);
 	INIT_LIST_HEAD(&static_chunk->list);
 	static_chunk->vm = &static_vm;
-	static_chunk->free_size = pcpu_unit_size - pcpu_static_size;
+
+	if (free_size)
+		static_chunk->free_size = free_size;
+	else
+		static_chunk->free_size = pcpu_unit_size - pcpu_static_size;
+
 	static_chunk->contig_hint = static_chunk->free_size;
 
-	/* assign pages and map them */
+	/* allocate vm address */
+	static_vm.flags = VM_ALLOC;
+	static_vm.size = pcpu_chunk_size;
+
+	if (!base_addr)
+		vm_area_register_early(&static_vm, PAGE_SIZE);
+	else {
+		/*
+		 * Pages already mapped.  No need to remap into
+		 * vmalloc area.  In this case the static chunk can't
+		 * be mapped or unmapped by percpu and is marked
+		 * immutable.
+		 */
+		static_vm.addr = base_addr;
+		static_chunk->immutable = true;
+	}
+
+	/* assign pages */
+	nr_pages = -1;
 	for_each_possible_cpu(cpu) {
-		for (i = 0; i < nr_cpu_pages; i++) {
-			*pcpu_chunk_pagep(static_chunk, cpu, i) = *pages++;
-			populate_pte_fn(pcpu_chunk_addr(static_chunk, cpu, i));
+		for (i = 0; i < pcpu_unit_pages; i++) {
+			struct page *page = get_page_fn(cpu, i);
+
+			if (!page)
+				break;
+			*pcpu_chunk_pagep(static_chunk, cpu, i) = page;
 		}
+
+		BUG_ON(i < PFN_UP(pcpu_static_size));
+
+		if (nr_pages < 0)
+			nr_pages = i;
+		else
+			BUG_ON(nr_pages != i);
 	}
 
-	err = pcpu_map(static_chunk, 0, nr_cpu_pages);
-	if (err)
-		panic("failed to setup static percpu area, err=%d\n", err);
+	/* map them */
+	if (populate_pte_fn) {
+		for_each_possible_cpu(cpu)
+			for (i = 0; i < nr_pages; i++)
+				populate_pte_fn(pcpu_chunk_addr(static_chunk,
+								cpu, i));
+
+		err = pcpu_map(static_chunk, 0, nr_pages);
+		if (err)
+			panic("failed to setup static percpu area, err=%d\n",
+			      err);
+	}
 
 	/* link static_chunk in */
 	pcpu_chunk_relocate(static_chunk, -1);
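Conversely, an arch that has already reserved and mapped the first chunk
itself takes the immutable path added above: pass @unit_size and
@base_addr, and leave @populate_pte_fn NULL. A sketch with hypothetical
pcpue_* variables, obeying the sanity checks (pcpue_unit_size
page-aligned and >= static_size + pcpue_free_size):

	/* hypothetical: first chunk pre-mapped at pcpue_base_addr, so
	 * percpu must not touch it and marks the chunk immutable */
	unit_size = pcpu_setup_first_chunk(pcpue_get_page, static_size,
					   pcpue_unit_size, pcpue_free_size,
					   pcpue_base_addr, NULL);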