@@ -1008,10 +1008,10 @@ static void drain_pages(unsigned int cpu)
 		struct per_cpu_pageset *pset;
 		struct per_cpu_pages *pcp;
 
-		pset = zone_pcp(zone, cpu);
+		local_irq_save(flags);
+		pset = per_cpu_ptr(zone->pageset, cpu);
 
 		pcp = &pset->pcp;
-		local_irq_save(flags);
 		free_pcppages_bulk(zone, pcp->count, pcp);
 		pcp->count = 0;
 		local_irq_restore(flags);
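
The hunk above is the core of the conversion: each zone used to carry an NR_CPUS-sized array of per_cpu_pageset pointers reached through zone_pcp(zone, cpu); it now carries a single percpu pointer, zone->pageset, whose per-CPU instances are resolved with per_cpu_ptr(). A minimal sketch of that access pattern, not taken from the patch (the demo_* names are made up for illustration):

#include <linux/percpu.h>
#include <linux/irqflags.h>
#include <linux/errno.h>

/* illustrative stand-in for the pageset structure, not mm code */
struct demo_pcp {
	int count;
};

static struct demo_pcp __percpu *demo_pageset;

static int demo_init(void)
{
	/* one instance per possible CPU; the percpu allocator handles the offsets */
	demo_pageset = alloc_percpu(struct demo_pcp);
	return demo_pageset ? 0 : -ENOMEM;
}

/* reset a given CPU's instance, the way drain_pages() does for each zone */
static void demo_drain(unsigned int cpu)
{
	unsigned long flags;
	struct demo_pcp *pcp;

	local_irq_save(flags);
	pcp = per_cpu_ptr(demo_pageset, cpu);	/* explicit CPU index */
	pcp->count = 0;
	local_irq_restore(flags);
}

As in the patched drain_pages(), the pointer is fetched and used inside a single irqs-off region, so the pageset cannot change under the caller.
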
@@ -1095,7 +1095,6 @@ static void free_hot_cold_page(struct page *page, int cold)
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 
-	pcp = &zone_pcp(zone, get_cpu())->pcp;
 	migratetype = get_pageblock_migratetype(page);
 	set_page_private(page, migratetype);
 	local_irq_save(flags);
@@ -1118,6 +1117,7 @@ static void free_hot_cold_page(struct page *page, int cold)
 		migratetype = MIGRATE_MOVABLE;
 	}
 
+	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	if (cold)
 		list_add_tail(&page->lru, &pcp->lists[migratetype]);
 	else
@@ -1130,7 +1130,6 @@ static void free_hot_cold_page(struct page *page, int cold)
 
 out:
 	local_irq_restore(flags);
-	put_cpu();
 }
 
 void free_hot_page(struct page *page)
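
In this freeing fast path the pageset wanted is always the executing CPU's, so the patch drops the get_cpu()/put_cpu() bracket and takes the pointer with this_cpu_ptr() only after local_irq_save(): with interrupts off the task cannot migrate, so the per-CPU pointer stays valid for the whole critical section. Continuing the illustrative sketch from above (demo_* names are not kernel symbols):

/* touch the executing CPU's instance; irqs-off doubles as the preemption guard */
static void demo_free_hot(void)
{
	unsigned long flags;
	struct demo_pcp *pcp;

	local_irq_save(flags);
	pcp = this_cpu_ptr(demo_pageset);	/* no explicit smp_processor_id() needed */
	pcp->count++;
	local_irq_restore(flags);
}

The same reasoning removes get_cpu()/put_cpu() from buffered_rmqueue() in the next hunks.
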
@@ -1180,17 +1179,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
 	unsigned long flags;
 	struct page *page;
 	int cold = !!(gfp_flags & __GFP_COLD);
-	int cpu;
 
 again:
-	cpu = get_cpu();
 	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
 		struct list_head *list;
 
-		pcp = &zone_pcp(zone, cpu)->pcp;
-		list = &pcp->lists[migratetype];
 		local_irq_save(flags);
+		pcp = &this_cpu_ptr(zone->pageset)->pcp;
+		list = &pcp->lists[migratetype];
 		if (list_empty(list)) {
 			pcp->count += rmqueue_bulk(zone, 0,
 					pcp->batch, list,
@@ -1231,7 +1228,6 @@ again:
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone);
 	local_irq_restore(flags);
-	put_cpu();
 
 	VM_BUG_ON(bad_range(zone, page));
 	if (prep_new_page(page, order, gfp_flags))
@@ -1240,7 +1236,6 @@ again:
 
 failed:
 	local_irq_restore(flags);
-	put_cpu();
 	return NULL;
 }
 
@@ -2179,7 +2174,7 @@ void show_free_areas(void)
 		for_each_online_cpu(cpu) {
 			struct per_cpu_pageset *pageset;
 
-			pageset = zone_pcp(zone, cpu);
+			pageset = per_cpu_ptr(zone->pageset, cpu);
 
 			printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
 			       cpu, pageset->pcp.high,
@@ -2744,10 +2739,29 @@ static void build_zonelist_cache(pg_data_t *pgdat)
 
 #endif	/* CONFIG_NUMA */
 
+/*
+ * Boot pageset table. One per cpu which is going to be used for all
+ * zones and all nodes. The parameters will be set in such a way
+ * that an item put on a list will immediately be handed over to
+ * the buddy list. This is safe since pageset manipulation is done
+ * with interrupts disabled.
+ *
+ * The boot_pagesets must be kept even after bootup is complete for
+ * unused processors and/or zones. They do play a role for bootstrapping
+ * hotplugged processors.
+ *
+ * zoneinfo_show() and maybe other functions do
+ * not check if the processor is online before following the pageset pointer.
+ * Other parts of the kernel may not check if the zone is available.
+ */
+static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
+static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
+
 /* return values int ....just for stop_machine() */
 static int __build_all_zonelists(void *dummy)
 {
 	int nid;
+	int cpu;
 
 #ifdef CONFIG_NUMA
 	memset(node_load, 0, sizeof(node_load));
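
boot_pageset itself changes from a static NR_CPUS array (removed further down) to a statically defined percpu variable. DEFINE_PER_CPU() reserves one copy per CPU in the percpu section at link time, so it is usable before the dynamic percpu allocator exists, and per_cpu(var, cpu) names a particular CPU's copy. A hedged sketch of the idiom, again with made-up demo_* names:

#include <linux/percpu.h>
#include <linux/cpumask.h>

/* static percpu variable: one copy per CPU, laid out by the linker */
static DEFINE_PER_CPU(struct demo_pcp, demo_boot_pcp);

static void demo_boot_init(void)
{
	int cpu;

	/* initialize every possible CPU's copy, online or not */
	for_each_possible_cpu(cpu)
		per_cpu(demo_boot_pcp, cpu).count = 0;
}

The __build_all_zonelists() hunk that follows performs exactly this kind of loop for boot_pageset.
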
@@ -2758,6 +2772,23 @@ static int __build_all_zonelists(void *dummy)
 		build_zonelists(pgdat);
 		build_zonelist_cache(pgdat);
 	}
+
+	/*
+	 * Initialize the boot_pagesets that are going to be used
+	 * for bootstrapping processors. The real pagesets for
+	 * each zone will be allocated later when the per cpu
+	 * allocator is available.
+	 *
+	 * boot_pagesets are used also for bootstrapping offline
+	 * cpus if the system is already booted because the pagesets
+	 * are needed to initialize allocators on a specific cpu too.
+	 * F.e. the percpu allocator needs the page allocator which
+	 * needs the percpu allocator in order to allocate its pagesets
+	 * (a chicken-egg dilemma).
+	 */
+	for_each_possible_cpu(cpu)
+		setup_pageset(&per_cpu(boot_pageset, cpu), 0);
+
 	return 0;
 }
 
@@ -3095,121 +3126,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
 		pcp->batch = PAGE_SHIFT * 8;
 }
 
-
-#ifdef CONFIG_NUMA
-/*
- * Boot pageset table. One per cpu which is going to be used for all
- * zones and all nodes. The parameters will be set in such a way
- * that an item put on a list will immediately be handed over to
- * the buddy list. This is safe since pageset manipulation is done
- * with interrupts disabled.
- *
- * Some NUMA counter updates may also be caught by the boot pagesets.
- *
- * The boot_pagesets must be kept even after bootup is complete for
- * unused processors and/or zones. They do play a role for bootstrapping
- * hotplugged processors.
- *
- * zoneinfo_show() and maybe other functions do
- * not check if the processor is online before following the pageset pointer.
- * Other parts of the kernel may not check if the zone is available.
- */
-static struct per_cpu_pageset boot_pageset[NR_CPUS];
-
 /*
- * Dynamically allocate memory for the
- * per cpu pageset array in struct zone.
+ * Allocate per cpu pagesets and initialize them.
+ * Before this call only boot pagesets were available.
+ * Boot pagesets will no longer be used by this processor
+ * after setup_per_cpu_pageset().
  */
-static int __cpuinit process_zones(int cpu)
+void __init setup_per_cpu_pageset(void)
 {
-	struct zone *zone, *dzone;
-	int node = cpu_to_node(cpu);
-
-	node_set_state(node, N_CPU);	/* this node has a cpu */
+	struct zone *zone;
+	int cpu;
 
 	for_each_populated_zone(zone) {
-		zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
-					 GFP_KERNEL, node);
-		if (!zone_pcp(zone, cpu))
-			goto bad;
-
-		setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
-
-		if (percpu_pagelist_fraction)
-			setup_pagelist_highmark(zone_pcp(zone, cpu),
-				(zone->present_pages / percpu_pagelist_fraction));
-	}
-
-	return 0;
-bad:
-	for_each_zone(dzone) {
-		if (!populated_zone(dzone))
-			continue;
-		if (dzone == zone)
-			break;
-		kfree(zone_pcp(dzone, cpu));
-		zone_pcp(dzone, cpu) = &boot_pageset[cpu];
-	}
-	return -ENOMEM;
-}
+		zone->pageset = alloc_percpu(struct per_cpu_pageset);
 
-static inline void free_zone_pagesets(int cpu)
-{
-	struct zone *zone;
-
-	for_each_zone(zone) {
-		struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
+		for_each_possible_cpu(cpu) {
+			struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
 
-		/* Free per_cpu_pageset if it is slab allocated */
-		if (pset != &boot_pageset[cpu])
-			kfree(pset);
-		zone_pcp(zone, cpu) = &boot_pageset[cpu];
-	}
-}
+			setup_pageset(pcp, zone_batchsize(zone));
 
-static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
-		unsigned long action,
-		void *hcpu)
-{
-	int cpu = (long)hcpu;
-	int ret = NOTIFY_OK;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		if (process_zones(cpu))
-			ret = NOTIFY_BAD;
-		break;
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		free_zone_pagesets(cpu);
-		break;
-	default:
-		break;
+			if (percpu_pagelist_fraction)
+				setup_pagelist_highmark(pcp,
+					(zone->present_pages /
+						percpu_pagelist_fraction));
+		}
 	}
-	return ret;
 }
 
-static struct notifier_block __cpuinitdata pageset_notifier =
-	{ &pageset_cpuup_callback, NULL, 0 };
-
-void __init setup_per_cpu_pageset(void)
-{
-	int err;
-
-	/* Initialize per_cpu_pageset for cpu 0.
-	 * A cpuup callback will do this for every cpu
-	 * as it comes online
-	 */
-	err = process_zones(smp_processor_id());
-	BUG_ON(err);
-	register_cpu_notifier(&pageset_notifier);
-}
-
-#endif
-
 static noinline __init_refok
 int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 {
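
After the conversion, setup_per_cpu_pageset() boils down to one alloc_percpu() per zone plus per-CPU parameterization; the kmalloc_node() bookkeeping, the error unwinding and the CPU-hotplug notifier are all gone because the percpu allocator provides an instance for every possible CPU up front. The lifecycle it relies on, sketched with the same illustrative names (the real zone->pageset is never freed here; free_percpu() appears only to complete the picture):

#include <linux/percpu.h>
#include <linux/errno.h>

static int demo_setup(void)
{
	int cpu;

	demo_pageset = alloc_percpu(struct demo_pcp);	/* NULL on failure */
	if (!demo_pageset)
		return -ENOMEM;

	/* parameterize each possible CPU's instance */
	for_each_possible_cpu(cpu)
		per_cpu_ptr(demo_pageset, cpu)->count = 0;

	return 0;
}

static void demo_teardown(void)
{
	free_percpu(demo_pageset);
}
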
@@ -3263,7 +3206,7 @@ static int __zone_pcp_update(void *data)
 		struct per_cpu_pageset *pset;
 		struct per_cpu_pages *pcp;
 
-		pset = zone_pcp(zone, cpu);
+		pset = per_cpu_ptr(zone->pageset, cpu);
 		pcp = &pset->pcp;
 
 		local_irq_save(flags);
@@ -3281,21 +3224,17 @@ void zone_pcp_update(struct zone *zone)
 
 static __meminit void zone_pcp_init(struct zone *zone)
 {
-	int cpu;
-	unsigned long batch = zone_batchsize(zone);
+	/*
+	 * per cpu subsystem is not up at this point. The following code
+	 * relies on the ability of the linker to provide the
+	 * offset of a (static) per cpu variable into the per cpu area.
+	 */
+	zone->pageset = &boot_pageset;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-#ifdef CONFIG_NUMA
-		/* Early boot. Slab allocator not functional yet */
-		zone_pcp(zone, cpu) = &boot_pageset[cpu];
-		setup_pageset(&boot_pageset[cpu],0);
-#else
-		setup_pageset(zone_pcp(zone,cpu), batch);
-#endif
-	}
 	if (zone->present_pages)
-		printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
-			zone->name, zone->present_pages, batch);
+		printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n",
+			zone->name, zone->present_pages,
+			zone_batchsize(zone));
 }
 
 __meminit int init_currently_empty_zone(struct zone *zone,
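
zone_pcp_init() runs before the dynamic percpu allocator is available, so it points zone->pageset at &boot_pageset: taking the address of a DEFINE_PER_CPU variable yields a usable percpu pointer because the linker gives static percpu variables an offset in the same per-CPU area that per_cpu_ptr() and this_cpu_ptr() translate. The same bootstrap pattern in sketch form (illustrative names, not the patch's code):

/* start on the static percpu copy, switch to a dynamic allocation later */
static struct demo_pcp __percpu *demo_zone_pcp;

static void demo_early_init(void)
{
	/* before alloc_percpu() works: borrow the static variable's percpu offset */
	demo_zone_pcp = &demo_boot_pcp;
}

static void demo_late_init(void)
{
	struct demo_pcp __percpu *p = alloc_percpu(struct demo_pcp);

	if (p)	/* callers keep dereferencing through per_cpu_ptr()/this_cpu_ptr() */
		demo_zone_pcp = p;
}
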
@@ -4809,10 +4748,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
 	if (!write || (ret == -EINVAL))
 		return ret;
 	for_each_populated_zone(zone) {
-		for_each_online_cpu(cpu) {
+		for_each_possible_cpu(cpu) {
 			unsigned long high;
 			high = zone->present_pages / percpu_pagelist_fraction;
-			setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+			setup_pagelist_highmark(
+				per_cpu_ptr(zone->pageset, cpu), high);
 		}
 	}
 	return 0;
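
A closing note on the last hunk: the sysctl handler now walks for_each_possible_cpu() rather than for_each_online_cpu(). Since alloc_percpu() backs every possible CPU with a pageset, the new high-water mark can be written into offline CPUs' instances as well, and nothing has to be replayed by a hotplug callback when they come online. That rationale is a plausible reading of the change; the patch itself does not spell it out.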