@@ -202,11 +202,18 @@ static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+/* Movable memory ranges, will also be used by memblock subsystem. */
+struct movablemem_map movablemem_map = {
+	.acpi = false,
+	.nr_map = 0,
+};
+
 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __initdata required_kernelcore;
 static unsigned long __initdata required_movablecore;
 static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
+static unsigned long __meminitdata zone_movable_limit[MAX_NUMNODES];
 
 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
 int movable_zone;
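
For reference, the movablemem_map bookkeeping used throughout this patch lives in a structure declared elsewhere in the series (include/linux/mm.h). The sketch below is an assumption based on how the fields are used here, not the literal definition:

	/* Assumed shape; each entry describes a range [start_pfn, end_pfn). */
	struct movablemem_entry {
		unsigned long start_pfn;
		unsigned long end_pfn;
	};

	struct movablemem_map {
		bool acpi;		/* true: take hotplug ranges from firmware */
		int nr_map;		/* number of valid entries in map[] */
		struct movablemem_entry map[MAX_NUMNODES];
	};
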
@@ -240,15 +247,20 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 	int ret = 0;
 	unsigned seq;
 	unsigned long pfn = page_to_pfn(page);
+	unsigned long sp, start_pfn;
 
 	do {
 		seq = zone_span_seqbegin(zone);
-		if (pfn >= zone->zone_start_pfn + zone->spanned_pages)
-			ret = 1;
-		else if (pfn < zone->zone_start_pfn)
+		start_pfn = zone->zone_start_pfn;
+		sp = zone->spanned_pages;
+		if (!zone_spans_pfn(zone, pfn))
 			ret = 1;
 	} while (zone_span_seqretry(zone, seq));
 
+	if (ret)
+		pr_err("page %lu outside zone [ %lu - %lu ]\n",
+			pfn, start_pfn, start_pfn + sp);
+
 	return ret;
 }
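
The zone helpers this hunk starts using are introduced alongside this patch in include/linux/mmzone.h; they presumably look roughly like this:

	/* Sketch of the assumed helpers; not part of this file. */
	static inline unsigned long zone_end_pfn(const struct zone *zone)
	{
		return zone->zone_start_pfn + zone->spanned_pages;
	}

	static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
	{
		return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
	}

The remaining hunks convert the open-coded "pfn < zone_start_pfn + spanned_pages" checks in this file to these helpers.
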
@@ -288,7 +300,7 @@ static void bad_page(struct page *page)
 
 	/* Don't complain about poisoned pages */
 	if (PageHWPoison(page)) {
-		reset_page_mapcount(page); /* remove PageBuddy */
+		page_mapcount_reset(page); /* remove PageBuddy */
 		return;
 	}
 
@@ -320,7 +332,7 @@ static void bad_page(struct page *page)
 	dump_stack();
 out:
 	/* Leave bad fields for debug, except PageBuddy could make trouble */
-	reset_page_mapcount(page); /* remove PageBuddy */
+	page_mapcount_reset(page); /* remove PageBuddy */
 	add_taint(TAINT_BAD_PAGE);
 }
 
@@ -533,6 +545,8 @@ static inline void __free_one_page(struct page *page,
 	unsigned long uninitialized_var(buddy_idx);
 	struct page *buddy;
 
+	VM_BUG_ON(!zone_is_initialized(zone));
+
 	if (unlikely(PageCompound(page)))
 		if (unlikely(destroy_compound_page(page, order)))
 			return;
@@ -606,7 +620,7 @@ static inline int free_pages_check(struct page *page)
 		bad_page(page);
 		return 1;
 	}
-	reset_page_last_nid(page);
+	page_nid_reset_last(page);
 	if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
 		page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	return 0;
@@ -666,7 +680,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
 			__free_one_page(page, zone, 0, mt);
 			trace_mm_page_pcpu_drain(page, 0, mt);
-			if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) {
+			if (likely(!is_migrate_isolate_page(page))) {
 				__mod_zone_page_state(zone, NR_FREE_PAGES, 1);
 				if (is_migrate_cma(mt))
 					__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
@@ -684,7 +698,7 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
 	zone->pages_scanned = 0;
 
 	__free_one_page(page, zone, order, migratetype);
-	if (unlikely(migratetype != MIGRATE_ISOLATE))
+	if (unlikely(!is_migrate_isolate(migratetype)))
 		__mod_zone_freepage_state(zone, 1 << order, migratetype);
 	spin_unlock(&zone->lock);
 }
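
The is_migrate_isolate() and is_migrate_isolate_page() helpers used above come from include/linux/page-isolation.h, introduced alongside these conversions; they are presumably thin wrappers along these lines, compiling to "always false" when CONFIG_MEMORY_ISOLATION is off:

	/* Sketch of the assumed helpers. */
	#ifdef CONFIG_MEMORY_ISOLATION
	static inline bool is_migrate_isolate_page(struct page *page)
	{
		return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
	}
	static inline bool is_migrate_isolate(int migratetype)
	{
		return migratetype == MIGRATE_ISOLATE;
	}
	#else
	static inline bool is_migrate_isolate_page(struct page *page)
	{
		return false;
	}
	static inline bool is_migrate_isolate(int migratetype)
	{
		return false;
	}
	#endif

That is what allows the MIGRATE_ISOLATE table entries below to be wrapped in #ifdef CONFIG_MEMORY_ISOLATION.
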
@@ -916,7 +930,9 @@ static int fallbacks[MIGRATE_TYPES][4] = {
 	[MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
 #endif
 	[MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */
+#ifdef CONFIG_MEMORY_ISOLATION
 	[MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */
+#endif
 };
 
 /*
@@ -981,9 +997,9 @@ int move_freepages_block(struct zone *zone, struct page *page,
 	end_pfn = start_pfn + pageblock_nr_pages - 1;
 
 	/* Do not cross zone boundaries */
-	if (start_pfn < zone->zone_start_pfn)
+	if (!zone_spans_pfn(zone, start_pfn))
 		start_page = page;
-	if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages)
+	if (!zone_spans_pfn(zone, end_pfn))
 		return 0;
 
 	return move_freepages(zone, start_page, end_page, migratetype);
@@ -1142,7 +1158,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			list_add_tail(&page->lru, list);
 		if (IS_ENABLED(CONFIG_CMA)) {
 			mt = get_pageblock_migratetype(page);
-			if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE)
+			if (!is_migrate_cma(mt) && !is_migrate_isolate(mt))
 				mt = migratetype;
 		}
 		set_freepage_migratetype(page, mt);
@@ -1277,7 +1293,7 @@ void mark_free_pages(struct zone *zone)
 
 	spin_lock_irqsave(&zone->lock, flags);
 
-	max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	max_zone_pfn = zone_end_pfn(zone);
 	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 		if (pfn_valid(pfn)) {
 			struct page *page = pfn_to_page(pfn);
@@ -1326,7 +1342,7 @@ void free_hot_cold_page(struct page *page, int cold)
 	 * excessively into the page allocator
 	 */
 	if (migratetype >= MIGRATE_PCPTYPES) {
-		if (unlikely(migratetype == MIGRATE_ISOLATE)) {
+		if (unlikely(is_migrate_isolate(migratetype))) {
 			free_one_page(zone, page, 0, migratetype);
 			goto out;
 		}
@@ -1400,7 +1416,7 @@ static int __isolate_free_page(struct page *page, unsigned int order)
 	zone = page_zone(page);
 	mt = get_pageblock_migratetype(page);
 
-	if (mt != MIGRATE_ISOLATE) {
+	if (!is_migrate_isolate(mt)) {
 		/* Obey watermarks as if the page was being allocated */
 		watermark = low_wmark_pages(zone) + (1 << order);
 		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
@@ -1419,7 +1435,7 @@ static int __isolate_free_page(struct page *page, unsigned int order)
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
 			int mt = get_pageblock_migratetype(page);
-			if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt))
+			if (!is_migrate_isolate(mt) && !is_migrate_cma(mt))
 				set_pageblock_migratetype(page,
 							  MIGRATE_MOVABLE);
 		}
@@ -2615,10 +2631,17 @@ retry_cpuset:
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
 			zonelist, high_zoneidx, alloc_flags,
 			preferred_zone, migratetype);
-	if (unlikely(!page))
+	if (unlikely(!page)) {
+		/*
+		 * Runtime PM, block IO and its error handling path
+		 * can deadlock because I/O on the device might not
+		 * complete.
+		 */
+		gfp_mask = memalloc_noio_flags(gfp_mask);
 		page = __alloc_pages_slowpath(gfp_mask, order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
+	}
 
 	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
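
memalloc_noio_flags() comes from the NOIO allocation-context work merged alongside this: a task that sets PF_MEMALLOC_NOIO (for example around runtime PM or block-device error handling) has its allocations silently demoted to GFP_NOIO. A rough sketch of the assumed helper, which lives in the scheduler headers rather than this file:

	static inline gfp_t memalloc_noio_flags(gfp_t flags)
	{
		if (unlikely(current->flags & PF_MEMALLOC_NOIO))
			flags &= ~(__GFP_IO | __GFP_FS);
		return flags;
	}

Note that only the slow path is affected; the initial get_page_from_freelist() attempt above still runs with the caller's original gfp_mask.
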
@@ -2790,18 +2813,27 @@ void free_pages_exact(void *virt, size_t size)
 }
 EXPORT_SYMBOL(free_pages_exact);
 
-static unsigned int nr_free_zone_pages(int offset)
+/**
+ * nr_free_zone_pages - count number of pages beyond high watermark
+ * @offset: The zone index of the highest zone
+ *
+ * nr_free_zone_pages() counts the number of pages which are beyond the
+ * high watermark within all zones at or below a given zone index. For each
+ * zone, the number of pages is calculated as:
+ *	managed_pages - high_pages
+ */
+static unsigned long nr_free_zone_pages(int offset)
 {
 	struct zoneref *z;
 	struct zone *zone;
 
 	/* Just pick one node, since fallback list is circular */
-	unsigned int sum = 0;
+	unsigned long sum = 0;
 
 	struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
 
 	for_each_zone_zonelist(zone, z, zonelist, offset) {
-		unsigned long size = zone->present_pages;
+		unsigned long size = zone->managed_pages;
 		unsigned long high = high_wmark_pages(zone);
 		if (size > high)
 			sum += size - high;
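
The present_pages to managed_pages conversions in this and the following hunks rest on the accounting relationship documented in include/linux/mmzone.h, roughly:

	/*
	 * spanned_pages = zone_end_pfn - zone_start_pfn   (may contain holes)
	 * present_pages = spanned_pages - absent_pages(pages in holes)
	 * managed_pages = present_pages - reserved_pages  (pages owned by the buddy allocator)
	 */

managed_pages is the better base for watermark and batch sizing because pages reserved at boot are never handed to the buddy allocator in the first place.
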
@@ -2810,19 +2842,25 @@ static unsigned int nr_free_zone_pages(int offset)
 	return sum;
 }
 
-/*
- * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
+/**
+ * nr_free_buffer_pages - count number of pages beyond high watermark
+ *
+ * nr_free_buffer_pages() counts the number of pages which are beyond the high
+ * watermark within ZONE_DMA and ZONE_NORMAL.
  */
-unsigned int nr_free_buffer_pages(void)
+unsigned long nr_free_buffer_pages(void)
 {
 	return nr_free_zone_pages(gfp_zone(GFP_USER));
 }
 EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
 
-/*
- * Amount of free RAM allocatable within all zones
+/**
+ * nr_free_pagecache_pages - count number of pages beyond high watermark
+ *
+ * nr_free_pagecache_pages() counts the number of pages which are beyond the
+ * high watermark within all zones.
  */
-unsigned int nr_free_pagecache_pages(void)
+unsigned long nr_free_pagecache_pages(void)
 {
 	return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE));
 }
@@ -2854,7 +2892,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 	val->totalram = pgdat->node_present_pages;
 	val->freeram = node_page_state(nid, NR_FREE_PAGES);
 #ifdef CONFIG_HIGHMEM
-	val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
+	val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
 	val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
 			NR_FREE_PAGES);
 #else
@@ -2897,7 +2935,9 @@ static void show_migration_types(unsigned char type)
 #ifdef CONFIG_CMA
 		[MIGRATE_CMA] = 'C',
 #endif
+#ifdef CONFIG_MEMORY_ISOLATION
 		[MIGRATE_ISOLATE] = 'I',
+#endif
 	};
 	char tmp[MIGRATE_TYPES + 1];
 	char *p = tmp;
@@ -3236,7 +3276,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
 {
 	int n, val;
 	int min_val = INT_MAX;
-	int best_node = -1;
+	int best_node = NUMA_NO_NODE;
 	const struct cpumask *tmp = cpumask_of_node(0);
 
 	/* Use the local node if we haven't already */
@@ -3780,7 +3820,7 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	 * the block.
 	 */
 	start_pfn = zone->zone_start_pfn;
-	end_pfn = start_pfn + zone->spanned_pages;
+	end_pfn = zone_end_pfn(zone);
 	start_pfn = roundup(start_pfn, pageblock_nr_pages);
 	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
 							pageblock_order;
@@ -3876,8 +3916,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		set_page_links(page, zone, nid, pfn);
 		mminit_verify_page_links(page, zone, nid, pfn);
 		init_page_count(page);
-		reset_page_mapcount(page);
-		reset_page_last_nid(page);
+		page_mapcount_reset(page);
+		page_nid_reset_last(page);
 		SetPageReserved(page);
 		/*
 		 * Mark the block movable so that blocks are reserved for
@@ -3894,7 +3934,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		 * pfn out of zone.
 		 */
 		if ((z->zone_start_pfn <= pfn)
-		    && (pfn < z->zone_start_pfn + z->spanned_pages)
+		    && (pfn < zone_end_pfn(z))
 		    && !(pfn & (pageblock_nr_pages - 1)))
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 
@@ -3932,7 +3972,7 @@ static int __meminit zone_batchsize(struct zone *zone)
 	 *
 	 * OK, so we don't know how big the cache is. So guess.
 	 */
-	batch = zone->present_pages / 1024;
+	batch = zone->managed_pages / 1024;
 	if (batch * PAGE_SIZE > 512 * 1024)
 		batch = (512 * 1024) / PAGE_SIZE;
 	batch /= 4; /* We effectively *= 4 below */
@@ -4016,7 +4056,7 @@ static void __meminit setup_zone_pageset(struct zone *zone)
 
 		if (percpu_pagelist_fraction)
 			setup_pagelist_highmark(pcp,
-				(zone->present_pages /
+				(zone->managed_pages /
 					percpu_pagelist_fraction));
 	}
 }
@@ -4372,6 +4412,77 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 	return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
 }

+/**
+ * sanitize_zone_movable_limit - Sanitize the zone_movable_limit array.
+ *
+ * zone_movable_limit is initialized as 0. This function will try to get
+ * the first ZONE_MOVABLE pfn of each node from movablemem_map, and
+ * assign them to zone_movable_limit.
+ * zone_movable_limit[nid] == 0 means no limit for the node.
+ *
+ * Note: Each range is represented as [start_pfn, end_pfn)
+ */
+static void __meminit sanitize_zone_movable_limit(void)
+{
+	int map_pos = 0, i, nid;
+	unsigned long start_pfn, end_pfn;
+
+	if (!movablemem_map.nr_map)
+		return;
+
+	/* Iterate all ranges from minimum to maximum */
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
+		/*
+		 * If we have found the lowest pfn of ZONE_MOVABLE of the node
+		 * specified by the user, just go on to check the next range.
+		 */
+		if (zone_movable_limit[nid])
+			continue;
+
+#ifdef CONFIG_ZONE_DMA
+		/* Skip DMA memory. */
+		if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA])
+			start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA];
+#endif
+
+#ifdef CONFIG_ZONE_DMA32
+		/* Skip DMA32 memory. */
+		if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA32])
+			start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA32];
+#endif
+
+#ifdef CONFIG_HIGHMEM
+		/* Skip lowmem if ZONE_MOVABLE is highmem. */
+		if (zone_movable_is_highmem() &&
+		    start_pfn < arch_zone_lowest_possible_pfn[ZONE_HIGHMEM])
+			start_pfn = arch_zone_lowest_possible_pfn[ZONE_HIGHMEM];
+#endif
+
+		if (start_pfn >= end_pfn)
+			continue;
+
+		while (map_pos < movablemem_map.nr_map) {
+			if (end_pfn <= movablemem_map.map[map_pos].start_pfn)
+				break;
+
+			if (start_pfn >= movablemem_map.map[map_pos].end_pfn) {
+				map_pos++;
+				continue;
+			}
+
+			/*
+			 * The start_pfn of ZONE_MOVABLE is either the minimum
+			 * pfn specified by movablemem_map, or 0, which means
+			 * the node has no ZONE_MOVABLE.
+			 */
+			zone_movable_limit[nid] = max(start_pfn,
+					movablemem_map.map[map_pos].start_pfn);
+
+			break;
+		}
+	}
+}
+
 #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
 					unsigned long zone_type,
@@ -4389,7 +4500,6 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
 
 	return zholes_size[zone_type];
 }
-
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
@@ -4573,7 +4683,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		nr_all_pages += freesize;
 
 		zone->spanned_pages = size;
-		zone->present_pages = freesize;
+		zone->present_pages = realsize;
 		/*
 		 * Set an approximate value for lowmem here, it will be adjusted
 		 * when the bootmem allocator frees pages into the buddy system.
@@ -4625,7 +4735,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 	 * for the buddy allocator to function correctly.
 	 */
 	start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
-	end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+	end = pgdat_end_pfn(pgdat);
 	end = ALIGN(end, MAX_ORDER_NR_PAGES);
 	size = (end - start) * sizeof(struct page);
 	map = alloc_remap(pgdat->node_id, size);
@@ -4831,12 +4941,19 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 		required_kernelcore = max(required_kernelcore, corepages);
 	}
 
-	/* If kernelcore was not specified, there is no ZONE_MOVABLE */
-	if (!required_kernelcore)
+	/*
+	 * If neither kernelcore/movablecore nor movablemem_map is specified,
+	 * there is no ZONE_MOVABLE. But if movablemem_map is specified, the
+	 * start pfn of ZONE_MOVABLE has been stored in zone_movable_limit[].
+	 */
+	if (!required_kernelcore) {
+		if (movablemem_map.nr_map)
+			memcpy(zone_movable_pfn, zone_movable_limit,
+				sizeof(zone_movable_pfn));
 		goto out;
+	}
 
 	/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
-	find_usable_zone_for_movable();
 	usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
 
 restart:
@@ -4864,10 +4981,24 @@ restart:
 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
 			unsigned long size_pages;
 
+			/*
+			 * Find more memory for kernelcore in
+			 * [zone_movable_pfn[nid], zone_movable_limit[nid]).
+			 */
 			start_pfn = max(start_pfn, zone_movable_pfn[nid]);
 			if (start_pfn >= end_pfn)
 				continue;
 
+			if (zone_movable_limit[nid]) {
+				end_pfn = min(end_pfn, zone_movable_limit[nid]);
+				/* No range left for kernelcore in this node */
+				if (start_pfn >= end_pfn) {
+					zone_movable_pfn[nid] =
+						zone_movable_limit[nid];
+					break;
+				}
+			}
+
 			/* Account for what is only usable for kernelcore */
 			if (start_pfn < usable_startpfn) {
 				unsigned long kernel_pages;
@@ -4927,12 +5058,12 @@ restart:
 	if (usable_nodes && required_kernelcore > usable_nodes)
 		goto restart;
 
+out:
 	/* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
 		zone_movable_pfn[nid] =
 			roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
 
-out:
 	/* restore the node_state */
 	node_states[N_MEMORY] = saved_node_state;
 }
@@ -4995,6 +5126,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
 	/* Find the PFNs that ZONE_MOVABLE begins at in each node */
 	memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
+	find_usable_zone_for_movable();
+	sanitize_zone_movable_limit();
 	find_zone_movable_pfns_for_nodes();
 
 	/* Print out the zone ranges */
@@ -5078,6 +5211,181 @@ static int __init cmdline_parse_movablecore(char *p)
 early_param("kernelcore", cmdline_parse_kernelcore);
 early_param("movablecore", cmdline_parse_movablecore);

+/**
+ * movablemem_map_overlap() - Check if a range overlaps movablemem_map.map[].
+ * @start_pfn: start pfn of the range to be checked
+ * @end_pfn: end pfn of the range to be checked (exclusive)
+ *
+ * This function checks if a given memory range [start_pfn, end_pfn) overlaps
+ * the movablemem_map.map[] array.
+ *
+ * Return: index of the first overlapped element in movablemem_map.map[]
+ * or -1 if they don't overlap each other.
+ */
+int __init movablemem_map_overlap(unsigned long start_pfn,
+				  unsigned long end_pfn)
+{
+	int overlap;
+
+	if (!movablemem_map.nr_map)
+		return -1;
+
+	for (overlap = 0; overlap < movablemem_map.nr_map; overlap++)
+		if (start_pfn < movablemem_map.map[overlap].end_pfn)
+			break;
+
+	if (overlap == movablemem_map.nr_map ||
+	    end_pfn <= movablemem_map.map[overlap].start_pfn)
+		return -1;
+
+	return overlap;
+}
+
+/**
+ * insert_movablemem_map - Insert a memory range into movablemem_map.map.
+ * @start_pfn: start pfn of the range
+ * @end_pfn: end pfn of the range
+ *
+ * This function will also merge the overlapped ranges, and sort the array
+ * by start_pfn in monotonic increasing order.
+ */
+void __init insert_movablemem_map(unsigned long start_pfn,
+				  unsigned long end_pfn)
+{
+	int pos, overlap;
+
+	/*
+	 * pos will be at the 1st overlapped range, or the position
+	 * where the element should be inserted.
+	 */
+	for (pos = 0; pos < movablemem_map.nr_map; pos++)
+		if (start_pfn <= movablemem_map.map[pos].end_pfn)
+			break;
+
+	/* If there is no overlapped range, just insert the element. */
+	if (pos == movablemem_map.nr_map ||
+	    end_pfn < movablemem_map.map[pos].start_pfn) {
+		/*
+		 * If pos is not the end of the array, we need to move all
+		 * the rest of the elements backward.
+		 */
+		if (pos < movablemem_map.nr_map)
+			memmove(&movablemem_map.map[pos+1],
+				&movablemem_map.map[pos],
+				sizeof(struct movablemem_entry) *
+				(movablemem_map.nr_map - pos));
+		movablemem_map.map[pos].start_pfn = start_pfn;
+		movablemem_map.map[pos].end_pfn = end_pfn;
+		movablemem_map.nr_map++;
+		return;
+	}
+
+	/* overlap will be at the last overlapped range */
+	for (overlap = pos + 1; overlap < movablemem_map.nr_map; overlap++)
+		if (end_pfn < movablemem_map.map[overlap].start_pfn)
+			break;
+
+	/*
+	 * If there are more ranges overlapped, we need to merge them,
+	 * and move the rest of the elements forward.
+	 */
+	overlap--;
+	movablemem_map.map[pos].start_pfn = min(start_pfn,
+					movablemem_map.map[pos].start_pfn);
+	movablemem_map.map[pos].end_pfn = max(end_pfn,
+					movablemem_map.map[overlap].end_pfn);
+
+	if (pos != overlap && overlap + 1 != movablemem_map.nr_map)
+		memmove(&movablemem_map.map[pos+1],
+			&movablemem_map.map[overlap+1],
+			sizeof(struct movablemem_entry) *
+			(movablemem_map.nr_map - overlap - 1));
+
+	movablemem_map.nr_map -= overlap - pos;
+}
+
+/**
+ * movablemem_map_add_region - Add a memory range into movablemem_map.
+ * @start: physical start address of the range
+ * @size: size of the range
+ *
+ * This function transforms the physical address range into pfns, and then
+ * adds the range into movablemem_map by calling insert_movablemem_map().
+ */
+static void __init movablemem_map_add_region(u64 start, u64 size)
+{
+	unsigned long start_pfn, end_pfn;
+
+	/* In case size == 0 or start + size overflows */
+	if (start + size <= start)
+		return;
+
+	if (movablemem_map.nr_map >= ARRAY_SIZE(movablemem_map.map)) {
+		pr_err("movablemem_map: too many entries;"
+			" ignoring [mem %#010llx-%#010llx]\n",
+			(unsigned long long) start,
+			(unsigned long long) (start + size - 1));
+		return;
+	}
+
+	start_pfn = PFN_DOWN(start);
+	end_pfn = PFN_UP(start + size);
+	insert_movablemem_map(start_pfn, end_pfn);
+}
+
+/*
+ * cmdline_parse_movablemem_map - Parse boot option movablemem_map.
+ * @p: The boot option of the following format:
+ *	movablemem_map=nn[KMG]@ss[KMG]
+ *
+ * This option sets the memory range [ss, ss+nn) to be used as movable memory.
+ *
+ * Return: 0 on success or -EINVAL on failure.
+ */
+static int __init cmdline_parse_movablemem_map(char *p)
+{
+	char *oldp;
+	u64 start_at, mem_size;
+
+	if (!p)
+		goto err;
+
+	if (!strcmp(p, "acpi"))
+		movablemem_map.acpi = true;
+
+	/*
+	 * If the user decides to use info from the BIOS, all the other
+	 * user-specified ranges will be ignored.
+	 */
+	if (movablemem_map.acpi) {
+		if (movablemem_map.nr_map) {
+			memset(movablemem_map.map, 0,
+				sizeof(struct movablemem_entry)
+				* movablemem_map.nr_map);
+			movablemem_map.nr_map = 0;
+		}
+		return 0;
+	}
+
+	oldp = p;
+	mem_size = memparse(p, &p);
+	if (p == oldp)
+		goto err;
+
+	if (*p == '@') {
+		oldp = ++p;
+		start_at = memparse(p, &p);
+		if (p == oldp || *p != '\0')
+			goto err;
+
+		movablemem_map_add_region(start_at, mem_size);
+		return 0;
+	}
+err:
+	return -EINVAL;
+}
+early_param("movablemem_map", cmdline_parse_movablemem_map);
+
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
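
With this in place, a boot line such as "movablemem_map=4G@8G" requests that the 4 GiB of memory starting at physical address 8 GiB be usable only as ZONE_MOVABLE, while "movablemem_map=acpi" asks the kernel to take the hotpluggable ranges from firmware instead, discarding any explicit ranges given earlier. A rough illustration of how the parser above consumes the nn[KMG]@ss[KMG] form (the values here are made up for the example):

	char str[] = "4G@8G", *p = str;
	u64 mem_size = memparse(p, &p);		/* 0x100000000, p now points at '@' */
	u64 start_at = memparse(++p, &p);	/* 0x200000000, *p == '\0' */
	/* the parser then calls movablemem_map_add_region(start_at, mem_size) */
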

 /**
@@ -5160,8 +5468,8 @@ static void calculate_totalreserve_pages(void)
 			/* we treat the high watermark as reserved pages. */
 			max += high_wmark_pages(zone);
 
-			if (max > zone->present_pages)
-				max = zone->present_pages;
+			if (max > zone->managed_pages)
+				max = zone->managed_pages;
 			reserve_pages += max;
 			/*
 			 * Lowmem reserves are not available to
@@ -5193,7 +5501,7 @@ static void setup_per_zone_lowmem_reserve(void)
 	for_each_online_pgdat(pgdat) {
 		for (j = 0; j < MAX_NR_ZONES; j++) {
 			struct zone *zone = pgdat->node_zones + j;
-			unsigned long present_pages = zone->present_pages;
+			unsigned long managed_pages = zone->managed_pages;
 
 			zone->lowmem_reserve[j] = 0;
 
@@ -5207,9 +5515,9 @@ static void setup_per_zone_lowmem_reserve(void)
 					sysctl_lowmem_reserve_ratio[idx] = 1;
 
 				lower_zone = pgdat->node_zones + idx;
-				lower_zone->lowmem_reserve[j] = present_pages /
+				lower_zone->lowmem_reserve[j] = managed_pages /
 					sysctl_lowmem_reserve_ratio[idx];
-				present_pages += lower_zone->present_pages;
+				managed_pages += lower_zone->managed_pages;
 			}
 		}
 	}
@@ -5228,14 +5536,14 @@ static void __setup_per_zone_wmarks(void)
 	/* Calculate total number of !ZONE_HIGHMEM pages */
 	for_each_zone(zone) {
 		if (!is_highmem(zone))
-			lowmem_pages += zone->present_pages;
+			lowmem_pages += zone->managed_pages;
 	}
 
 	for_each_zone(zone) {
 		u64 tmp;
 
 		spin_lock_irqsave(&zone->lock, flags);
-		tmp = (u64)pages_min * zone->present_pages;
+		tmp = (u64)pages_min * zone->managed_pages;
 		do_div(tmp, lowmem_pages);
 		if (is_highmem(zone)) {
 			/*
@@ -5247,13 +5555,10 @@ static void __setup_per_zone_wmarks(void)
 			 * deltas controls asynch page reclaim, and so should
 			 * not be capped for highmem.
 			 */
-			int min_pages;
+			unsigned long min_pages;
 
-			min_pages = zone->present_pages / 1024;
-			if (min_pages < SWAP_CLUSTER_MAX)
-				min_pages = SWAP_CLUSTER_MAX;
-			if (min_pages > 128)
-				min_pages = 128;
+			min_pages = zone->managed_pages / 1024;
+			min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
 			zone->watermark[WMARK_MIN] = min_pages;
 		} else {
 			/*
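
clamp() here is the generic bounds helper from linux/kernel.h; it is equivalent to the open-coded lower/upper checks it replaces, and the 128UL literal keeps both bounds at the type of min_pages, which is now unsigned long. Roughly:

	/* Approximate shape of the helper; the real macro also type-checks its arguments. */
	#define clamp(val, lo, hi)	min((typeof(val))max(val, lo), hi)
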
@@ -5314,7 +5619,7 @@ static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
 	unsigned int gb, ratio;
 
 	/* Zone size in gigabytes */
-	gb = zone->present_pages >> (30 - PAGE_SHIFT);
+	gb = zone->managed_pages >> (30 - PAGE_SHIFT);
 	if (gb)
 		ratio = int_sqrt(10 * gb);
 	else
@@ -5400,7 +5705,7 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
 		return rc;
 
 	for_each_zone(zone)
-		zone->min_unmapped_pages = (zone->present_pages *
+		zone->min_unmapped_pages = (zone->managed_pages *
 				sysctl_min_unmapped_ratio) / 100;
 	return 0;
 }
@@ -5416,7 +5721,7 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
 		return rc;
 
 	for_each_zone(zone)
-		zone->min_slab_pages = (zone->present_pages *
+		zone->min_slab_pages = (zone->managed_pages *
 				sysctl_min_slab_ratio) / 100;
 	return 0;
 }
@@ -5458,7 +5763,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
 	for_each_populated_zone(zone) {
 		for_each_possible_cpu(cpu) {
 			unsigned long high;
-			high = zone->present_pages / percpu_pagelist_fraction;
+			high = zone->managed_pages / percpu_pagelist_fraction;
 			setup_pagelist_highmark(
 				per_cpu_ptr(zone->pageset, cpu), high);
 		}
@@ -5645,8 +5950,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 	pfn = page_to_pfn(page);
 	bitmap = get_pageblock_bitmap(zone, pfn);
 	bitidx = pfn_to_bitidx(zone, pfn);
-	VM_BUG_ON(pfn < zone->zone_start_pfn);
-	VM_BUG_ON(pfn >= zone->zone_start_pfn + zone->spanned_pages);
+	VM_BUG_ON(!zone_spans_pfn(zone, pfn));
 
 	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
 		if (flags & value)
@@ -5744,8 +6048,7 @@ bool is_pageblock_removable_nolock(struct page *page)
 
 	zone = page_zone(page);
 	pfn = page_to_pfn(page);
-	if (zone->zone_start_pfn > pfn ||
-	    zone->zone_start_pfn + zone->spanned_pages <= pfn)
+	if (!zone_spans_pfn(zone, pfn))
 		return false;
 
 	return !has_unmovable_pages(zone, page, 0, true);
@@ -5801,14 +6104,14 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 							&cc->migratepages);
 		cc->nr_migratepages -= nr_reclaimed;
 
-		ret = migrate_pages(&cc->migratepages,
-				    alloc_migrate_target,
-				    0, false, MIGRATE_SYNC,
-				    MR_CMA);
+		ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
+				    0, MIGRATE_SYNC, MR_CMA);
 	}
-
-	putback_movable_pages(&cc->migratepages);
-	return ret > 0 ? 0 : ret;
+	if (ret < 0) {
+		putback_movable_pages(&cc->migratepages);
+		return ret;
+	}
+	return 0;
 }
 
 /**
|