@@ -80,8 +80,9 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
256,
#endif
#ifdef CONFIG_HIGHMEM
- 32
+ 32,
#endif
+ 32,
};

EXPORT_SYMBOL(totalram_pages);
@@ -95,8 +96,9 @@ static char * const zone_names[MAX_NR_ZONES] = {
#endif
"Normal",
#ifdef CONFIG_HIGHMEM
- "HighMem"
+ "HighMem",
#endif
+ "Movable",
};

int min_free_kbytes = 1024;
@@ -134,6 +136,12 @@ static unsigned long __meminitdata dma_reserve;
static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
+ unsigned long __initdata required_kernelcore;
+ unsigned long __initdata zone_movable_pfn[MAX_NUMNODES];
+
+ /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
+ int movable_zone;
+ EXPORT_SYMBOL(movable_zone);
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */

#if MAX_NUMNODES > 1
@@ -1480,7 +1488,7 @@ unsigned int nr_free_buffer_pages(void)
*/
unsigned int nr_free_pagecache_pages(void)
{
- return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
+ return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE));
}

static inline void show_node(struct zone *zone)
@@ -2666,6 +2674,63 @@ void __meminit get_pfn_range_for_nid(unsigned int nid,
account_node_boundary(nid, start_pfn, end_pfn);
}

+/*
+ * This finds a zone that can be used for ZONE_MOVABLE pages. The
+ * assumption is made that zones within a node are ordered by monotonically
+ * increasing memory addresses so that the "highest" populated zone is used.
+ */
+void __init find_usable_zone_for_movable(void)
+{
+ int zone_index;
+ for (zone_index = MAX_NR_ZONES - 1; zone_index >= 0; zone_index--) {
+ if (zone_index == ZONE_MOVABLE)
+ continue;
+
+ if (arch_zone_highest_possible_pfn[zone_index] >
+ arch_zone_lowest_possible_pfn[zone_index])
+ break;
+ }
+
+ VM_BUG_ON(zone_index == -1);
+ movable_zone = zone_index;
+}
+
+/*
+ * The zone ranges provided by the architecture do not include ZONE_MOVABLE
+ * because it is sized independent of architecture. Unlike the other zones,
+ * the starting point for ZONE_MOVABLE is not fixed. It may be different
+ * in each node depending on the size of each node and how evenly kernelcore
+ * is distributed. This helper function adjusts the zone ranges
+ * provided by the architecture for a given node by using the end of the
+ * highest usable zone for ZONE_MOVABLE. This preserves the assumption that
+ * zones within a node are in order of monotonically increasing memory addresses.
+ */
+void __meminit adjust_zone_range_for_zone_movable(int nid,
+ unsigned long zone_type,
+ unsigned long node_start_pfn,
+ unsigned long node_end_pfn,
+ unsigned long *zone_start_pfn,
+ unsigned long *zone_end_pfn)
+{
+ /* Only adjust if ZONE_MOVABLE is on this node */
+ if (zone_movable_pfn[nid]) {
+ /* Size ZONE_MOVABLE */
+ if (zone_type == ZONE_MOVABLE) {
+ *zone_start_pfn = zone_movable_pfn[nid];
+ *zone_end_pfn = min(node_end_pfn,
+ arch_zone_highest_possible_pfn[movable_zone]);
+
+ /* Adjust for ZONE_MOVABLE starting within this range */
+ } else if (*zone_start_pfn < zone_movable_pfn[nid] &&
+ *zone_end_pfn > zone_movable_pfn[nid]) {
+ *zone_end_pfn = zone_movable_pfn[nid];
+
+ /* Check if this whole range is within ZONE_MOVABLE */
+ } else if (*zone_start_pfn >= zone_movable_pfn[nid])
+ *zone_start_pfn = *zone_end_pfn;
+ }
+}
+
/*
* Return the number of pages a zone spans in a node, including holes
* present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node()
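
As an illustration outside the patch itself: the three branches in adjust_zone_range_for_zone_movable() above are easier to follow with concrete numbers. The sketch below is a minimal userspace rewrite of the same clipping rules using made-up values (one node spanning PFNs 4096-262144, with ZONE_MOVABLE assumed to start at PFN 196608), so every name and number in it is illustrative rather than kernel API.

#include <stdio.h>

/* Illustrative stand-ins for the kernel globals used by the helper above */
#define NID 0
static unsigned long zone_movable_pfn[1] = { 196608 }; /* assumed split point */
static unsigned long movable_zone_end = 262144; /* stands in for arch_zone_highest_possible_pfn[movable_zone] */

/* Same clipping rules as adjust_zone_range_for_zone_movable(), userspace copy */
static void adjust(int is_movable, unsigned long node_end,
		   unsigned long *start, unsigned long *end)
{
	if (!zone_movable_pfn[NID])
		return;				/* no ZONE_MOVABLE on this node */
	if (is_movable) {
		*start = zone_movable_pfn[NID];	/* movable zone begins at the split */
		*end = node_end < movable_zone_end ? node_end : movable_zone_end;
	} else if (*start < zone_movable_pfn[NID] && *end > zone_movable_pfn[NID]) {
		*end = zone_movable_pfn[NID];	/* kernel zone straddles the split: clip it */
	} else if (*start >= zone_movable_pfn[NID]) {
		*start = *end;			/* zone lies entirely above the split: now empty */
	}
}

int main(void)
{
	unsigned long s = 4096, e = 262144;	/* "Normal" zone as the arch reported it */
	adjust(0, 262144, &s, &e);
	printf("Normal:  %lu-%lu\n", s, e);	/* clipped to 4096-196608 */

	s = 0; e = 0;				/* ZONE_MOVABLE has no arch-provided range */
	adjust(1, 262144, &s, &e);
	printf("Movable: %lu-%lu\n", s, e);	/* becomes 196608-262144 */
	return 0;
}

Compiled and run, it reports the Normal range clipped to 4096-196608 and the Movable range sized as 196608-262144, which is what the helper produces per zone at boot.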
@@ -2681,6 +2746,9 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
+ adjust_zone_range_for_zone_movable(nid, zone_type,
+ node_start_pfn, node_end_pfn,
+ &zone_start_pfn, &zone_end_pfn);

/* Check that this node has pages within the zone's required range */
if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
@@ -2771,6 +2839,9 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type],
node_end_pfn);

+ adjust_zone_range_for_zone_movable(nid, zone_type,
+ node_start_pfn, node_end_pfn,
+ &zone_start_pfn, &zone_end_pfn);
return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
}

@@ -3148,6 +3219,122 @@ unsigned long __init find_max_pfn_with_active_regions(void)
return max_pfn;
}

+/*
+ * Find the PFN the Movable zone begins in each node. Kernel memory
+ * is spread evenly between nodes as long as the nodes have enough
+ * memory. When they don't, some nodes will have more kernelcore than
+ * others
+ */
+void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
+{
+ int i, nid;
+ unsigned long usable_startpfn;
+ unsigned long kernelcore_node, kernelcore_remaining;
+ int usable_nodes = num_online_nodes();
+
+ /* If kernelcore was not specified, there is no ZONE_MOVABLE */
+ if (!required_kernelcore)
+ return;
+
+ /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
+ find_usable_zone_for_movable();
+ usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
+
+restart:
+ /* Spread kernelcore memory as evenly as possible throughout nodes */
+ kernelcore_node = required_kernelcore / usable_nodes;
+ for_each_online_node(nid) {
+ /*
+ * Recalculate kernelcore_node if the division per node
+ * now exceeds what is necessary to satisfy the requested
+ * amount of memory for the kernel
+ */
+ if (required_kernelcore < kernelcore_node)
+ kernelcore_node = required_kernelcore / usable_nodes;
+
+ /*
+ * As the map is walked, we track how much memory is usable
+ * by the kernel using kernelcore_remaining. When it is
+ * 0, the rest of the node is usable by ZONE_MOVABLE
+ */
+ kernelcore_remaining = kernelcore_node;
+
+ /* Go through each range of PFNs within this node */
+ for_each_active_range_index_in_nid(i, nid) {
+ unsigned long start_pfn, end_pfn;
+ unsigned long size_pages;
+
+ start_pfn = max(early_node_map[i].start_pfn,
+ zone_movable_pfn[nid]);
+ end_pfn = early_node_map[i].end_pfn;
+ if (start_pfn >= end_pfn)
+ continue;
+
+ /* Account for what is only usable for kernelcore */
+ if (start_pfn < usable_startpfn) {
+ unsigned long kernel_pages;
+ kernel_pages = min(end_pfn, usable_startpfn)
+ - start_pfn;
+
+ kernelcore_remaining -= min(kernel_pages,
+ kernelcore_remaining);
+ required_kernelcore -= min(kernel_pages,
+ required_kernelcore);
+
+ /* Continue if range is now fully accounted */
+ if (end_pfn <= usable_startpfn) {
+
+ /*
+ * Push zone_movable_pfn to the end so
+ * that if we have to rebalance
+ * kernelcore across nodes, we will
+ * not double account here
+ */
+ zone_movable_pfn[nid] = end_pfn;
+ continue;
+ }
+ start_pfn = usable_startpfn;
+ }
+
+ /*
+ * The usable PFN range for ZONE_MOVABLE is from
+ * start_pfn->end_pfn. Calculate size_pages as the
+ * number of pages used as kernelcore
+ */
+ size_pages = end_pfn - start_pfn;
+ if (size_pages > kernelcore_remaining)
+ size_pages = kernelcore_remaining;
+ zone_movable_pfn[nid] = start_pfn + size_pages;
+
+ /*
+ * Some kernelcore has been met, update counts and
+ * break if the kernelcore for this node has been
+ * satisfied
+ */
+ required_kernelcore -= min(required_kernelcore,
+ size_pages);
+ kernelcore_remaining -= size_pages;
+ if (!kernelcore_remaining)
+ break;
+ }
+ }
+
+ /*
+ * If there is still required_kernelcore, we do another pass with one
+ * less node in the count. This will push zone_movable_pfn[nid] further
+ * along on the nodes that still have memory until kernelcore is
+ * satisfied
+ */
+ usable_nodes--;
+ if (usable_nodes && required_kernelcore > usable_nodes)
+ goto restart;
+
+ /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
+ for (nid = 0; nid < MAX_NUMNODES; nid++)
+ zone_movable_pfn[nid] =
+ roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
+}
+
/**
* free_area_init_nodes - Initialise all pg_data_t and zone data
* @max_zone_pfn: an array of max PFNs for each zone
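
As an illustration outside the patch itself: the spreading logic in find_zone_movable_pfns_for_nodes() above boils down to dividing the requested kernelcore evenly across nodes, keeping that many pages at the bottom of each node for the kernel, and handing the remainder to ZONE_MOVABLE. The toy program below assumes a flat, hole-free map of two equal nodes and a kernelcore request of 131072 pages (512MB with 4KB pages); it deliberately omits the restart: rebalancing pass and the MAX_ORDER_NR_PAGES rounding so the core arithmetic stays visible.

#include <stdio.h>

#define NR_NODES 2

int main(void)
{
	/* Assumed layout: each node is one contiguous range above usable_startpfn */
	unsigned long node_start[NR_NODES] = {      0, 262144 };
	unsigned long node_end[NR_NODES]   = { 262144, 524288 };
	unsigned long required_kernelcore  = 131072;	/* e.g. kernelcore=512M, 4KB pages */
	unsigned long zone_movable_pfn[NR_NODES];
	int usable_nodes = NR_NODES, nid;

	/* One pass is enough here because both nodes can satisfy their share */
	unsigned long kernelcore_node = required_kernelcore / usable_nodes;

	for (nid = 0; nid < NR_NODES; nid++) {
		unsigned long node_pages = node_end[nid] - node_start[nid];
		unsigned long kernel_pages =
			node_pages < kernelcore_node ? node_pages : kernelcore_node;

		/* Everything past the per-node kernelcore share becomes ZONE_MOVABLE */
		zone_movable_pfn[nid] = node_start[nid] + kernel_pages;
		required_kernelcore -= kernel_pages;

		printf("Node %d: ZONE_MOVABLE starts at PFN %lu\n",
		       nid, zone_movable_pfn[nid]);
	}
	return 0;
}

With these assumed numbers, each node keeps 65536 pages of kernelcore, so ZONE_MOVABLE starts at PFN 65536 on node 0 and PFN 327680 on node 1.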
@@ -3177,19 +3364,37 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
for (i = 1; i < MAX_NR_ZONES; i++) {
+ if (i == ZONE_MOVABLE)
+ continue;
arch_zone_lowest_possible_pfn[i] =
arch_zone_highest_possible_pfn[i-1];
arch_zone_highest_possible_pfn[i] =
max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
}
+ arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
+ arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;
+
+ /* Find the PFNs that ZONE_MOVABLE begins at in each node */
+ memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
+ find_zone_movable_pfns_for_nodes(zone_movable_pfn);

/* Print out the zone ranges */
printk("Zone PFN ranges:\n");
- for (i = 0; i < MAX_NR_ZONES; i++)
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ if (i == ZONE_MOVABLE)
+ continue;
printk(" %-8s %8lu -> %8lu\n",
zone_names[i],
arch_zone_lowest_possible_pfn[i],
arch_zone_highest_possible_pfn[i]);
+ }
+
+ /* Print out the PFNs ZONE_MOVABLE begins at in each node */
+ printk("Movable zone start PFN for each node\n");
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ if (zone_movable_pfn[i])
+ printk(" Node %d: %lu\n", i, zone_movable_pfn[i]);
+ }

/* Print out the early_node_map[] */
printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries);
@@ -3206,6 +3411,25 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
find_min_pfn_for_node(nid), NULL);
}
}
+
+/*
+ * kernelcore=size sets the amount of memory for use by allocations that
+ * cannot be reclaimed or migrated.
+ */
+int __init cmdline_parse_kernelcore(char *p)
+{
+ unsigned long long coremem;
+ if (!p)
+ return -EINVAL;
+
+ coremem = memparse(p, &p);
+ required_kernelcore = coremem >> PAGE_SHIFT;
+
+ /* Paranoid check that UL is enough for required_kernelcore */
+ WARN_ON((coremem >> PAGE_SHIFT) > ULONG_MAX);
+
+ return 0;
+}
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */

/**
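
As a usage illustration outside the patch itself: cmdline_parse_kernelcore() above handles the kernelcore= boot parameter described in its comment, where memparse() turns a suffixed size such as kernelcore=512M into bytes before the shift converts it to pages. A rough userspace stand-in for that conversion, assuming 4KB pages and handling only the K/M/G suffixes rather than everything the real memparse() accepts, looks like this:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12	/* assumed 4KB pages */

/* Simplified stand-in for the kernel's memparse(): bytes from "512M", "1G", ... */
static unsigned long long parse_size(const char *s)
{
	char *end;
	unsigned long long bytes = strtoull(s, &end, 0);

	switch (*end) {
	case 'G': case 'g': bytes <<= 30; break;
	case 'M': case 'm': bytes <<= 20; break;
	case 'K': case 'k': bytes <<= 10; break;
	}
	return bytes;
}

int main(void)
{
	unsigned long long coremem = parse_size("512M");

	/* 512MB / 4KB pages = 131072 pages reserved for kernelcore */
	printf("required_kernelcore = %llu pages\n", coremem >> PAGE_SHIFT);
	return 0;
}

For kernelcore=512M this yields required_kernelcore = 131072 pages.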