|
@@ -1378,42 +1378,29 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
|
|
*/
|
|
*/
|
|
static struct page *
|
|
static struct page *
|
|
get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
|
|
get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
|
|
- struct zonelist *zonelist, int alloc_flags)
|
|
|
|
|
|
+ struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
|
|
{
|
|
{
|
|
struct zone **z;
|
|
struct zone **z;
|
|
struct page *page = NULL;
|
|
struct page *page = NULL;
|
|
- int classzone_idx = zone_idx(zonelist->zones[0]);
|
|
|
|
|
|
+ int classzone_idx;
|
|
struct zone *zone, *preferred_zone;
|
|
struct zone *zone, *preferred_zone;
|
|
nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
|
|
nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
|
|
int zlc_active = 0; /* set if using zonelist_cache */
|
|
int zlc_active = 0; /* set if using zonelist_cache */
|
|
int did_zlc_setup = 0; /* just call zlc_setup() one time */
|
|
int did_zlc_setup = 0; /* just call zlc_setup() one time */
|
|
- enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */
|
|
|
|
|
|
+
|
|
|
|
+ z = first_zones_zonelist(zonelist, high_zoneidx);
|
|
|
|
+ classzone_idx = zone_idx(*z);
|
|
|
|
+ preferred_zone = *z;
|
|
|
|
|
|
zonelist_scan:
|
|
zonelist_scan:
|
|
/*
|
|
/*
|
|
* Scan zonelist, looking for a zone with enough free.
|
|
* Scan zonelist, looking for a zone with enough free.
|
|
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
|
|
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
|
|
*/
|
|
*/
|
|
- z = zonelist->zones;
|
|
|
|
- preferred_zone = *z;
|
|
|
|
-
|
|
|
|
- do {
|
|
|
|
- /*
|
|
|
|
- * In NUMA, this could be a policy zonelist which contains
|
|
|
|
- * zones that may not be allowed by the current gfp_mask.
|
|
|
|
- * Check the zone is allowed by the current flags
|
|
|
|
- */
|
|
|
|
- if (unlikely(alloc_should_filter_zonelist(zonelist))) {
|
|
|
|
- if (highest_zoneidx == -1)
|
|
|
|
- highest_zoneidx = gfp_zone(gfp_mask);
|
|
|
|
- if (zone_idx(*z) > highest_zoneidx)
|
|
|
|
- continue;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
|
|
+ for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
|
|
if (NUMA_BUILD && zlc_active &&
|
|
if (NUMA_BUILD && zlc_active &&
|
|
!zlc_zone_worth_trying(zonelist, z, allowednodes))
|
|
!zlc_zone_worth_trying(zonelist, z, allowednodes))
|
|
continue;
|
|
continue;
|
|
- zone = *z;
|
|
|
|
if ((alloc_flags & ALLOC_CPUSET) &&
|
|
if ((alloc_flags & ALLOC_CPUSET) &&
|
|
!cpuset_zone_allowed_softwall(zone, gfp_mask))
|
|
!cpuset_zone_allowed_softwall(zone, gfp_mask))
|
|
goto try_next_zone;
|
|
goto try_next_zone;
|
|
@@ -1447,7 +1434,7 @@ try_next_zone:
|
|
zlc_active = 1;
|
|
zlc_active = 1;
|
|
did_zlc_setup = 1;
|
|
did_zlc_setup = 1;
|
|
}
|
|
}
|
|
- } while (*(++z) != NULL);
|
|
|
|
|
|
+ }
|
|
|
|
|
|
if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
|
|
if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
|
|
/* Disable zlc cache for second zonelist scan */
|
|
/* Disable zlc cache for second zonelist scan */
|
|
@@ -1465,6 +1452,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
|
|
struct zonelist *zonelist)
|
|
struct zonelist *zonelist)
|
|
{
|
|
{
|
|
const gfp_t wait = gfp_mask & __GFP_WAIT;
|
|
const gfp_t wait = gfp_mask & __GFP_WAIT;
|
|
|
|
+ enum zone_type high_zoneidx = gfp_zone(gfp_mask);
|
|
struct zone **z;
|
|
struct zone **z;
|
|
struct page *page;
|
|
struct page *page;
|
|
struct reclaim_state reclaim_state;
|
|
struct reclaim_state reclaim_state;
|
|
@@ -1490,7 +1478,7 @@ restart:
|
|
}
|
|
}
|
|
|
|
|
|
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
|
|
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
|
|
- zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
|
|
|
|
|
|
+ zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
|
|
if (page)
|
|
if (page)
|
|
goto got_pg;
|
|
goto got_pg;
|
|
|
|
|
|
@@ -1534,7 +1522,8 @@ restart:
|
|
* Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
|
|
* Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
|
|
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
|
|
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
|
|
*/
|
|
*/
|
|
- page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
|
|
|
|
|
|
+ page = get_page_from_freelist(gfp_mask, order, zonelist,
|
|
|
|
+ high_zoneidx, alloc_flags);
|
|
if (page)
|
|
if (page)
|
|
goto got_pg;
|
|
goto got_pg;
|
|
|
|
|
|
@@ -1547,7 +1536,7 @@ rebalance:
|
|
nofail_alloc:
|
|
nofail_alloc:
|
|
/* go through the zonelist yet again, ignoring mins */
|
|
/* go through the zonelist yet again, ignoring mins */
|
|
page = get_page_from_freelist(gfp_mask, order,
|
|
page = get_page_from_freelist(gfp_mask, order,
|
|
- zonelist, ALLOC_NO_WATERMARKS);
|
|
|
|
|
|
+ zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
|
|
if (page)
|
|
if (page)
|
|
goto got_pg;
|
|
goto got_pg;
|
|
if (gfp_mask & __GFP_NOFAIL) {
|
|
if (gfp_mask & __GFP_NOFAIL) {
|
|
@@ -1582,7 +1571,7 @@ nofail_alloc:
|
|
|
|
|
|
if (likely(did_some_progress)) {
|
|
if (likely(did_some_progress)) {
|
|
page = get_page_from_freelist(gfp_mask, order,
|
|
page = get_page_from_freelist(gfp_mask, order,
|
|
- zonelist, alloc_flags);
|
|
|
|
|
|
+ zonelist, high_zoneidx, alloc_flags);
|
|
if (page)
|
|
if (page)
|
|
goto got_pg;
|
|
goto got_pg;
|
|
} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
|
|
} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
|
|
@@ -1598,7 +1587,7 @@ nofail_alloc:
|
|
* under heavy pressure.
|
|
* under heavy pressure.
|
|
*/
|
|
*/
|
|
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
|
|
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
|
|
- zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
|
|
|
|
|
|
+ zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
|
|
if (page) {
|
|
if (page) {
|
|
clear_zonelist_oom(zonelist);
|
|
clear_zonelist_oom(zonelist);
|
|
goto got_pg;
|
|
goto got_pg;
|
|
@@ -1713,14 +1702,15 @@ EXPORT_SYMBOL(free_pages);
|
|
|
|
|
|
static unsigned int nr_free_zone_pages(int offset)
|
|
static unsigned int nr_free_zone_pages(int offset)
|
|
{
|
|
{
|
|
|
|
+ struct zone **z;
|
|
|
|
+ struct zone *zone;
|
|
|
|
+
|
|
/* Just pick one node, since fallback list is circular */
|
|
/* Just pick one node, since fallback list is circular */
|
|
unsigned int sum = 0;
|
|
unsigned int sum = 0;
|
|
|
|
|
|
struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
|
|
struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
|
|
- struct zone **zonep = zonelist->zones;
|
|
|
|
- struct zone *zone;
|
|
|
|
|
|
|
|
- for (zone = *zonep++; zone; zone = *zonep++) {
|
|
|
|
|
|
+ for_each_zone_zonelist(zone, z, zonelist, offset) {
|
|
unsigned long size = zone->present_pages;
|
|
unsigned long size = zone->present_pages;
|
|
unsigned long high = zone->pages_high;
|
|
unsigned long high = zone->pages_high;
|
|
if (size > high)
|
|
if (size > high)
|
|
@@ -2078,17 +2068,15 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
|
|
*/
|
|
*/
|
|
static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
|
|
static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
|
|
{
|
|
{
|
|
- enum zone_type i;
|
|
|
|
int j;
|
|
int j;
|
|
struct zonelist *zonelist;
|
|
struct zonelist *zonelist;
|
|
|
|
|
|
- for (i = 0; i < MAX_NR_ZONES; i++) {
|
|
|
|
- zonelist = pgdat->node_zonelists + i;
|
|
|
|
- for (j = 0; zonelist->zones[j] != NULL; j++)
|
|
|
|
- ;
|
|
|
|
- j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
|
|
|
|
- zonelist->zones[j] = NULL;
|
|
|
|
- }
|
|
|
|
|
|
+ zonelist = &pgdat->node_zonelists[0];
|
|
|
|
+ for (j = 0; zonelist->zones[j] != NULL; j++)
|
|
|
|
+ ;
|
|
|
|
+ j = build_zonelists_node(NODE_DATA(node), zonelist, j,
|
|
|
|
+ MAX_NR_ZONES - 1);
|
|
|
|
+ zonelist->zones[j] = NULL;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
@@ -2096,15 +2084,12 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
|
|
*/
|
|
*/
|
|
static void build_thisnode_zonelists(pg_data_t *pgdat)
|
|
static void build_thisnode_zonelists(pg_data_t *pgdat)
|
|
{
|
|
{
|
|
- enum zone_type i;
|
|
|
|
int j;
|
|
int j;
|
|
struct zonelist *zonelist;
|
|
struct zonelist *zonelist;
|
|
|
|
|
|
- for (i = 0; i < MAX_NR_ZONES; i++) {
|
|
|
|
- zonelist = pgdat->node_zonelists + MAX_NR_ZONES + i;
|
|
|
|
- j = build_zonelists_node(pgdat, zonelist, 0, i);
|
|
|
|
- zonelist->zones[j] = NULL;
|
|
|
|
- }
|
|
|
|
|
|
+ zonelist = &pgdat->node_zonelists[1];
|
|
|
|
+ j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
|
|
|
|
+ zonelist->zones[j] = NULL;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
@@ -2117,27 +2102,24 @@ static int node_order[MAX_NUMNODES];
|
|
|
|
|
|
static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
|
|
static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
|
|
{
|
|
{
|
|
- enum zone_type i;
|
|
|
|
int pos, j, node;
|
|
int pos, j, node;
|
|
int zone_type; /* needs to be signed */
|
|
int zone_type; /* needs to be signed */
|
|
struct zone *z;
|
|
struct zone *z;
|
|
struct zonelist *zonelist;
|
|
struct zonelist *zonelist;
|
|
|
|
|
|
- for (i = 0; i < MAX_NR_ZONES; i++) {
|
|
|
|
- zonelist = pgdat->node_zonelists + i;
|
|
|
|
- pos = 0;
|
|
|
|
- for (zone_type = i; zone_type >= 0; zone_type--) {
|
|
|
|
- for (j = 0; j < nr_nodes; j++) {
|
|
|
|
- node = node_order[j];
|
|
|
|
- z = &NODE_DATA(node)->node_zones[zone_type];
|
|
|
|
- if (populated_zone(z)) {
|
|
|
|
- zonelist->zones[pos++] = z;
|
|
|
|
- check_highest_zone(zone_type);
|
|
|
|
- }
|
|
|
|
|
|
+ zonelist = &pgdat->node_zonelists[0];
|
|
|
|
+ pos = 0;
|
|
|
|
+ for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) {
|
|
|
|
+ for (j = 0; j < nr_nodes; j++) {
|
|
|
|
+ node = node_order[j];
|
|
|
|
+ z = &NODE_DATA(node)->node_zones[zone_type];
|
|
|
|
+ if (populated_zone(z)) {
|
|
|
|
+ zonelist->zones[pos++] = z;
|
|
|
|
+ check_highest_zone(zone_type);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- zonelist->zones[pos] = NULL;
|
|
|
|
}
|
|
}
|
|
|
|
+ zonelist->zones[pos] = NULL;
|
|
}
|
|
}
|
|
|
|
|
|
static int default_zonelist_order(void)
|
|
static int default_zonelist_order(void)
|
|
@@ -2264,19 +2246,15 @@ static void build_zonelists(pg_data_t *pgdat)
|
|
/* Construct the zonelist performance cache - see further mmzone.h */
|
|
/* Construct the zonelist performance cache - see further mmzone.h */
|
|
static void build_zonelist_cache(pg_data_t *pgdat)
|
|
static void build_zonelist_cache(pg_data_t *pgdat)
|
|
{
|
|
{
|
|
- int i;
|
|
|
|
-
|
|
|
|
- for (i = 0; i < MAX_NR_ZONES; i++) {
|
|
|
|
- struct zonelist *zonelist;
|
|
|
|
- struct zonelist_cache *zlc;
|
|
|
|
- struct zone **z;
|
|
|
|
|
|
+ struct zonelist *zonelist;
|
|
|
|
+ struct zonelist_cache *zlc;
|
|
|
|
+ struct zone **z;
|
|
|
|
|
|
- zonelist = pgdat->node_zonelists + i;
|
|
|
|
- zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
|
|
|
|
- bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
|
|
|
|
- for (z = zonelist->zones; *z; z++)
|
|
|
|
- zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z);
|
|
|
|
- }
|
|
|
|
|
|
+ zonelist = &pgdat->node_zonelists[0];
|
|
|
|
+ zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
|
|
|
|
+ bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
|
|
|
|
+ for (z = zonelist->zones; *z; z++)
|
|
|
|
+ zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -2290,45 +2268,43 @@ static void set_zonelist_order(void)
|
|
static void build_zonelists(pg_data_t *pgdat)
|
|
static void build_zonelists(pg_data_t *pgdat)
|
|
{
|
|
{
|
|
int node, local_node;
|
|
int node, local_node;
|
|
- enum zone_type i,j;
|
|
|
|
|
|
+ enum zone_type j;
|
|
|
|
+ struct zonelist *zonelist;
|
|
|
|
|
|
local_node = pgdat->node_id;
|
|
local_node = pgdat->node_id;
|
|
- for (i = 0; i < MAX_NR_ZONES; i++) {
|
|
|
|
- struct zonelist *zonelist;
|
|
|
|
|
|
|
|
- zonelist = pgdat->node_zonelists + i;
|
|
|
|
|
|
+ zonelist = &pgdat->node_zonelists[0];
|
|
|
|
+ j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
|
|
|
|
|
|
- j = build_zonelists_node(pgdat, zonelist, 0, i);
|
|
|
|
- /*
|
|
|
|
- * Now we build the zonelist so that it contains the zones
|
|
|
|
- * of all the other nodes.
|
|
|
|
- * We don't want to pressure a particular node, so when
|
|
|
|
- * building the zones for node N, we make sure that the
|
|
|
|
- * zones coming right after the local ones are those from
|
|
|
|
- * node N+1 (modulo N)
|
|
|
|
- */
|
|
|
|
- for (node = local_node + 1; node < MAX_NUMNODES; node++) {
|
|
|
|
- if (!node_online(node))
|
|
|
|
- continue;
|
|
|
|
- j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
|
|
|
|
- }
|
|
|
|
- for (node = 0; node < local_node; node++) {
|
|
|
|
- if (!node_online(node))
|
|
|
|
- continue;
|
|
|
|
- j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- zonelist->zones[j] = NULL;
|
|
|
|
|
|
+ /*
|
|
|
|
+ * Now we build the zonelist so that it contains the zones
|
|
|
|
+ * of all the other nodes.
|
|
|
|
+ * We don't want to pressure a particular node, so when
|
|
|
|
+ * building the zones for node N, we make sure that the
|
|
|
|
+ * zones coming right after the local ones are those from
|
|
|
|
+ * node N+1 (modulo N)
|
|
|
|
+ */
|
|
|
|
+ for (node = local_node + 1; node < MAX_NUMNODES; node++) {
|
|
|
|
+ if (!node_online(node))
|
|
|
|
+ continue;
|
|
|
|
+ j = build_zonelists_node(NODE_DATA(node), zonelist, j,
|
|
|
|
+ MAX_NR_ZONES - 1);
|
|
}
|
|
}
|
|
|
|
+ for (node = 0; node < local_node; node++) {
|
|
|
|
+ if (!node_online(node))
|
|
|
|
+ continue;
|
|
|
|
+ j = build_zonelists_node(NODE_DATA(node), zonelist, j,
|
|
|
|
+ MAX_NR_ZONES - 1);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ zonelist->zones[j] = NULL;
|
|
}
|
|
}
|
|
|
|
|
|
/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */
|
|
/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */
|
|
static void build_zonelist_cache(pg_data_t *pgdat)
|
|
static void build_zonelist_cache(pg_data_t *pgdat)
|
|
{
|
|
{
|
|
- int i;
|
|
|
|
-
|
|
|
|
- for (i = 0; i < MAX_NR_ZONES; i++)
|
|
|
|
- pgdat->node_zonelists[i].zlcache_ptr = NULL;
|
|
|
|
|
|
+ pgdat->node_zonelists[0].zlcache_ptr = NULL;
|
|
|
|
+ pgdat->node_zonelists[1].zlcache_ptr = NULL;
|
|
}
|
|
}
|
|
|
|
|
|
#endif /* CONFIG_NUMA */
|
|
#endif /* CONFIG_NUMA */
|