|
@@ -2198,10 +2198,40 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
+/*
|
|
|
+ * pgdat_balanced is used when checking if a node is balanced for high-order
|
|
|
+ * allocations. Only zones that meet watermarks and are in a zone allowed
|
|
|
+ * by the caller's classzone_idx are added to balanced_pages. The total of
|
|
|
+ * balanced pages must be at least 25% of the zones allowed by classzone_idx
|
|
|
+ * for the node to be considered balanced. Forcing all zones to be balanced
|
|
|
+ * for high orders can cause excessive reclaim when there are imbalanced zones.
|
|
|
+ * The choice of 25% is due to
|
|
|
+ * o a 16M DMA zone that is balanced will not balance a zone on any
|
|
|
+ * reasonable sized machine
|
|
|
+ * o On all other machines, the top zone must be at least a reasonable
|
|
|
+ * percentage of the middle zones. For example, on 32-bit x86, highmem
|
|
|
+ * would need to be at least 256M for it to balance a whole node.
|
|
|
+ * Similarly, on x86-64 the Normal zone would need to be at least 1G
|
|
|
+ * to balance a node on its own. These seemed like reasonable ratios.
|
|
|
+ */
|
|
|
+static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
+						int classzone_idx)
+{
+	unsigned long present_pages = 0;
+	int i;
+
+	for (i = 0; i <= classzone_idx; i++)
+		present_pages += pgdat->node_zones[i].present_pages;
+	/* At least 25%; >= also treats a node with no pages as balanced */
+	return balanced_pages >= (present_pages >> 2);
+}
|
|
|
+
|
|
|
/* is kswapd sleeping prematurely? */
|
|
|
static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
|
|
|
{
|
|
|
int i;
|
|
|
+ unsigned long balanced = 0;
|
|
|
+ bool all_zones_ok = true;
|
|
|
|
|
|
/* If a direct reclaimer woke kswapd within HZ/10, it's premature */
|
|
|
if (remaining)
|
|
@@ -2219,10 +2249,20 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
|
|
|
|
|
|
if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
|
|
|
0, 0))
|
|
|
- return 1;
|
|
|
+ all_zones_ok = false;
|
|
|
+ else
|
|
|
+ balanced += zone->present_pages;
|
|
|
}
|
|
|
|
|
|
- return 0;
|
|
|
+ /*
|
|
|
+ * For high-order requests, the balanced zones must contain at least
|
|
|
+ * 25% of the node's pages for kswapd to sleep. For order-0, all zones
|
|
|
+ * must be balanced
|
|
|
+ */
|
|
|
+ if (order)
|
|
|
+ return pgdat_balanced(pgdat, balanced, 0);
|
|
|
+ else
|
|
|
+ return !all_zones_ok;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -2250,7 +2290,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
|
|
|
int classzone_idx)
|
|
|
{
|
|
|
int all_zones_ok;
|
|
|
- int any_zone_ok;
|
|
|
+ unsigned long balanced;
|
|
|
int priority;
|
|
|
int i;
|
|
|
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
|
|
@@ -2284,7 +2324,7 @@ loop_again:
|
|
|
disable_swap_token();
|
|
|
|
|
|
all_zones_ok = 1;
|
|
|
- any_zone_ok = 0;
|
|
|
+ balanced = 0;
|
|
|
|
|
|
/*
|
|
|
* Scan in the highmem->dma direction for the highest
|
|
@@ -2404,11 +2444,11 @@ loop_again:
|
|
|
*/
|
|
|
zone_clear_flag(zone, ZONE_CONGESTED);
|
|
|
if (i <= classzone_idx)
|
|
|
- any_zone_ok = 1;
|
|
|
+ balanced += zone->present_pages;
|
|
|
}
|
|
|
|
|
|
}
|
|
|
- if (all_zones_ok || (order && any_zone_ok))
|
|
|
+ if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))
|
|
|
break; /* kswapd: all done */
|
|
|
/*
|
|
|
* OK, kswapd is getting into trouble. Take a nap, then take
|
|
@@ -2434,10 +2474,10 @@ out:
|
|
|
|
|
|
/*
|
|
|
* order-0: All zones must meet high watermark for a balanced node
|
|
|
- * high-order: Any zone below pgdats classzone_idx must meet the high
|
|
|
- * watermark for a balanced node
|
|
|
+ * high-order: Balanced zones must make up at least 25% of the node
|
|
|
+ * for the node to be balanced
|
|
|
*/
|
|
|
- if (!(all_zones_ok || (order && any_zone_ok))) {
|
|
|
+ if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))) {
|
|
|
cond_resched();
|
|
|
|
|
|
try_to_freeze();
|