@@ -2246,11 +2246,14 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
  * interoperates with the page allocator fallback scheme to ensure that aging
  * of pages is balanced across the zones.
  */
-static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
+static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
+							int classzone_idx)
 {
 	int all_zones_ok;
+	int any_zone_ok;
 	int priority;
 	int i;
+	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 	unsigned long total_scanned;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	struct scan_control sc = {
@@ -2273,7 +2276,6 @@ loop_again:
 	count_vm_event(PAGEOUTRUN);
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
-		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
 		int has_under_min_watermark_zone = 0;
 
@@ -2282,6 +2284,7 @@ loop_again:
 			disable_swap_token();
 
 		all_zones_ok = 1;
+		any_zone_ok = 0;
 
 		/*
 		 * Scan in the highmem->dma direction for the highest
@@ -2400,10 +2403,12 @@ loop_again:
				 * speculatively avoid congestion waits
				 */
				zone_clear_flag(zone, ZONE_CONGESTED);
+				if (i <= classzone_idx)
+					any_zone_ok = 1;
			}

		}
-		if (all_zones_ok)
+		if (all_zones_ok || (order && any_zone_ok))
			break;		/* kswapd: all done */
		/*
		 * OK, kswapd is getting into trouble. Take a nap, then take
@@ -2426,7 +2431,13 @@
 			break;
 	}
 out:
-	if (!all_zones_ok) {
+
+	/*
+	 * order-0: all zones must meet the high watermark for a balanced node
+	 * high-order: any zone at or below pgdat's classzone_idx meeting the
+	 * high watermark is enough for a balanced node
+	 */
+	if (!(all_zones_ok || (order && any_zone_ok))) {
 		cond_resched();
 
 		try_to_freeze();
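
The balanced-node test now appears twice, at the priority-loop exit and
after the out: label. Purely as an illustration (not part of this
patch), the shared rule could be factored into a helper; the name
node_is_balanced is an assumption, but the three arguments are exactly
the values the patch computes above.

/*
 * Sketch, not from the patch: a node counts as balanced when every
 * zone meets its high watermark (the order-0 case), or, for a
 * high-order wakeup, when at least one zone at or below the waker's
 * classzone_idx does.
 */
static inline int node_is_balanced(int all_zones_ok, int any_zone_ok,
					int order)
{
	return all_zones_ok || (order && any_zone_ok);
}
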
@@ -2451,6 +2462,36 @@ out:
 		goto loop_again;
 	}
 
+	/*
+	 * If kswapd was reclaiming at a higher order, it has the option of
+	 * sleeping without all zones being balanced. Before it does, it must
+	 * ensure that the watermarks for order-0 on *all* zones are met and
+	 * that the congestion flags are cleared. The congestion flag must
+	 * be cleared as kswapd is the only mechanism that clears the flag
+	 * and it is potentially going to sleep here.
+	 */
+	if (order) {
+		for (i = 0; i <= end_zone; i++) {
+			struct zone *zone = pgdat->node_zones + i;
+
+			if (!populated_zone(zone))
+				continue;
+
+			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+				continue;
+
+			/* Confirm the zone is balanced for order-0 */
+			if (!zone_watermark_ok(zone, 0,
+					high_wmark_pages(zone), 0, 0)) {
+				order = sc.order = 0;
+				goto loop_again;
+			}
+
+			/* If balanced, clear the congested flag */
+			zone_clear_flag(zone, ZONE_CONGESTED);
+		}
+	}
+
 	return sc.nr_reclaimed;
 }
 
@@ -2514,6 +2555,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order)
 static int kswapd(void *p)
 {
 	unsigned long order;
+	int classzone_idx;
 	pg_data_t *pgdat = (pg_data_t*)p;
 	struct task_struct *tsk = current;
 
@@ -2544,21 +2586,27 @@ static int kswapd(void *p)
 	set_freezable();
 
 	order = 0;
+	classzone_idx = MAX_NR_ZONES - 1;
 	for ( ; ; ) {
 		unsigned long new_order;
+		int new_classzone_idx;
 		int ret;
 
 		new_order = pgdat->kswapd_max_order;
+		new_classzone_idx = pgdat->classzone_idx;
 		pgdat->kswapd_max_order = 0;
-		if (order < new_order) {
+		pgdat->classzone_idx = MAX_NR_ZONES - 1;
+		if (order < new_order || classzone_idx > new_classzone_idx) {
 			/*
 			 * Don't sleep if someone wants a larger 'order'
-			 * allocation
+			 * allocation or has tighter zone constraints
 			 */
 			order = new_order;
+			classzone_idx = new_classzone_idx;
 		} else {
 			kswapd_try_to_sleep(pgdat, order);
 			order = pgdat->kswapd_max_order;
+			classzone_idx = pgdat->classzone_idx;
 		}
 
 		ret = try_to_freeze();
@@ -2571,7 +2619,7 @@ static int kswapd(void *p)
 		 */
 		if (!ret) {
 			trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
-			balance_pgdat(pgdat, order);
+			balance_pgdat(pgdat, order, classzone_idx);
 		}
 	}
 	return 0;
@@ -2580,7 +2628,7 @@ static int kswapd(void *p)
 /*
  * A zone is low on free memory, so wake its kswapd task to service it.
  */
-void wakeup_kswapd(struct zone *zone, int order)
+void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 {
 	pg_data_t *pgdat;
 
@@ -2590,8 +2638,10 @@ void wakeup_kswapd(struct zone *zone, int order)
 	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 		return;
 	pgdat = zone->zone_pgdat;
-	if (pgdat->kswapd_max_order < order)
+	if (pgdat->kswapd_max_order < order) {
 		pgdat->kswapd_max_order = order;
+		pgdat->classzone_idx = min(pgdat->classzone_idx, classzone_idx);
+	}
 	if (!waitqueue_active(&pgdat->kswapd_wait))
 		return;
 	if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
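
Callers of wakeup_kswapd() now have to supply the highest zone index
they can allocate from. The call sites live in mm/page_alloc.c and are
not part of this excerpt; purely as an illustration, a caller-side
helper could look like the sketch below, where the helper name and
signature are assumptions and zone_idx(preferred_zone) is the highest
zone the waker can actually use.

/*
 * Illustrative sketch only, not taken from this patch: wake kswapd on
 * every node in the zonelist, passing the caller's zone constraint.
 */
static void wake_all_kswapd(unsigned int order, struct zonelist *zonelist,
				enum zone_type high_zoneidx,
				enum zone_type classzone_idx)
{
	struct zoneref *z;
	struct zone *zone;

	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
		wakeup_kswapd(zone, order, classzone_idx);
}

Note the min() in wakeup_kswapd() above: when several wakers race, the
tightest (lowest) classzone_idx wins, and kswapd resets the field to
MAX_NR_ZONES - 1 each time it consumes it, so a stale constraint cannot
leak into a later wakeup.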