|
@@ -2654,8 +2654,12 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
|
|
|
/*
|
|
|
* kswapd shrinks the zone by the number of pages required to reach
|
|
|
* the high watermark.
|
|
|
+ *
|
|
|
+ * Returns true if kswapd scanned at least the requested number of pages to
|
|
|
+ * reclaim. This is used to determine if the scanning priority needs to be
|
|
|
+ * raised.
|
|
|
*/
|
|
|
-static void kswapd_shrink_zone(struct zone *zone,
|
|
|
+static bool kswapd_shrink_zone(struct zone *zone,
|
|
|
struct scan_control *sc,
|
|
|
unsigned long lru_pages)
|
|
|
{
|
|
@@ -2675,6 +2679,8 @@ static void kswapd_shrink_zone(struct zone *zone,
|
|
|
|
|
|
if (nr_slab == 0 && !zone_reclaimable(zone))
|
|
|
zone->all_unreclaimable = 1;
|
|
|
+
|
|
|
+ return sc->nr_scanned >= sc->nr_to_reclaim;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -2701,26 +2707,26 @@ static void kswapd_shrink_zone(struct zone *zone,
|
|
|
static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
|
|
|
int *classzone_idx)
|
|
|
{
|
|
|
- bool pgdat_is_balanced = false;
|
|
|
int i;
|
|
|
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
|
|
|
unsigned long nr_soft_reclaimed;
|
|
|
unsigned long nr_soft_scanned;
|
|
|
struct scan_control sc = {
|
|
|
.gfp_mask = GFP_KERNEL,
|
|
|
+ .priority = DEF_PRIORITY,
|
|
|
.may_unmap = 1,
|
|
|
.may_swap = 1,
|
|
|
+ .may_writepage = !laptop_mode,
|
|
|
.order = order,
|
|
|
.target_mem_cgroup = NULL,
|
|
|
};
|
|
|
-loop_again:
|
|
|
- sc.priority = DEF_PRIORITY;
|
|
|
- sc.nr_reclaimed = 0;
|
|
|
- sc.may_writepage = !laptop_mode;
|
|
|
count_vm_event(PAGEOUTRUN);
|
|
|
|
|
|
do {
|
|
|
unsigned long lru_pages = 0;
|
|
|
+ bool raise_priority = true;
|
|
|
+
|
|
|
+ sc.nr_reclaimed = 0;
|
|
|
|
|
|
/*
|
|
|
* Scan in the highmem->dma direction for the highest
|
|
@@ -2762,10 +2768,8 @@ loop_again:
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- if (i < 0) {
|
|
|
- pgdat_is_balanced = true;
|
|
|
+ if (i < 0)
|
|
|
goto out;
|
|
|
- }
|
|
|
|
|
|
for (i = 0; i <= end_zone; i++) {
|
|
|
struct zone *zone = pgdat->node_zones + i;
|
|
@@ -2832,8 +2836,16 @@ loop_again:
|
|
|
|
|
|
if ((buffer_heads_over_limit && is_highmem_idx(i)) ||
|
|
|
!zone_balanced(zone, testorder,
|
|
|
- balance_gap, end_zone))
|
|
|
- kswapd_shrink_zone(zone, &sc, lru_pages);
|
|
|
+ balance_gap, end_zone)) {
|
|
|
+ /*
|
|
|
+ * There should be no need to raise the
|
|
|
+ * scanning priority if enough pages are
|
|
|
+ * already being scanned that high
|
|
|
+ * watermark would be met at 100% efficiency.
|
|
|
+ */
|
|
|
+ if (kswapd_shrink_zone(zone, &sc, lru_pages))
|
|
|
+ raise_priority = false;
|
|
|
+ }
|
|
|
|
|
|
/*
|
|
|
* If we're getting trouble reclaiming, start doing
|
|
@@ -2868,46 +2880,29 @@ loop_again:
|
|
|
pfmemalloc_watermark_ok(pgdat))
|
|
|
wake_up(&pgdat->pfmemalloc_wait);
|
|
|
|
|
|
- if (pgdat_balanced(pgdat, order, *classzone_idx)) {
|
|
|
- pgdat_is_balanced = true;
|
|
|
- break; /* kswapd: all done */
|
|
|
- }
|
|
|
-
|
|
|
/*
|
|
|
- * We do this so kswapd doesn't build up large priorities for
|
|
|
- * example when it is freeing in parallel with allocators. It
|
|
|
- * matches the direct reclaim path behaviour in terms of impact
|
|
|
- * on zone->*_priority.
|
|
|
+ * Fragmentation may mean that the system cannot be rebalanced
|
|
|
+ * for high-order allocations in all zones. If twice the
|
|
|
+ * allocation size has been reclaimed and the zones are still
|
|
|
+ * not balanced then recheck the watermarks at order-0 to
|
|
|
+ * prevent kswapd reclaiming excessively. Assume that a process
|
|
|
+ * requesting a high-order page can direct reclaim/compact.
|
|
|
*/
|
|
|
- if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
|
|
|
- break;
|
|
|
- } while (--sc.priority >= 0);
|
|
|
-
|
|
|
-out:
|
|
|
- if (!pgdat_is_balanced) {
|
|
|
- cond_resched();
|
|
|
+ if (order && sc.nr_reclaimed >= 2UL << order)
|
|
|
+ order = sc.order = 0;
|
|
|
|
|
|
- try_to_freeze();
|
|
|
+ /* Check if kswapd should be suspending or stopping */
|
|
|
+ if (try_to_freeze() || kthread_should_stop())
|
|
|
+ break;
|
|
|
|
|
|
/*
|
|
|
- * Fragmentation may mean that the system cannot be
|
|
|
- * rebalanced for high-order allocations in all zones.
|
|
|
- * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX,
|
|
|
- * it means the zones have been fully scanned and are still
|
|
|
- * not balanced. For high-order allocations, there is
|
|
|
- * little point trying all over again as kswapd may
|
|
|
- * infinite loop.
|
|
|
- *
|
|
|
- * Instead, recheck all watermarks at order-0 as they
|
|
|
- * are the most important. If watermarks are ok, kswapd will go
|
|
|
- * back to sleep. High-order users can still perform direct
|
|
|
- * reclaim if they wish.
|
|
|
+ * Raise priority if scanning rate is too low or there was no
|
|
|
+ * progress in reclaiming pages.
|
|
|
*/
|
|
|
- if (sc.nr_reclaimed < SWAP_CLUSTER_MAX)
|
|
|
- order = sc.order = 0;
|
|
|
-
|
|
|
- goto loop_again;
|
|
|
- }
|
|
|
+ if (raise_priority || !sc.nr_reclaimed)
|
|
|
+ sc.priority--;
|
|
|
+ } while (sc.priority >= 0 &&
|
|
|
+ !pgdat_balanced(pgdat, order, *classzone_idx));
|
|
|
|
|
|
/*
|
|
|
* If kswapd was reclaiming at a higher order, it has the option of
|
|
@@ -2936,6 +2931,7 @@ out:
|
|
|
compact_pgdat(pgdat, order);
|
|
|
}
|
|
|
|
|
|
+out:
|
|
|
/*
|
|
|
* Return the order we were reclaiming at so prepare_kswapd_sleep()
|
|
|
* makes a decision on the order we were last reclaiming at. However,
|