@@ -697,6 +697,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				      enum ttu_flags ttu_flags,
 				      unsigned long *ret_nr_unqueued_dirty,
 				      unsigned long *ret_nr_writeback,
+				      unsigned long *ret_nr_immediate,
 				      bool force_reclaim)
 {
 	LIST_HEAD(ret_pages);
@@ -707,6 +708,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 	unsigned long nr_congested = 0;
 	unsigned long nr_reclaimed = 0;
 	unsigned long nr_writeback = 0;
+	unsigned long nr_immediate = 0;
 
 	cond_resched();
 
@@ -773,8 +775,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 *    IO can complete. Waiting on the page itself risks an
 		 *    indefinite stall if it is impossible to writeback the
 		 *    page due to IO error or disconnected storage so instead
-		 *    block for HZ/10 or until some IO completes then clear the
-		 *    ZONE_WRITEBACK flag to recheck if the condition exists.
+		 *    note that the LRU is being scanned too quickly and the
+		 *    caller can stall after page list has been processed.
 		 *
 		 * 2) Global reclaim encounters a page, memcg encounters a
 		 *    page that is not marked for immediate reclaim or
@@ -804,10 +806,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			if (current_is_kswapd() &&
 			    PageReclaim(page) &&
 			    zone_is_reclaim_writeback(zone)) {
-				unlock_page(page);
-				congestion_wait(BLK_RW_ASYNC, HZ/10);
-				zone_clear_flag(zone, ZONE_WRITEBACK);
-				goto keep;
+				nr_immediate++;
+				goto keep_locked;
 
 			/* Case 2 above */
 			} else if (global_reclaim(sc) ||
@@ -1033,6 +1033,7 @@ keep:
 	mem_cgroup_uncharge_end();
 	*ret_nr_unqueued_dirty += nr_unqueued_dirty;
 	*ret_nr_writeback += nr_writeback;
+	*ret_nr_immediate += nr_immediate;
 	return nr_reclaimed;
 }
 
@@ -1044,7 +1045,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 		.priority = DEF_PRIORITY,
 		.may_unmap = 1,
 	};
-	unsigned long ret, dummy1, dummy2;
+	unsigned long ret, dummy1, dummy2, dummy3;
 	struct page *page, *next;
 	LIST_HEAD(clean_pages);
 
@@ -1057,7 +1058,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 
 	ret = shrink_page_list(&clean_pages, zone, &sc,
 			TTU_UNMAP|TTU_IGNORE_ACCESS,
-			&dummy1, &dummy2, true);
+			&dummy1, &dummy2, &dummy3, true);
 	list_splice(&clean_pages, page_list);
 	__mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
 	return ret;
@@ -1353,6 +1354,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	unsigned long nr_taken;
 	unsigned long nr_unqueued_dirty = 0;
 	unsigned long nr_writeback = 0;
+	unsigned long nr_immediate = 0;
 	isolate_mode_t isolate_mode = 0;
 	int file = is_file_lru(lru);
 	struct zone *zone = lruvec_zone(lruvec);
@@ -1394,7 +1396,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 		return 0;
 
 	nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP,
-				&nr_unqueued_dirty, &nr_writeback, false);
+				&nr_unqueued_dirty, &nr_writeback, &nr_immediate,
+				false);
 
 	spin_lock_irq(&zone->lru_lock);
 
@@ -1447,14 +1450,28 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	}
 
 	/*
-	 * Similarly, if many dirty pages are encountered that are not
-	 * currently being written then flag that kswapd should start
-	 * writing back pages and stall to give a chance for flushers
-	 * to catch up.
+	 * memcg will stall in page writeback so only consider forcibly
+	 * stalling for global reclaim
 	 */
-	if (global_reclaim(sc) && nr_unqueued_dirty == nr_taken) {
-		congestion_wait(BLK_RW_ASYNC, HZ/10);
-		zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY);
+	if (global_reclaim(sc)) {
+		/*
+		 * If dirty pages are scanned that are not queued for IO, it
+		 * implies that flushers are not keeping up. In this case, flag
+		 * the zone ZONE_TAIL_LRU_DIRTY and kswapd will start writing
+		 * pages from reclaim context. It will forcibly stall in the
+		 * next check.
+		 */
+		if (nr_unqueued_dirty == nr_taken)
+			zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY);
+
+		/*
+		 * In addition, if kswapd scans pages marked for
+		 * immediate reclaim and under writeback (nr_immediate), it
+		 * implies that pages are cycling through the LRU faster than
+		 * they are written so also forcibly stall.
+		 */
+		if (nr_unqueued_dirty == nr_taken || nr_immediate)
+			congestion_wait(BLK_RW_ASYNC, HZ/10);
 	}
 
 	trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
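
For reference, the following is a minimal user-space sketch of the stall decision this patch adds to shrink_inactive_list(). The struct and helper names (reclaim_counters, should_stall) are hypothetical stand-ins for illustration, not kernel APIs; the sketch only mirrors the control flow of the new hunk: flag the zone dirty-at-tail when every dirty page scanned was not queued for IO, and stall (congestion_wait in the patch) when that holds or when pages marked for immediate reclaim were seen under writeback.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the counters shrink_page_list() reports back. */
struct reclaim_counters {
	unsigned long nr_taken;		/* pages isolated from the LRU */
	unsigned long nr_unqueued_dirty;	/* dirty pages not queued for IO */
	unsigned long nr_immediate;	/* PageReclaim pages found under writeback */
};

/*
 * Mirrors the global-reclaim throttling added above: returns true when the
 * caller should stall, and reports whether the zone should be flagged so
 * kswapd starts writing pages from reclaim context.
 */
static bool should_stall(const struct reclaim_counters *c, bool global_reclaim,
			 bool *set_tail_lru_dirty)
{
	*set_tail_lru_dirty = false;

	/* memcg reclaim stalls in page writeback itself; only throttle global reclaim. */
	if (!global_reclaim)
		return false;

	/* Every dirty page scanned was unqueued: flushers are not keeping up. */
	if (c->nr_unqueued_dirty == c->nr_taken)
		*set_tail_lru_dirty = true;

	/* Stall if flushers lag or pages cycle through the LRU faster than IO completes. */
	return c->nr_unqueued_dirty == c->nr_taken || c->nr_immediate != 0;
}

int main(void)
{
	struct reclaim_counters c = {
		.nr_taken = 32,
		.nr_unqueued_dirty = 32,
		.nr_immediate = 0,
	};
	bool mark_dirty;

	if (should_stall(&c, true, &mark_dirty))
		printf("stall; mark zone dirty-at-tail: %d\n", mark_dirty);
	return 0;
}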