@@ -66,6 +66,8 @@ struct scan_control {
 	int swappiness;
 
 	int all_unreclaimable;
+
+	int order;
 };
 
 /*
@@ -481,7 +483,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		referenced = page_referenced(page, 1);
 		/* In active use or really unfreeable? Activate it. */
-		if (referenced && page_mapping_inuse(page))
+		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
+					referenced && page_mapping_inuse(page))
 			goto activate_locked;
 
 #ifdef CONFIG_SWAP
@@ -514,7 +517,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		}
 
 		if (PageDirty(page)) {
-			if (referenced)
+			if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
 				goto keep_locked;
 			if (!may_enter_fs)
 				goto keep_locked;
@@ -598,6 +601,51 @@ keep:
 	return nr_reclaimed;
 }
 
+/* LRU Isolation modes. */
+#define ISOLATE_INACTIVE 0	/* Isolate inactive pages. */
+#define ISOLATE_ACTIVE 1	/* Isolate active pages. */
+#define ISOLATE_BOTH 2		/* Isolate both active and inactive pages. */
+
+/*
+ * Attempt to remove the specified page from its LRU. Only take this page
+ * if it is of the appropriate PageActive status. Pages which are being
+ * freed elsewhere are also ignored.
+ *
+ * page:	page to consider
+ * mode:	one of the LRU isolation modes defined above
+ *
+ * returns 0 on success, -ve errno on failure.
+ */
+static int __isolate_lru_page(struct page *page, int mode)
+{
+	int ret = -EINVAL;
+
+	/* Only take pages on the LRU. */
+	if (!PageLRU(page))
+		return ret;
+
+	/*
+	 * When checking the active state, we need to be sure we are
+	 * dealing with comparable boolean values. Take the logical not
+	 * of each.
+	 */
+	if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
+		return ret;
+
+	ret = -EBUSY;
+	if (likely(get_page_unless_zero(page))) {
+		/*
+		 * Be careful not to clear PageLRU until after we're
+		 * sure the page is not being freed elsewhere -- the
+		 * page release code relies on it.
+		 */
+		ClearPageLRU(page);
+		ret = 0;
+	}
+
+	return ret;
+}
+
 /*
  * zone->lru_lock is heavily contended. Some of the functions that
  * shrink the lists perform better by taking out a batch of pages
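
An aside on the (!PageActive(page) != !mode) test above: PageActive() returns
an arbitrary non-zero value when the flag is set, while mode is 0 or 1 once
ISOLATE_BOTH has been excluded, so both operands are collapsed to comparable
0/1 booleans with "!". The sketch below is illustrative only (a standalone
userspace program with made-up flag values, not kernel code) and just
demonstrates the same normalization idiom.

#include <assert.h>

/* Illustrative stand-ins for the isolation modes defined in the patch. */
#define ISOLATE_INACTIVE 0
#define ISOLATE_ACTIVE   1

/*
 * active_flag plays the role of PageActive(): any non-zero value (often a
 * masked bit in a flags word) means "active". Logical not normalizes both
 * operands before they are compared.
 */
static int mode_mismatch(unsigned long active_flag, int mode)
{
	return !active_flag != !mode;
}

int main(void)
{
	assert(!mode_mismatch(0x40, ISOLATE_ACTIVE));	/* active page, active mode: take it */
	assert(mode_mismatch(0x40, ISOLATE_INACTIVE));	/* active page, inactive mode: reject */
	assert(!mode_mismatch(0, ISOLATE_INACTIVE));	/* inactive page, inactive mode: take it */
	return 0;
}
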
@@ -612,44 +660,114 @@ keep:
  * @src:	The LRU list to pull pages off.
  * @dst:	The temp list to put pages on to.
  * @scanned:	The number of pages that were scanned.
+ * @order:	The caller's attempted allocation order
+ * @mode:	One of the LRU isolation modes
  *
  * returns how many pages were moved onto *@dst.
  */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		struct list_head *src, struct list_head *dst,
-		unsigned long *scanned)
+		unsigned long *scanned, int order, int mode)
 {
 	unsigned long nr_taken = 0;
-	struct page *page;
 	unsigned long scan;
 
 	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
-		struct list_head *target;
+		struct page *page;
+		unsigned long pfn;
+		unsigned long end_pfn;
+		unsigned long page_pfn;
+		int zone_id;
+
 		page = lru_to_page(src);
 		prefetchw_prev_lru_page(page, src, flags);
 
 		VM_BUG_ON(!PageLRU(page));
 
-		list_del(&page->lru);
-		target = src;
-		if (likely(get_page_unless_zero(page))) {
-			/*
-			 * Be careful not to clear PageLRU until after we're
-			 * sure the page is not being freed elsewhere -- the
-			 * page release code relies on it.
-			 */
-			ClearPageLRU(page);
-			target = dst;
+		switch (__isolate_lru_page(page, mode)) {
+		case 0:
+			list_move(&page->lru, dst);
 			nr_taken++;
-		} /* else it is being freed elsewhere */
+			break;
 
-		list_add(&page->lru, target);
+		case -EBUSY:
+			/* else it is being freed elsewhere */
+			list_move(&page->lru, src);
+			continue;
+
+		default:
+			BUG();
+		}
+
+		if (!order)
+			continue;
+
+		/*
+		 * Attempt to take all pages in the order aligned region
+		 * surrounding the tag page. Only take those pages of
+		 * the same active state as that tag page. We may safely
+		 * round the target page pfn down to the requested order
+		 * as the mem_map is guaranteed valid out to MAX_ORDER;
+		 * if that page is in a different zone we will detect it
+		 * from its zone id and abort this block scan.
+		 */
+		zone_id = page_zone_id(page);
+		page_pfn = page_to_pfn(page);
+		pfn = page_pfn & ~((1 << order) - 1);
+		end_pfn = pfn + (1 << order);
+		for (; pfn < end_pfn; pfn++) {
+			struct page *cursor_page;
+
+			/* The target page is in the block, ignore it. */
+			if (unlikely(pfn == page_pfn))
+				continue;
+
+			/* Avoid holes within the zone. */
+			if (unlikely(!pfn_valid_within(pfn)))
+				break;
+
+			cursor_page = pfn_to_page(pfn);
+			/* Check that we have not crossed a zone boundary. */
+			if (unlikely(page_zone_id(cursor_page) != zone_id))
+				continue;
+			switch (__isolate_lru_page(cursor_page, mode)) {
+			case 0:
+				list_move(&cursor_page->lru, dst);
+				nr_taken++;
+				scan++;
+				break;
+
+			case -EBUSY:
+				/* else it is being freed elsewhere */
+				list_move(&cursor_page->lru, src);
+			default:
+				break;
+			}
+		}
 	}
 
 	*scanned = scan;
 	return nr_taken;
 }
 
+/*
+ * clear_active_flags() is a helper for shrink_active_list(), clearing
+ * any active bits from the pages in the list.
+ */
+static unsigned long clear_active_flags(struct list_head *page_list)
+{
+	int nr_active = 0;
+	struct page *page;
+
+	list_for_each_entry(page, page_list, lru)
+		if (PageActive(page)) {
+			ClearPageActive(page);
+			nr_active++;
+		}
+
+	return nr_active;
+}
+
 /*
  * shrink_inactive_list() is a helper for shrink_zone(). It returns the number
  * of reclaimed pages
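
For reference, the block arithmetic above rounds the tag page's pfn down to
the start of its order-aligned block and scans up to, but not including,
pfn + (1 << order). The standalone sketch below (made-up pfn and order,
illustrative only, not kernel code) walks the same block boundaries.

#include <stdio.h>

/*
 * Illustrative only: the order-aligned block arithmetic used by
 * isolate_lru_pages() in the patch, applied to made-up numbers.
 */
int main(void)
{
	unsigned long page_pfn = 0x12345;	/* hypothetical tag page pfn */
	int order = 3;				/* caller wants 2^3 = 8 contiguous pages */
	unsigned long pfn = page_pfn & ~((1UL << order) - 1);	/* 0x12340 */
	unsigned long end_pfn = pfn + (1UL << order);		/* 0x12348 */

	/* Visits pfns 0x12340..0x12347 and skips the tag page itself. */
	for (; pfn < end_pfn; pfn++)
		if (pfn != page_pfn)
			printf("would consider pfn 0x%lx\n", pfn);
	return 0;
}
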
@@ -671,11 +789,18 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_taken;
 		unsigned long nr_scan;
 		unsigned long nr_freed;
+		unsigned long nr_active;
 
 		nr_taken = isolate_lru_pages(sc->swap_cluster_max,
-			     &zone->inactive_list,
-			     &page_list, &nr_scan);
-		__mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);
+			     &zone->inactive_list,
+			     &page_list, &nr_scan, sc->order,
+			     (sc->order > PAGE_ALLOC_COSTLY_ORDER) ?
+					ISOLATE_BOTH : ISOLATE_INACTIVE);
+		nr_active = clear_active_flags(&page_list);
+
+		__mod_zone_page_state(zone, NR_ACTIVE, -nr_active);
+		__mod_zone_page_state(zone, NR_INACTIVE,
+					-(nr_taken - nr_active));
 		zone->pages_scanned += nr_scan;
 		spin_unlock_irq(&zone->lru_lock);
 
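
A short illustrative note on the accounting above: when ISOLATE_BOTH is in
effect, some of the isolated pages were active, so clear_active_flags()
reports how many, and the two zone counters are adjusted by complementary
amounts. The check below uses made-up numbers (not kernel code) and only
demonstrates that the split accounts for every isolated page exactly once.

#include <assert.h>

int main(void)
{
	unsigned long nr_taken = 32;	/* hypothetical pages isolated in one batch */
	unsigned long nr_active = 12;	/* of those, pages that had PG_active set */

	long active_delta = -(long)nr_active;			/* applied to NR_ACTIVE */
	long inactive_delta = -(long)(nr_taken - nr_active);	/* applied to NR_INACTIVE */

	/* Every isolated page is subtracted from exactly one counter. */
	assert(active_delta + inactive_delta == -(long)nr_taken);
	return 0;
}
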
@@ -820,7 +945,7 @@ force_reclaim_mapped:
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
-			    &l_hold, &pgscanned);
+			    &l_hold, &pgscanned, sc->order, ISOLATE_ACTIVE);
 	zone->pages_scanned += pgscanned;
 	__mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
@@ -1011,7 +1136,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
  * holds filesystem locks which prevent writeout this might not work, and the
  * allocation attempt will fail.
  */
-unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
+unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 {
 	int priority;
 	int ret = 0;
@@ -1026,6 +1151,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.may_swap = 1,
 		.swappiness = vm_swappiness,
+		.order = order,
 	};
 
 	count_vm_event(ALLOCSTALL);
@@ -1131,6 +1257,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 		.may_swap = 1,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = vm_swappiness,
+		.order = order,
 	};
 	/*
 	 * temp_priority is used to remember the scanning priority at which
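
Callers of try_to_free_pages() now have to pass the allocation order through.
The direct-reclaim call site lives in mm/page_alloc.c and is not part of this
excerpt; the fragment below is only a sketch of what such a caller would look
like, assuming the usual __alloc_pages() locals of this era (did_some_progress,
zonelist, order, gfp_mask).

	/*
	 * Sketch of a direct-reclaim call site (not shown in this patch
	 * excerpt): the allocator forwards its own order so reclaim can
	 * try to free a contiguous block of that size.
	 */
	did_some_progress = try_to_free_pages(zonelist->zones, order, gfp_mask);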