@@ -856,7 +856,8 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		__mod_zone_page_state(zone, NR_ACTIVE, -nr_active);
 		__mod_zone_page_state(zone, NR_INACTIVE,
 						-(nr_taken - nr_active));
-		zone->pages_scanned += nr_scan;
+		if (scan_global_lru(sc))
+			zone->pages_scanned += nr_scan;
 		spin_unlock_irq(&zone->lru_lock);
 
 		nr_scanned += nr_scan;
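A note on the helper used above: scan_global_lru() is not defined in any of the hunks shown here. It is presumably a small predicate introduced elsewhere in this series that reports whether the scan targets the global LRU rather than a memory cgroup, i.e. whether sc->mem_cgroup is NULL. A minimal sketch, assuming a config symbol along these lines (the exact Kconfig name is an assumption):

#ifdef CONFIG_CGROUP_MEM_CONT
/* global reclaim: no memory cgroup attached to this scan_control */
#define scan_global_lru(sc)	(!(sc)->mem_cgroup)
#else
#define scan_global_lru(sc)	(1)
#endif

Read that way, the hunk above keeps zone->pages_scanned as a statistic of global reclaim only and leaves it untouched when the scan is driven by a cgroup limit.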
@@ -888,8 +889,9 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		if (current_is_kswapd()) {
 			__count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
 			__count_vm_events(KSWAPD_STEAL, nr_freed);
-		} else
+		} else if (scan_global_lru(sc))
 			__count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
+
 		__count_zone_vm_events(PGSTEAL, zone, nr_freed);
 
 		if (nr_taken == 0)
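For readers reconstructing the result: after this hunk the event accounting in shrink_inactive_list() presumably reads as below (a merged view derived from the hunk, not an extra change). PGSCAN_DIRECT is now counted only for global direct reclaim, while PGSTEAL is still counted for both global and cgroup-limit reclaim; the added blank line just separates the two.

	if (current_is_kswapd()) {
		__count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
		__count_vm_events(KSWAPD_STEAL, nr_freed);
	} else if (scan_global_lru(sc))
		__count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);

	__count_zone_vm_events(PGSTEAL, zone, nr_freed);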
@@ -942,6 +944,113 @@ static inline int zone_is_near_oom(struct zone *zone)
 				+ zone_page_state(zone, NR_INACTIVE))*3;
 }
 
+/*
+ * Determine whether we should try to reclaim mapped pages.
+ * This is called only when sc->mem_cgroup is NULL.
+ */
+static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone,
+					int priority)
+{
+	long mapped_ratio;
+	long distress;
+	long swap_tendency;
+	long imbalance;
+	int reclaim_mapped = 0;
+	int prev_priority;
+
+	if (scan_global_lru(sc) && zone_is_near_oom(zone))
+		return 1;
+	/*
+	 * `distress' is a measure of how much trouble we're having
+	 * reclaiming pages. 0 -> no problems. 100 -> great trouble.
+	 */
+	if (scan_global_lru(sc))
+		prev_priority = zone->prev_priority;
+	else
+		prev_priority = mem_cgroup_get_reclaim_priority(sc->mem_cgroup);
+
+	distress = 100 >> min(prev_priority, priority);
+
+	/*
+	 * The point of this algorithm is to decide when to start
+	 * reclaiming mapped memory instead of just pagecache. Work out
+	 * how much memory
+	 * is mapped.
+	 */
+	if (scan_global_lru(sc))
+		mapped_ratio = ((global_page_state(NR_FILE_MAPPED) +
+				global_page_state(NR_ANON_PAGES)) * 100) /
+					vm_total_pages;
+	else
+		mapped_ratio = mem_cgroup_calc_mapped_ratio(sc->mem_cgroup);
+
+	/*
+	 * Now decide how much we really want to unmap some pages. The
+	 * mapped ratio is downgraded - just because there's a lot of
+	 * mapped memory doesn't necessarily mean that page reclaim
+	 * isn't succeeding.
+	 *
+	 * The distress ratio is important - we don't want to start
+	 * going oom.
+	 *
+	 * A 100% value of vm_swappiness overrides this algorithm
+	 * altogether.
+	 */
+	swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
+
+	/*
+	 * If there's huge imbalance between active and inactive
+	 * (think active 100 times larger than inactive) we should
+	 * become more permissive, or the system will take too much
+	 * cpu before it start swapping during memory pressure.
+	 * Distress is about avoiding early-oom, this is about
+	 * making swappiness graceful despite setting it to low
+	 * values.
+	 *
+	 * Avoid div by zero with nr_inactive+1, and max resulting
+	 * value is vm_total_pages.
+	 */
+	if (scan_global_lru(sc)) {
+		imbalance = zone_page_state(zone, NR_ACTIVE);
+		imbalance /= zone_page_state(zone, NR_INACTIVE) + 1;
+	} else
+		imbalance = mem_cgroup_reclaim_imbalance(sc->mem_cgroup);
+
+	/*
+	 * Reduce the effect of imbalance if swappiness is low,
+	 * this means for a swappiness very low, the imbalance
+	 * must be much higher than 100 for this logic to make
+	 * the difference.
+	 *
+	 * Max temporary value is vm_total_pages*100.
+	 */
+	imbalance *= (vm_swappiness + 1);
+	imbalance /= 100;
+
+	/*
+	 * If not much of the ram is mapped, makes the imbalance
+	 * less relevant, it's high priority we refill the inactive
+	 * list with mapped pages only in presence of high ratio of
+	 * mapped pages.
+	 *
+	 * Max temporary value is vm_total_pages*100.
+	 */
+	imbalance *= mapped_ratio;
+	imbalance /= 100;
+
+	/* apply imbalance feedback to swap_tendency */
+	swap_tendency += imbalance;
+
+	/*
+	 * Now use this metric to decide whether to start moving mapped
+	 * memory onto the inactive list.
+	 */
+	if (swap_tendency >= 100)
+		reclaim_mapped = 1;
+
+	return reclaim_mapped;
+}
+
 /*
  * This moves pages from the active list to the inactive list.
  *
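To make the calc_reclaim_mapped() heuristic concrete, here is a small stand-alone sketch of the same arithmetic with made-up numbers (the values for swappiness, priority, mapped ratio and list sizes are illustrative assumptions, not measurements):

#include <stdio.h>

/* Mirrors the swap_tendency/imbalance arithmetic of calc_reclaim_mapped(). */
int main(void)
{
	long swappiness = 60;			/* sc->swappiness / vm_swappiness */
	long prev_priority = 10, priority = 10;	/* little reclaim trouble so far */
	long mapped_ratio = 40;			/* assume 40% of RAM is mapped */
	long nr_active = 200000, nr_inactive = 1000;	/* badly imbalanced LRU */

	long distress = 100 >> (prev_priority < priority ? prev_priority : priority);
	long swap_tendency = mapped_ratio / 2 + distress + swappiness;	/* 20 + 0 + 60 = 80 */

	long imbalance = nr_active / (nr_inactive + 1);	/* 199 */
	imbalance = imbalance * (swappiness + 1) / 100;	/* 121 */
	imbalance = imbalance * mapped_ratio / 100;	/* 48 */

	swap_tendency += imbalance;			/* 128 */

	printf("distress=%ld swap_tendency=%ld reclaim_mapped=%d\n",
	       distress, swap_tendency, swap_tendency >= 100 ? 1 : 0);
	return 0;
}

With these numbers the base tendency (80) stays below the threshold of 100; it is the active/inactive imbalance term that pushes the total to 128 and enables mapped-page reclaim, which is exactly the "graceful swappiness" effect the comment block describes.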
@@ -959,6 +1068,8 @@ static inline int zone_is_near_oom(struct zone *zone)
  * The downside is that we have to touch page->_count against each page.
  * But we had to alter page->flags anyway.
  */
+
+
 static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 				struct scan_control *sc, int priority)
 {
@@ -972,100 +1083,21 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	struct pagevec pvec;
 	int reclaim_mapped = 0;
 
-	if (sc->may_swap) {
-		long mapped_ratio;
-		long distress;
-		long swap_tendency;
-		long imbalance;
-
-		if (zone_is_near_oom(zone))
-			goto force_reclaim_mapped;
-
-		/*
-		 * `distress' is a measure of how much trouble we're having
-		 * reclaiming pages. 0 -> no problems. 100 -> great trouble.
-		 */
-		distress = 100 >> min(zone->prev_priority, priority);
-
-		/*
-		 * The point of this algorithm is to decide when to start
-		 * reclaiming mapped memory instead of just pagecache. Work out
-		 * how much memory
-		 * is mapped.
-		 */
-		mapped_ratio = ((global_page_state(NR_FILE_MAPPED) +
-				global_page_state(NR_ANON_PAGES)) * 100) /
-					vm_total_pages;
-
-		/*
-		 * Now decide how much we really want to unmap some pages. The
-		 * mapped ratio is downgraded - just because there's a lot of
-		 * mapped memory doesn't necessarily mean that page reclaim
-		 * isn't succeeding.
-		 *
-		 * The distress ratio is important - we don't want to start
-		 * going oom.
-		 *
-		 * A 100% value of vm_swappiness overrides this algorithm
-		 * altogether.
-		 */
-		swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
-
-		/*
-		 * If there's huge imbalance between active and inactive
-		 * (think active 100 times larger than inactive) we should
-		 * become more permissive, or the system will take too much
-		 * cpu before it start swapping during memory pressure.
-		 * Distress is about avoiding early-oom, this is about
-		 * making swappiness graceful despite setting it to low
-		 * values.
-		 *
-		 * Avoid div by zero with nr_inactive+1, and max resulting
-		 * value is vm_total_pages.
-		 */
-		imbalance = zone_page_state(zone, NR_ACTIVE);
-		imbalance /= zone_page_state(zone, NR_INACTIVE) + 1;
-
-		/*
-		 * Reduce the effect of imbalance if swappiness is low,
-		 * this means for a swappiness very low, the imbalance
-		 * must be much higher than 100 for this logic to make
-		 * the difference.
-		 *
-		 * Max temporary value is vm_total_pages*100.
-		 */
-		imbalance *= (vm_swappiness + 1);
-		imbalance /= 100;
-
-		/*
-		 * If not much of the ram is mapped, makes the imbalance
-		 * less relevant, it's high priority we refill the inactive
-		 * list with mapped pages only in presence of high ratio of
-		 * mapped pages.
-		 *
-		 * Max temporary value is vm_total_pages*100.
-		 */
-		imbalance *= mapped_ratio;
-		imbalance /= 100;
-
-		/* apply imbalance feedback to swap_tendency */
-		swap_tendency += imbalance;
-
-		/*
-		 * Now use this metric to decide whether to start moving mapped
-		 * memory onto the inactive list.
-		 */
-		if (swap_tendency >= 100)
-force_reclaim_mapped:
-			reclaim_mapped = 1;
-	}
+	if (sc->may_swap)
+		reclaim_mapped = calc_reclaim_mapped(sc, zone, priority);
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 	pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
 					ISOLATE_ACTIVE, zone,
 					sc->mem_cgroup, 1);
-	zone->pages_scanned += pgscanned;
+	/*
+	 * zone->pages_scanned is used to detect a zone's OOM situation.
+	 * mem_cgroup keeps track of nr_scan by itself.
+	 */
+	if (scan_global_lru(sc))
+		zone->pages_scanned += pgscanned;
+
 	__mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 
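The new comment about zone->pages_scanned refers to zone_is_near_oom(), whose tail is visible as context in the hunk further up. For reference, the whole helper presumably reads roughly as follows (the first body line is reconstructed; the rest is quoted from the context lines):

static inline int zone_is_near_oom(struct zone *zone)
{
	return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE)
				+ zone_page_state(zone, NR_INACTIVE))*3;
}

Since that heuristic is per-zone and global, only global-LRU scans should feed zone->pages_scanned; cgroup-limit reclaim keeps its own scan count, as the comment says.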
@@ -1155,25 +1187,39 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 	unsigned long nr_to_scan;
 	unsigned long nr_reclaimed = 0;
 
-	/*
-	 * Add one to `nr_to_scan' just to make sure that the kernel will
-	 * slowly sift through the active list.
-	 */
-	zone->nr_scan_active +=
-		(zone_page_state(zone, NR_ACTIVE) >> priority) + 1;
-	nr_active = zone->nr_scan_active;
-	if (nr_active >= sc->swap_cluster_max)
-		zone->nr_scan_active = 0;
-	else
-		nr_active = 0;
+	if (scan_global_lru(sc)) {
+		/*
+		 * Add one to nr_to_scan just to make sure that the kernel
+		 * will slowly sift through the active list.
+		 */
+		zone->nr_scan_active +=
+			(zone_page_state(zone, NR_ACTIVE) >> priority) + 1;
+		nr_active = zone->nr_scan_active;
+		zone->nr_scan_inactive +=
+			(zone_page_state(zone, NR_INACTIVE) >> priority) + 1;
+		nr_inactive = zone->nr_scan_inactive;
+		if (nr_inactive >= sc->swap_cluster_max)
+			zone->nr_scan_inactive = 0;
+		else
+			nr_inactive = 0;
+
+		if (nr_active >= sc->swap_cluster_max)
+			zone->nr_scan_active = 0;
+		else
+			nr_active = 0;
+	} else {
+		/*
+		 * This reclaim occurs not because of a zone memory shortage
+		 * but because the memory controller has hit its limit.
+		 * So don't modify the zone's reclaim-related data.
+		 */
+		nr_active = mem_cgroup_calc_reclaim_active(sc->mem_cgroup,
+						zone, priority);
+
+		nr_inactive = mem_cgroup_calc_reclaim_inactive(sc->mem_cgroup,
+						zone, priority);
+	}
 
-	zone->nr_scan_inactive +=
-		(zone_page_state(zone, NR_INACTIVE) >> priority) + 1;
-	nr_inactive = zone->nr_scan_inactive;
-	if (nr_inactive >= sc->swap_cluster_max)
-		zone->nr_scan_inactive = 0;
-	else
-		nr_inactive = 0;
 
 	while (nr_active || nr_inactive) {
 		if (nr_active) {
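The scan batching in the global branch is easier to see with numbers. A hedged user-space sketch of the accumulate-then-batch pattern (swap_cluster_max is assumed to be 32, its usual default):

#include <stdio.h>

/* Illustrative sketch of the zone->nr_scan_inactive accumulation above. */
int main(void)
{
	unsigned long nr_inactive_pages = 100000;	/* assumed zone size */
	unsigned long swap_cluster_max = 32;		/* assumed sc->swap_cluster_max */
	unsigned long nr_scan_inactive = 0;		/* plays zone->nr_scan_inactive */
	int priority = 12;				/* DEF_PRIORITY */
	int call;

	for (call = 1; call <= 3; call++) {
		nr_scan_inactive += (nr_inactive_pages >> priority) + 1;
		if (nr_scan_inactive >= swap_cluster_max) {
			printf("call %d: scan a batch of %lu pages\n",
			       call, nr_scan_inactive);
			nr_scan_inactive = 0;
		} else {
			printf("call %d: only %lu queued, skip this round\n",
			       call, nr_scan_inactive);
		}
	}
	return 0;
}

Each call queues (100000 >> 12) + 1 = 25 pages, so a batch of at least swap_cluster_max is scanned only every other call. The memcg branch deliberately skips this zone-side bookkeeping and asks mem_cgroup_calc_reclaim_active()/_inactive() for its own targets, so limit-triggered reclaim does not perturb the zone counters.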
@@ -1218,25 +1264,39 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
 	unsigned long nr_reclaimed = 0;
 	int i;
 
+
 	sc->all_unreclaimable = 1;
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *zone = zones[i];
 
 		if (!populated_zone(zone))
 			continue;
+		/*
+		 * Take care that memory-controller reclaim has only a small
+		 * influence on the global LRU.
+		 */
+		if (scan_global_lru(sc)) {
+			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
+				continue;
+			note_zone_scanning_priority(zone, priority);
 
-		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-			continue;
-
-		note_zone_scanning_priority(zone, priority);
-
-		if (zone_is_all_unreclaimable(zone) && priority != DEF_PRIORITY)
-			continue;	/* Let kswapd poll it */
-
-		sc->all_unreclaimable = 0;
+			if (zone_is_all_unreclaimable(zone) &&
+						priority != DEF_PRIORITY)
+				continue;	/* Let kswapd poll it */
+			sc->all_unreclaimable = 0;
+		} else {
+			/*
+			 * Ignore cpuset limitations here. We just want to reduce
+			 * the number of pages we use, regardless of memory shortage.
+			 */
+			sc->all_unreclaimable = 0;
+			mem_cgroup_note_reclaim_priority(sc->mem_cgroup,
+							priority);
+		}
 
 		nr_reclaimed += shrink_zone(priority, zone, sc);
 	}
+
 	return nr_reclaimed;
 }
 
@@ -1264,16 +1324,21 @@ static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
 	unsigned long lru_pages = 0;
 	int i;
 
-	count_vm_event(ALLOCSTALL);
-
-	for (i = 0; zones[i] != NULL; i++) {
-		struct zone *zone = zones[i];
+	if (scan_global_lru(sc))
+		count_vm_event(ALLOCSTALL);
+	/*
+	 * mem_cgroup will not do shrink_slab.
+	 */
+	if (scan_global_lru(sc)) {
+		for (i = 0; zones[i] != NULL; i++) {
+			struct zone *zone = zones[i];
 
-		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-			continue;
+			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
+				continue;
 
-		lru_pages += zone_page_state(zone, NR_ACTIVE)
-			+ zone_page_state(zone, NR_INACTIVE);
+			lru_pages += zone_page_state(zone, NR_ACTIVE)
+				+ zone_page_state(zone, NR_INACTIVE);
+		}
 	}
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
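The lru_pages sum that is now computed only for global reclaim is presumably consumed a little further down, where do_try_to_free_pages() calls shrink_slab() inside the priority loop. A rough, reconstructed sketch of that consumer (not part of this hunk; the scan_global_lru() guard around the call is an assumption based on the "mem_cgroup will not do shrink_slab" comment):

	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
		/* ... */
		nr_reclaimed += shrink_zones(priority, zones, sc);
		/* slab caches are only shrunk for global reclaim */
		if (scan_global_lru(sc))
			shrink_slab(sc->nr_scanned, gfp_mask, lru_pages);
		/* ... */
	}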
@@ -1330,14 +1395,19 @@ out:
 	 */
 	if (priority < 0)
 		priority = 0;
-	for (i = 0; zones[i] != NULL; i++) {
-		struct zone *zone = zones[i];
 
-		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-			continue;
+	if (scan_global_lru(sc)) {
+		for (i = 0; zones[i] != NULL; i++) {
+			struct zone *zone = zones[i];
+
+			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
+				continue;
+
+			zone->prev_priority = priority;
+		}
+	} else
+		mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);
 
-		zone->prev_priority = priority;
-	}
 	return ret;
 }
 
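The prev_priority bookkeeping that this last hunk makes cgroup-aware feeds back into the distress term of calc_reclaim_mapped() shown earlier (distress = 100 >> min(prev_priority, priority)). A small worked example of that feedback, with illustrative values:

#include <stdio.h>

/* distress = 100 >> min(prev_priority, priority), as in calc_reclaim_mapped(). */
int main(void)
{
	int priority = 6;		/* current scan priority (assumed) */
	int prev[] = { 12, 6, 2, 0 };	/* previously recorded reclaim priorities */
	int i;

	for (i = 0; i < 4; i++) {
		int p = prev[i] < priority ? prev[i] : priority;
		printf("prev_priority=%2d -> distress=%d\n", prev[i], 100 >> p);
	}
	return 0;
}

A low recorded priority (reclaim struggled last time) drives distress toward 100 and makes mapped pages eligible for reclaim sooner. Recording the priority per cgroup via mem_cgroup_record_reclaim_priority() gives limit-triggered reclaim the same feedback loop without touching zone->prev_priority.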