@@ -732,9 +732,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
 		}
 		local_irq_restore(flags);
 		put_cpu();
-	}
-
-	if (page == NULL) {
+	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
 		spin_unlock_irqrestore(&zone->lock, flags);
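The "} else {" conversion hangs the buddy-list fallback off the order check instead of a NULL test. A condensed sketch of the resulting shape of buffered_rmqueue() follows; it is paraphrased from the surrounding function, most of which this hunk elides:

	if (order == 0) {
		/* serve order-0 requests from the per-CPU pageset,
		 * refilling it in bulk from the buddy lists if empty */
		...
		local_irq_restore(flags);
		put_cpu();
	} else {
		/* higher orders take the zone lock and allocate
		 * straight from the buddy lists */
		spin_lock_irqsave(&zone->lock, flags);
		page = __rmqueue(zone, order);
		spin_unlock_irqrestore(&zone->lock, flags);
	}

One behavioural consequence: an order-0 request whose per-CPU list cannot be refilled now returns NULL rather than falling through to __rmqueue(); since the bulk refill has just failed against the same free lists, a second attempt could not succeed anyway.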
@@ -754,20 +752,25 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
 	return page;
 }

+#define ALLOC_NO_WATERMARKS	0x01 /* don't check watermarks at all */
+#define ALLOC_HARDER		0x02 /* try to alloc harder */
+#define ALLOC_HIGH		0x04 /* __GFP_HIGH set */
+#define ALLOC_CPUSET		0x08 /* check for correct cpuset */
+
 /*
  * Return 1 if free pages are above 'mark'. This takes into account the order
  * of the allocation.
  */
 int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-		int classzone_idx, int can_try_harder, gfp_t gfp_high)
+		int classzone_idx, int alloc_flags)
 {
 	/* free_pages my go negative - that's OK */
 	long min = mark, free_pages = z->free_pages - (1 << order) + 1;
 	int o;

-	if (gfp_high)
+	if (alloc_flags & ALLOC_HIGH)
 		min -= min / 2;
-	if (can_try_harder)
+	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;

 	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
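To make the watermark discounts concrete (illustrative numbers, not taken from the patch): with mark = 1024 pages and both flags set,

	min = 1024;		/* base watermark */
	min -= min / 2;		/* ALLOC_HIGH:   min = 512 */
	min -= min / 4;		/* ALLOC_HARDER: min = 512 - 128 = 384 */

so an allocation carrying ALLOC_HIGH|ALLOC_HARDER may dig down to roughly 3/8 of the nominal watermark before zone_watermark_ok() says no. Folding the old can_try_harder and gfp_high parameters into one bitmask is what lets every call site below collapse into a single helper.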
@@ -785,14 +788,40 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 	return 1;
 }

-static inline int
-should_reclaim_zone(struct zone *z, gfp_t gfp_mask)
+/*
+ * get_page_from_freelist goes through the zonelist trying to allocate
+ * a page.
+ */
+static struct page *
+get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+		struct zonelist *zonelist, int alloc_flags)
 {
-	if (!z->reclaim_pages)
-		return 0;
-	if (gfp_mask & __GFP_NORECLAIM)
-		return 0;
-	return 1;
+	struct zone **z = zonelist->zones;
+	struct page *page = NULL;
+	int classzone_idx = zone_idx(*z);
+
+	/*
+	 * Go through the zonelist once, looking for a zone with enough free.
+	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+	 */
+	do {
+		if ((alloc_flags & ALLOC_CPUSET) &&
+				!cpuset_zone_allowed(*z, gfp_mask))
+			continue;
+
+		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
+			if (!zone_watermark_ok(*z, order, (*z)->pages_low,
+					classzone_idx, alloc_flags))
+				continue;
+		}
+
+		page = buffered_rmqueue(*z, order, gfp_mask);
+		if (page) {
+			zone_statistics(zonelist, *z);
+			break;
+		}
+	} while (*(++z) != NULL);
+	return page;
 }

 /*
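Every open-coded zonelist walk in __alloc_pages() below collapses into a call to this helper; the call sites differ only in the gfp bits and alloc_flags they pass. The fast path, quoted from the next hunk, shows the pattern:

	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
				zonelist, ALLOC_CPUSET);
	if (page)
		goto got_pg;

Note also that zone_statistics() now runs inside the helper, against the zone that actually satisfied the request, instead of once at the got_pg label.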
@@ -803,92 +832,60 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
 		struct zonelist *zonelist)
 {
 	const gfp_t wait = gfp_mask & __GFP_WAIT;
-	struct zone **zones, *z;
+	struct zone **z;
 	struct page *page;
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
-	int i;
-	int classzone_idx;
 	int do_retry;
-	int can_try_harder;
+	int alloc_flags;
 	int did_some_progress;

 	might_sleep_if(wait);

-	/*
-	 * The caller may dip into page reserves a bit more if the caller
-	 * cannot run direct reclaim, or is the caller has realtime scheduling
-	 * policy
-	 */
-	can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
-
-	zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
+	z = zonelist->zones;  /* the list of zones suitable for gfp_mask */

-	if (unlikely(zones[0] == NULL)) {
+	if (unlikely(*z == NULL)) {
 		/* Should this ever happen?? */
 		return NULL;
 	}
+restart:
+	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+				zonelist, ALLOC_CPUSET);
+	if (page)
+		goto got_pg;

-	classzone_idx = zone_idx(zones[0]);
+	do
+		wakeup_kswapd(*z, order);
+	while (*(++z));

-restart:
 	/*
-	 * Go through the zonelist once, looking for a zone with enough free.
-	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+	 * OK, we're below the kswapd watermark and have kicked background
+	 * reclaim. Now things get more complex, so set up alloc_flags according
+	 * to how we want to proceed.
+	 *
+	 * The caller may dip into page reserves a bit more if the caller
+	 * cannot run direct reclaim, or if the caller has realtime scheduling
+	 * policy.
 	 */
-	for (i = 0; (z = zones[i]) != NULL; i++) {
-		int do_reclaim = should_reclaim_zone(z, gfp_mask);
-
-		if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
-			continue;
-
-		/*
-		 * If the zone is to attempt early page reclaim then this loop
-		 * will try to reclaim pages and check the watermark a second
-		 * time before giving up and falling back to the next zone.
-		 */
-zone_reclaim_retry:
-		if (!zone_watermark_ok(z, order, z->pages_low,
-				classzone_idx, 0, 0)) {
-			if (!do_reclaim)
-				continue;
-			else {
-				zone_reclaim(z, gfp_mask, order);
-				/* Only try reclaim once */
-				do_reclaim = 0;
-				goto zone_reclaim_retry;
-			}
-		}
-
-		page = buffered_rmqueue(z, order, gfp_mask);
-		if (page)
-			goto got_pg;
-	}
-
-	for (i = 0; (z = zones[i]) != NULL; i++)
-		wakeup_kswapd(z, order);
+	alloc_flags = 0;
+	if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
+		alloc_flags |= ALLOC_HARDER;
+	if (gfp_mask & __GFP_HIGH)
+		alloc_flags |= ALLOC_HIGH;
+	if (wait)
+		alloc_flags |= ALLOC_CPUSET;

 	/*
 	 * Go through the zonelist again. Let __GFP_HIGH and allocations
-	 * coming from realtime tasks to go deeper into reserves
+	 * coming from realtime tasks go deeper into reserves.
 	 *
 	 * This is the last chance, in general, before the goto nopage.
 	 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
-	for (i = 0; (z = zones[i]) != NULL; i++) {
-		if (!zone_watermark_ok(z, order, z->pages_min,
-				classzone_idx, can_try_harder,
-				gfp_mask & __GFP_HIGH))
-			continue;
-
-		if (wait && !cpuset_zone_allowed(z, gfp_mask))
-			continue;
-
-		page = buffered_rmqueue(z, order, gfp_mask);
-		if (page)
-			goto got_pg;
-	}
+	page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
+	if (page)
+		goto got_pg;

 	/* This allocation should allow future memory freeing. */
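Worked through for two common callers (derived from the flag-setting code above; the patch itself does not spell this out). GFP_ATOMIC is __GFP_HIGH without __GFP_WAIT; GFP_KERNEL is __GFP_WAIT|__GFP_IO|__GFP_FS:

	GFP_ATOMIC, any context:
		!wait			->  ALLOC_HARDER
		__GFP_HIGH		->  ALLOC_HIGH
		!wait			->  ALLOC_CPUSET not set
		alloc_flags == ALLOC_HARDER|ALLOC_HIGH

	GFP_KERNEL, non-realtime task:
		alloc_flags == ALLOC_CPUSET

Atomic allocations therefore get both watermark discounts and may stray outside the task's cpuset, while ordinary sleeping allocations get no discount but stay inside it.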
@@ -897,13 +894,10 @@ zone_reclaim_retry:
 	if (!(gfp_mask & __GFP_NOMEMALLOC)) {
 nofail_alloc:
 		/* go through the zonelist yet again, ignoring mins */
-		for (i = 0; (z = zones[i]) != NULL; i++) {
-			if (!cpuset_zone_allowed(z, gfp_mask))
-				continue;
-			page = buffered_rmqueue(z, order, gfp_mask);
-			if (page)
-				goto got_pg;
-		}
+		page = get_page_from_freelist(gfp_mask, order,
+			zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
+		if (page)
+			goto got_pg;
 		if (gfp_mask & __GFP_NOFAIL) {
 			blk_congestion_wait(WRITE, HZ/50);
 			goto nofail_alloc;
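With ALLOC_NO_WATERMARKS|ALLOC_CPUSET the helper's loop body specialises to (a reduction of the function added earlier, shown for comparison):

	do {
		if (!cpuset_zone_allowed(*z, gfp_mask))
			continue;
		page = buffered_rmqueue(*z, order, gfp_mask);
		...
	} while (*(++z) != NULL);

which is exactly the loop this hunk deletes, so the memory-freeing path keeps its semantics: no watermark check at all, but still confined to the allowed cpuset.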
@@ -924,7 +918,7 @@ rebalance:
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;

-	did_some_progress = try_to_free_pages(zones, gfp_mask);
+	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);

 	p->reclaim_state = NULL;
 	p->flags &= ~PF_MEMALLOC;
@@ -932,19 +926,10 @@ rebalance:
 	cond_resched();

 	if (likely(did_some_progress)) {
-		for (i = 0; (z = zones[i]) != NULL; i++) {
-			if (!zone_watermark_ok(z, order, z->pages_min,
-					classzone_idx, can_try_harder,
-					gfp_mask & __GFP_HIGH))
-				continue;
-
-			if (!cpuset_zone_allowed(z, gfp_mask))
-				continue;
-
-			page = buffered_rmqueue(z, order, gfp_mask);
-			if (page)
-				goto got_pg;
-		}
+		page = get_page_from_freelist(gfp_mask, order,
+						zonelist, alloc_flags);
+		if (page)
+			goto got_pg;
 	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
 		/*
 		 * Go through the zonelist yet one more time, keep
@@ -952,18 +937,10 @@ rebalance:
 		 * a parallel oom killing, we must fail if we're still
 		 * under heavy pressure.
 		 */
-		for (i = 0; (z = zones[i]) != NULL; i++) {
-			if (!zone_watermark_ok(z, order, z->pages_high,
-					classzone_idx, 0, 0))
-				continue;
-
-			if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
-				continue;
-
-			page = buffered_rmqueue(z, order, gfp_mask);
-			if (page)
-				goto got_pg;
-		}
+		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+						zonelist, ALLOC_CPUSET);
+		if (page)
+			goto got_pg;

 		out_of_memory(gfp_mask, order);
 		goto restart;
@@ -996,9 +973,7 @@ nopage:
 		dump_stack();
 		show_mem();
 	}
-	return NULL;
 got_pg:
-	zone_statistics(zonelist, z);
 	return page;
 }

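Taken together, the rewritten __alloc_pages() boils down to the following sequence (a condensed summary of the hunks above, not code from the patch; the guard for the watermark-ignoring path lies outside these hunks):

	restart:
		get_page_from_freelist(gfp_mask|__GFP_HARDWALL, ..., ALLOC_CPUSET);
		wake kswapd on every zone; compute alloc_flags;
		get_page_from_freelist(gfp_mask, ..., alloc_flags);
		if the caller may ignore watermarks:
			get_page_from_freelist(gfp_mask, ...,
					ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
		direct reclaim, then retry with alloc_flags,
		or OOM-kill and goto restart;
	nopage/got_pg:
		return page;		/* NULL on the nopage path */

The two deletions in this last hunk follow from the refactoring: the nopage path can fall through to got_pg because page is still NULL there, and zone_statistics() has moved into get_page_from_freelist(), where the zone that satisfied the request is known.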