@@ -2342,32 +2342,48 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
 }
 
 /**
- * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
+ * cpuset_zone_allowed_softwall - Can we allocate on zone z's memory node?
  * @z: is this zone on an allowed node?
- * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
+ * @gfp_mask: memory allocation flags
  *
- * If we're in interrupt, yes, we can always allocate. If zone
+ * If we're in interrupt, yes, we can always allocate. If
+ * __GFP_THISNODE is set, yes, we can always allocate. If zone
  * z's node is in our tasks mems_allowed, yes. If it's not a
  * __GFP_HARDWALL request and this zone's nodes is in the nearest
  * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
  * Otherwise, no.
  *
+ * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
+ * reduces to cpuset_zone_allowed_hardwall(). Otherwise,
+ * cpuset_zone_allowed_softwall() might sleep, and might allow a zone
+ * from an enclosing cpuset.
+ *
+ * cpuset_zone_allowed_hardwall() only handles the simpler case of
+ * hardwall cpusets, and never sleeps.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first. By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
  * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
  * and do not allow allocations outside the current tasks cpuset.
  * GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest mem_exclusive ancestor cpuset.
+ * nearest enclosing mem_exclusive ancestor cpuset.
  *
- * Scanning up parent cpusets requires callback_mutex. The __alloc_pages()
- * routine only calls here with __GFP_HARDWALL bit _not_ set if
- * it's a GFP_KERNEL allocation, and all nodes in the current tasks
- * mems_allowed came up empty on the first pass over the zonelist.
- * So only GFP_KERNEL allocations, if all nodes in the cpuset are
- * short of memory, might require taking the callback_mutex mutex.
+ * Scanning up parent cpusets requires callback_mutex. The
+ * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
+ * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
+ * current tasks mems_allowed came up empty on the first pass over
+ * the zonelist. So only GFP_KERNEL allocations, if all nodes in the
+ * cpuset are short of memory, might require taking the callback_mutex
+ * mutex.
  *
  * The first call here from mm/page_alloc:get_page_from_freelist()
- * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
- * no allocation on a node outside the cpuset is allowed (unless in
- * interrupt, of course).
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
+ * so no allocation on a node outside the cpuset is allowed (unless
+ * in interrupt, of course).
  *
  * The second pass through get_page_from_freelist() doesn't even call
  * here for GFP_ATOMIC calls. For those calls, the __alloc_pages()
@@ -2380,12 +2396,12 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * GFP_USER - only nodes in current tasks mems allowed ok.
  *
  * Rule:
- *    Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ *    Don't call cpuset_zone_allowed_softwall if you can't sleep, unless you
  *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
  *    the code that might scan up ancestor cpusets and sleep.
- **/
+ */
 
-int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
 {
 	int node;			/* node that zone z is on */
 	const struct cpuset *cs;	/* current cpuset ancestors */
@@ -2415,6 +2431,40 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 	return allowed;
 }
 
+/*
+ * cpuset_zone_allowed_hardwall - Can we allocate on zone z's memory node?
+ * @z: is this zone on an allowed node?
+ * @gfp_mask: memory allocation flags
+ *
+ * If we're in interrupt, yes, we can always allocate.
+ * If __GFP_THISNODE is set, yes, we can always allocate. If zone
+ * z's node is in our tasks mems_allowed, yes. Otherwise, no.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first. By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
+ * Unlike the cpuset_zone_allowed_softwall() variant, above,
+ * this variant requires that the zone be in the current tasks
+ * mems_allowed or that we're in interrupt. It does not scan up the
+ * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
+ * It never sleeps.
+ */
+
+int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
+{
+	int node;			/* node that zone z is on */
+
+	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
+		return 1;
+	node = zone_to_nid(z);
+	if (node_isset(node, current->mems_allowed))
+		return 1;
+	return 0;
+}
+
 /**
  * cpuset_lock - lock out any changes to cpuset structures
  *
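
For orientation, and not part of the patch above: the comments describe a split where non-sleeping paths use the hardwall check and only sleep-capable paths use the softwall check, which may take callback_mutex and fall back to the nearest mem_exclusive ancestor cpuset. The sketch below illustrates that split under stated assumptions: the helper zone_ok_for_alloc() and its can_sleep parameter are hypothetical, and it assumes the cpuset_zone_allowed_softwall()/cpuset_zone_allowed_hardwall() wrappers named in the comments are available through <linux/cpuset.h>. The real zone-scanning logic lives in mm/page_alloc.c:get_page_from_freelist().

/* Illustrative sketch only, not from this patch. */
#include <linux/cpuset.h>
#include <linux/gfp.h>
#include <linux/mmzone.h>

/*
 * zone_ok_for_alloc() is a hypothetical helper showing which cpuset
 * check a zonelist scan would pick for a candidate zone.
 */
static int zone_ok_for_alloc(struct zone *z, gfp_t gfp_mask, int can_sleep)
{
	if (!can_sleep)
		/*
		 * Atomic context or a __GFP_HARDWALL-marked first pass:
		 * only the task's own mems_allowed counts, and we must
		 * not sleep, so use the hardwall variant.
		 */
		return cpuset_zone_allowed_hardwall(z, gfp_mask);

	/*
	 * GFP_KERNEL-style slow path: the softwall variant may take
	 * callback_mutex and allow a zone from the nearest enclosing
	 * mem_exclusive ancestor cpuset, so it might sleep.
	 */
	return cpuset_zone_allowed_softwall(z, gfp_mask);
}

In practice the first pass over the zonelist sets __GFP_HARDWALL, so the softwall path is only reached after every node in the task's own mems_allowed has come up empty.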