@@ -2342,32 +2342,48 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
 }
 
 /**
- * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
+ * cpuset_zone_allowed_softwall - Can we allocate on zone z's memory node?
  * @z: is this zone on an allowed node?
- * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
+ * @gfp_mask: memory allocation flags
  *
- * If we're in interrupt, yes, we can always allocate. If zone
+ * If we're in interrupt, yes, we can always allocate. If
+ * __GFP_THISNODE is set, yes, we can always allocate. If zone
  * z's node is in our tasks mems_allowed, yes. If it's not a
  * __GFP_HARDWALL request and this zone's nodes is in the nearest
  * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
  * Otherwise, no.
  *
+ * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
+ * reduces to cpuset_zone_allowed_hardwall(). Otherwise,
+ * cpuset_zone_allowed_softwall() might sleep, and might allow a zone
+ * from an enclosing cpuset.
+ *
+ * cpuset_zone_allowed_hardwall() only handles the simpler case of
+ * hardwall cpusets, and never sleeps.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first. By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
  * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
  * and do not allow allocations outside the current tasks cpuset.
  * GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest mem_exclusive ancestor cpuset.
+ * nearest enclosing mem_exclusive ancestor cpuset.
  *
- * Scanning up parent cpusets requires callback_mutex. The __alloc_pages()
- * routine only calls here with __GFP_HARDWALL bit _not_ set if
- * it's a GFP_KERNEL allocation, and all nodes in the current tasks
- * mems_allowed came up empty on the first pass over the zonelist.
- * So only GFP_KERNEL allocations, if all nodes in the cpuset are
- * short of memory, might require taking the callback_mutex mutex.
+ * Scanning up parent cpusets requires callback_mutex. The
+ * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
+ * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
+ * current tasks mems_allowed came up empty on the first pass over
+ * the zonelist. So only GFP_KERNEL allocations, if all nodes in the
+ * cpuset are short of memory, might require taking the callback_mutex
+ * mutex.
  *
  * The first call here from mm/page_alloc:get_page_from_freelist()
- * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
- * no allocation on a node outside the cpuset is allowed (unless in
- * interrupt, of course).
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
+ * so no allocation on a node outside the cpuset is allowed (unless
+ * in interrupt, of course).
  *
  * The second pass through get_page_from_freelist() doesn't even call
  * here for GFP_ATOMIC calls. For those calls, the __alloc_pages()
@@ -2380,12 +2396,12 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * GFP_USER - only nodes in current tasks mems allowed ok.
  *
  * Rule:
- *    Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ *    Don't call cpuset_zone_allowed_softwall if you can't sleep, unless you
  *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
  *    the code that might scan up ancestor cpusets and sleep.
- **/
+ */
 
-int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
 {
 	int node;			/* node that zone z is on */
 	const struct cpuset *cs;	/* current cpuset ancestors */
@@ -2415,6 +2431,40 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 	return allowed;
 }
 
+/*
+ * cpuset_zone_allowed_hardwall - Can we allocate on zone z's memory node?
+ * @z: is this zone on an allowed node?
+ * @gfp_mask: memory allocation flags
+ *
+ * If we're in interrupt, yes, we can always allocate.
+ * If __GFP_THISNODE is set, yes, we can always allocate. If zone
+ * z's node is in our tasks mems_allowed, yes. Otherwise, no.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first. By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
+ * Unlike the cpuset_zone_allowed_softwall() variant, above,
+ * this variant requires that the zone be in the current tasks
+ * mems_allowed or that we're in interrupt. It does not scan up the
+ * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
+ * It never sleeps.
+ */
+
+int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
+{
+	int node;			/* node that zone z is on */
+
+	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
+		return 1;
+	node = zone_to_nid(z);
+	if (node_isset(node, current->mems_allowed))
+		return 1;
+	return 0;
+}
+
 /**
  * cpuset_lock - lock out any changes to cpuset structures
  *
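
For orientation, and not part of the patch above: the comments describe a split where non-sleeping paths use the hardwall check and only sleep-capable paths use the softwall check, which may take callback_mutex and fall back to the nearest mem_exclusive ancestor cpuset. The sketch below illustrates that split under stated assumptions: the helper zone_ok_for_alloc() and its can_sleep parameter are hypothetical, and it assumes the cpuset_zone_allowed_softwall()/cpuset_zone_allowed_hardwall() wrappers named in the comments are available through <linux/cpuset.h>. The real zone-scanning logic lives in mm/page_alloc.c:get_page_from_freelist().

/* Illustrative sketch only, not from this patch. */
#include <linux/cpuset.h>
#include <linux/gfp.h>
#include <linux/mmzone.h>

/*
 * zone_ok_for_alloc() is a hypothetical helper showing which cpuset
 * check a zonelist scan would pick for a candidate zone.
 */
static int zone_ok_for_alloc(struct zone *z, gfp_t gfp_mask, int can_sleep)
{
	if (!can_sleep)
		/*
		 * Atomic context or a __GFP_HARDWALL-marked first pass:
		 * only the task's own mems_allowed counts, and we must
		 * not sleep, so use the hardwall variant.
		 */
		return cpuset_zone_allowed_hardwall(z, gfp_mask);

	/*
	 * GFP_KERNEL-style slow path: the softwall variant may take
	 * callback_mutex and allow a zone from the nearest enclosing
	 * mem_exclusive ancestor cpuset, so it might sleep.
	 */
	return cpuset_zone_allowed_softwall(z, gfp_mask);
}

In practice the first pass over the zonelist sets __GFP_HARDWALL, so the softwall path is only reached after every node in the task's own mems_allowed has come up empty.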