@@ -174,6 +174,17 @@ static int __init hugetlb_setup(char *s)
 }
 __setup("hugepages=", hugetlb_setup);
 
+static unsigned int cpuset_mems_nr(unsigned int *array)
+{
+	int node;
+	unsigned int nr = 0;
+
+	for_each_node_mask(node, cpuset_current_mems_allowed)
+		nr += array[node];
+
+	return nr;
+}
+
 #ifdef CONFIG_SYSCTL
 static void update_and_free_page(struct page *page)
 {
@@ -819,6 +830,26 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
 	chg = region_chg(&inode->i_mapping->private_list, from, to);
 	if (chg < 0)
 		return chg;
+	/*
+	 * When cpuset is configured, it breaks the strict hugetlb page
+	 * reservation as the accounting is done on a global variable. Such
+	 * a reservation is effectively meaningless in the presence of
+	 * cpusets because it is not checked against page availability for
+	 * the current cpuset. The application can still be OOM-killed by
+	 * the kernel if the cpuset the task runs in runs out of free
+	 * huge pages. Enforcing strict accounting with cpusets is nearly
+	 * impossible (or too ugly) because cpusets are too fluid: tasks
+	 * and memory nodes can be moved between cpusets at any time.
+	 *
+	 * Changing the semantics of shared hugetlb mappings under cpusets
+	 * is undesirable. However, to preserve some of the semantics, we
+	 * fall back to checking against the current free page availability
+	 * as a best attempt, hopefully minimizing the impact of the
+	 * semantics change that cpusets introduce.
+	 */
+	if (chg > cpuset_mems_nr(free_huge_pages_node))
+		return -ENOMEM;
+
 	ret = hugetlb_acct_memory(chg);
 	if (ret < 0)
 		return ret;
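
For illustration only, a minimal userspace sketch of the same best-effort check (not the kernel implementation): free_huge_pages[], mems_nr(), reserve_pages() and the allowed-node bitmask below are made-up stand-ins for the kernel's free_huge_pages_node[], cpuset_mems_nr() and cpuset_current_mems_allowed. The idea is simply to sum the per-node free counts over the nodes the task is allowed to use and refuse a reservation that exceeds that sum.

/* Illustrative sketch, assuming the stand-in names described above. */
#include <stdio.h>
#include <errno.h>

#define MAX_NODES 4

/* Hypothetical per-node free huge page counts. */
static unsigned int free_huge_pages[MAX_NODES] = { 2, 0, 3, 1 };

/* Sum per-node counters over the nodes set in the allowed bitmask. */
static unsigned int mems_nr(const unsigned int *array, unsigned long allowed)
{
	unsigned int nr = 0;
	int node;

	for (node = 0; node < MAX_NODES; node++)
		if (allowed & (1UL << node))
			nr += array[node];
	return nr;
}

/* Reject a reservation the allowed nodes cannot satisfy right now. */
static int reserve_pages(long chg, unsigned long allowed)
{
	if (chg > mems_nr(free_huge_pages, allowed))
		return -ENOMEM;
	return 0;
}

int main(void)
{
	/* Nodes 0 and 2 allowed: 2 + 3 = 5 free huge pages visible. */
	printf("reserve 4: %d\n", reserve_pages(4, 0x5)); /* 0 (ok) */
	printf("reserve 6: %d\n", reserve_pages(6, 0x5)); /* -ENOMEM */
	return 0;
}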