@@ -466,11 +466,23 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
 	h->free_huge_pages_node[nid]++;
 }
 
+static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
+{
+	struct page *page;
+
+	if (list_empty(&h->hugepage_freelists[nid]))
+		return NULL;
+	page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
+	list_del(&page->lru);
+	h->free_huge_pages--;
+	h->free_huge_pages_node[nid]--;
+	return page;
+}
+
 static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
 {
-	int nid;
 	struct page *page = NULL;
 	struct mempolicy *mpol;
 	nodemask_t *nodemask;
@@ -496,19 +508,13 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
-		nid = zone_to_nid(zone);
-		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
-		    !list_empty(&h->hugepage_freelists[nid])) {
-			page = list_entry(h->hugepage_freelists[nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[nid]--;
-
-			if (!avoid_reserve)
-				decrement_hugepage_resv_vma(h, vma);
-
-			break;
+		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
+			page = dequeue_huge_page_node(h, zone_to_nid(zone));
+			if (page) {
+				if (!avoid_reserve)
+					decrement_hugepage_resv_vma(h, vma);
+				break;
+			}
 		}
 	}
 err:
@@ -770,11 +776,10 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 	return ret;
 }
 
-static struct page *alloc_buddy_huge_page(struct hstate *h,
-			struct vm_area_struct *vma, unsigned long address)
+static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 {
 	struct page *page;
-	unsigned int nid;
+	unsigned int r_nid;
 
 	if (h->order >= MAX_ORDER)
 		return NULL;
@@ -812,9 +817,14 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
 	}
 	spin_unlock(&hugetlb_lock);
 
-	page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
-				__GFP_REPEAT|__GFP_NOWARN,
-				huge_page_order(h));
+	if (nid == NUMA_NO_NODE)
+		page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
+					__GFP_REPEAT|__GFP_NOWARN,
+					huge_page_order(h));
+	else
+		page = alloc_pages_exact_node(nid,
+			htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+			__GFP_REPEAT|__GFP_NOWARN, huge_page_order(h));
 
 	if (page && arch_prepare_hugepage(page)) {
 		__free_pages(page, huge_page_order(h));
@@ -829,13 +839,13 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
 		 */
 		put_page_testzero(page);
 		VM_BUG_ON(page_count(page));
-		nid = page_to_nid(page);
+		r_nid = page_to_nid(page);
 		set_compound_page_dtor(page, free_huge_page);
 		/*
 		 * We incremented the global counters already
 		 */
-		h->nr_huge_pages_node[nid]++;
-		h->surplus_huge_pages_node[nid]++;
+		h->nr_huge_pages_node[r_nid]++;
+		h->surplus_huge_pages_node[r_nid]++;
 		__count_vm_event(HTLB_BUDDY_PGALLOC);
 	} else {
 		h->nr_huge_pages--;
@@ -847,6 +857,25 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
 	return page;
 }
 
+/*
+ * This allocation function is useful in the context where vma is irrelevant.
+ * E.g. soft-offlining uses this function because it only cares about the
+ * physical address of the error page.
+ */
+struct page *alloc_huge_page_node(struct hstate *h, int nid)
+{
+	struct page *page;
+
+	spin_lock(&hugetlb_lock);
+	page = dequeue_huge_page_node(h, nid);
+	spin_unlock(&hugetlb_lock);
+
+	if (!page)
+		page = alloc_buddy_huge_page(h, nid);
+
+	return page;
+}
+
 /*
  * Increase the hugetlb pool such that it can accomodate a reservation
  * of size 'delta'.
@@ -871,7 +900,7 @@ static int gather_surplus_pages(struct hstate *h, int delta)
 retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		page = alloc_buddy_huge_page(h, NULL, 0);
+		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
 		if (!page) {
 			/*
 			 * We were not able to allocate enough pages to
@@ -1052,7 +1081,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	spin_unlock(&hugetlb_lock);
 
 	if (!page) {
-		page = alloc_buddy_huge_page(h, vma, addr);
+		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
 		if (!page) {
 			hugetlb_put_quota(inode->i_mapping, chg);
 			return ERR_PTR(-VM_FAULT_SIGBUS);