@@ -23,10 +23,12 @@
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
 static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
+static unsigned long surplus_huge_pages;
 unsigned long max_huge_pages;
 static struct list_head hugepage_freelists[MAX_NUMNODES];
 static unsigned int nr_huge_pages_node[MAX_NUMNODES];
 static unsigned int free_huge_pages_node[MAX_NUMNODES];
+static unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
 
@@ -109,15 +111,57 @@ static void update_and_free_page(struct page *page)
 
 static void free_huge_page(struct page *page)
 {
-	BUG_ON(page_count(page));
+	int nid = page_to_nid(page);
 
+	BUG_ON(page_count(page));
 	INIT_LIST_HEAD(&page->lru);
 
 	spin_lock(&hugetlb_lock);
-	enqueue_huge_page(page);
+	if (surplus_huge_pages_node[nid]) {
+		update_and_free_page(page);
+		surplus_huge_pages--;
+		surplus_huge_pages_node[nid]--;
+	} else {
+		enqueue_huge_page(page);
+	}
 	spin_unlock(&hugetlb_lock);
 }
 
+/*
+ * Increment or decrement surplus_huge_pages.  Keep node-specific counters
+ * balanced by operating on them in a round-robin fashion.
+ * Returns 1 if an adjustment was made.
+ */
+static int adjust_pool_surplus(int delta)
+{
+	static int prev_nid;
+	int nid = prev_nid;
+	int ret = 0;
+
+	VM_BUG_ON(delta != -1 && delta != 1);
+	do {
+		nid = next_node(nid, node_online_map);
+		if (nid == MAX_NUMNODES)
+			nid = first_node(node_online_map);
+
+		/* To shrink on this node, there must be a surplus page */
+		if (delta < 0 && !surplus_huge_pages_node[nid])
+			continue;
+		/* Surplus cannot exceed the total number of pages */
+		if (delta > 0 && surplus_huge_pages_node[nid] >=
+						nr_huge_pages_node[nid])
+			continue;
+
+		surplus_huge_pages += delta;
+		surplus_huge_pages_node[nid] += delta;
+		ret = 1;
+		break;
+	} while (nid != prev_nid);
+
+	prev_nid = nid;
+	return ret;
+}
+
 static int alloc_fresh_huge_page(void)
 {
 	static int prev_nid;
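
The wrap-around scan in adjust_pool_surplus() is the subtle part of this hunk: it starts one node past wherever the previous call stopped and gives up only after coming all the way around. A minimal userspace model of the same walk is sketched below; node_online, nr_node and surplus_node are illustrative stand-ins for node_online_map and the per-node counters, not kernel API.

#include <stdio.h>

#define MAX_NUMNODES 4

/* Illustrative stand-ins for node_online_map and the per-node counters. */
static int node_online[MAX_NUMNODES] = { 1, 1, 0, 1 };	/* node 2 offline */
static unsigned int nr_node[MAX_NUMNODES] = { 4, 4, 0, 4 };
static unsigned int surplus_node[MAX_NUMNODES];
static unsigned long surplus_total;

/* next_node()/first_node() equivalent: next online node, wrapping. */
static int next_online(int nid)
{
	do
		nid = (nid + 1) % MAX_NUMNODES;
	while (!node_online[nid]);
	return nid;
}

/* Same shape as adjust_pool_surplus(): scan round-robin from the node
 * after the one touched last time, apply delta to the first node where
 * the adjustment is legal, and give up after one full lap. */
static int adjust_surplus(int delta)
{
	static int prev_nid;
	int nid = prev_nid;
	int ret = 0;

	do {
		nid = next_online(nid);
		if (delta < 0 && !surplus_node[nid])
			continue;
		if (delta > 0 && surplus_node[nid] >= nr_node[nid])
			continue;
		surplus_total += delta;
		surplus_node[nid] += delta;
		ret = 1;
		break;
	} while (nid != prev_nid);

	prev_nid = nid;
	return ret;
}

int main(void)
{
	int nid, i;

	for (i = 0; i < 5; i++)		/* five surplus pages ... */
		adjust_surplus(1);
	for (nid = 0; nid < MAX_NUMNODES; nid++)	/* ... land 1/2/0/2 */
		printf("node %d: %u surplus\n", nid, surplus_node[nid]);
	return 0;
}

Keeping prev_nid static across calls is what spreads repeated single-page adjustments over the nodes instead of always hitting the first online one.
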
@@ -150,10 +194,30 @@ static int alloc_fresh_huge_page(void)
 	return 0;
 }
 
+static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
+						unsigned long address)
+{
+	struct page *page;
+
+	page = alloc_pages(htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
+					HUGETLB_PAGE_ORDER);
+	if (page) {
+		set_compound_page_dtor(page, free_huge_page);
+		spin_lock(&hugetlb_lock);
+		nr_huge_pages++;
+		nr_huge_pages_node[page_to_nid(page)]++;
+		surplus_huge_pages++;
+		surplus_huge_pages_node[page_to_nid(page)]++;
+		spin_unlock(&hugetlb_lock);
+	}
+
+	return page;
+}
+
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
 				    unsigned long addr)
 {
-	struct page *page;
+	struct page *page = NULL;
 
 	spin_lock(&hugetlb_lock);
 	if (vma->vm_flags & VM_MAYSHARE)
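
What makes alloc_buddy_huge_page() hang together is that the new page is counted in the pool totals and as surplus but never enqueued: it goes straight to the caller, and set_compound_page_dtor() ensures the final put_page() lands in free_huge_page(), where the non-zero per-node surplus count routes the page back to the buddy allocator. A toy, single-node model of that accounting (all names here are hypothetical):

#include <stdio.h>

/* Toy single-node model of the surplus lifecycle; no locking, no NUMA. */
static unsigned long nr_huge, free_huge, surplus_huge;

/* alloc_buddy_huge_page(): counted in the totals and as surplus,
 * handed to the caller without ever touching the free list. */
static void alloc_surplus(void)
{
	nr_huge++;
	surplus_huge++;
}

/* free_huge_page(): a surplus page shrinks the pool on release;
 * a persistent page would be enqueued instead (free_huge++). */
static void release(void)
{
	if (surplus_huge) {
		nr_huge--;
		surplus_huge--;
	} else {
		free_huge++;
	}
}

int main(void)
{
	alloc_surplus();
	printf("in use: total=%lu free=%lu surplus=%lu\n",
	       nr_huge, free_huge, surplus_huge);
	release();
	printf("freed:  total=%lu free=%lu surplus=%lu\n",
	       nr_huge, free_huge, surplus_huge);
	return 0;
}

The pool thus grows only for the lifetime of the overflow page and snaps back to its configured size by itself.
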
@@ -173,7 +237,16 @@ fail:
 	if (vma->vm_flags & VM_MAYSHARE)
 		resv_huge_pages++;
 	spin_unlock(&hugetlb_lock);
-	return NULL;
+
+	/*
+	 * Private mappings do not use reserved huge pages so the allocation
+	 * may have failed due to an undersized hugetlb pool.  Try to grab a
+	 * surplus huge page from the buddy allocator.
+	 */
+	if (!(vma->vm_flags & VM_MAYSHARE))
+		page = alloc_buddy_huge_page(vma, addr);
+
+	return page;
 }
 
 static int __init hugetlb_init(void)
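
The comment in the hunk states the trigger condition: private mappings take no reservation at mmap() time, so any shortfall only surfaces at fault time. Roughly, a first touch like the sketch below is what drives alloc_huge_page() down the new fallback path; the hugetlbfs mount point and the 2 MB page size are assumptions for illustration:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define HPAGE_SIZE (2UL * 1024 * 1024)	/* assumption: 2 MB huge pages */

int main(void)
{
	char *p;
	int fd = open("/mnt/huge/testfile", O_CREAT | O_RDWR, 0600);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* MAP_PRIVATE takes no reservation at mmap() time ... */
	p = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	/* ... so this first touch is where alloc_huge_page() runs and,
	 * with the pool exhausted, falls back to the buddy allocator
	 * via alloc_buddy_huge_page(). */
	memset(p, 0, HPAGE_SIZE);
	munmap(p, HPAGE_SIZE);
	close(fd);
	unlink("/mnt/huge/testfile");
	return 0;
}
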
@@ -241,26 +314,62 @@ static inline void try_to_free_low(unsigned long count)
 }
 #endif
 
+#define persistent_huge_pages (nr_huge_pages - surplus_huge_pages)
 static unsigned long set_max_huge_pages(unsigned long count)
 {
-	while (count > nr_huge_pages) {
-		if (!alloc_fresh_huge_page())
-			return nr_huge_pages;
-	}
-	if (count >= nr_huge_pages)
-		return nr_huge_pages;
+	unsigned long min_count, ret;
 
+	/*
+	 * Increase the pool size
+	 * First take pages out of surplus state.  Then make up the
+	 * remaining difference by allocating fresh huge pages.
+	 */
 	spin_lock(&hugetlb_lock);
-	count = max(count, resv_huge_pages);
-	try_to_free_low(count);
-	while (count < nr_huge_pages) {
+	while (surplus_huge_pages && count > persistent_huge_pages) {
+		if (!adjust_pool_surplus(-1))
+			break;
+	}
+
+	while (count > persistent_huge_pages) {
+		int ret;
+		/*
+		 * If this allocation races such that we no longer need the
+		 * page, free_huge_page will handle it by freeing the page
+		 * and reducing the surplus.
+		 */
+		spin_unlock(&hugetlb_lock);
+		ret = alloc_fresh_huge_page();
+		spin_lock(&hugetlb_lock);
+		if (!ret)
+			goto out;
+
+	}
+	if (count >= persistent_huge_pages)
+		goto out;
+
+	/*
+	 * Decrease the pool size
+	 * First return free pages to the buddy allocator (being careful
+	 * to keep enough around to satisfy reservations).  Then place
+	 * pages into surplus state as needed so the pool will shrink
+	 * to the desired size as pages become free.
+	 */
+	min_count = max(count, resv_huge_pages);
+	try_to_free_low(min_count);
+	while (min_count < persistent_huge_pages) {
 		struct page *page = dequeue_huge_page(NULL, 0);
 		if (!page)
 			break;
 		update_and_free_page(page);
 	}
+	while (count < persistent_huge_pages) {
+		if (!adjust_pool_surplus(1))
+			break;
+	}
+out:
+	ret = persistent_huge_pages;
 	spin_unlock(&hugetlb_lock);
-	return nr_huge_pages;
+	return ret;
 }
 
 int hugetlb_sysctl_handler(struct ctl_table *table, int write,
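
The resize is easiest to follow with numbers. The toy model below replays the two phases with single global counters (hypothetical names; no locking, no NUMA, allocation never fails): growing first reclaims surplus pages into the persistent pool before allocating fresh ones, and shrinking frees what it can before marking the in-use excess surplus.

#include <stdio.h>

/* Toy model of set_max_huge_pages(): nr = total pool, surp = surplus
 * portion, free_p = free pages, resv = reserved pages. */
static unsigned long nr = 10, surp = 3, free_p = 6, resv = 2;

#define persistent (nr - surp)

static unsigned long set_max(unsigned long count)
{
	unsigned long min_count;

	/* Grow: first take pages out of surplus state ... */
	while (surp && count > persistent)
		surp--;
	/* ... then allocate fresh pages for the remaining shortfall. */
	while (count > persistent) {
		nr++;
		free_p++;
	}
	if (count >= persistent)
		goto out;

	/* Shrink: free pages down to max(count, resv) ... */
	min_count = count > resv ? count : resv;
	while (min_count < persistent && free_p) {
		nr--;
		free_p--;
	}
	/* ... then mark still-in-use excess pages surplus; they return
	 * to the buddy allocator as they are released. */
	while (count < persistent)
		surp++;
out:
	return persistent;
}

int main(void)
{
	unsigned long p;

	p = set_max(12);	/* absorbs 3 surplus, allocates 2 fresh */
	printf("persistent=%lu surplus=%lu free=%lu\n", p, surp, free_p);
	p = set_max(2);		/* frees 8, marks 2 in-use pages surplus */
	printf("persistent=%lu surplus=%lu free=%lu\n", p, surp, free_p);
	return 0;
}
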
@@ -292,10 +401,12 @@ int hugetlb_report_meminfo(char *buf)
 		"HugePages_Total: %5lu\n"
 		"HugePages_Free:  %5lu\n"
 		"HugePages_Rsvd:  %5lu\n"
+		"HugePages_Surp:  %5lu\n"
 		"Hugepagesize:    %5lu kB\n",
 		nr_huge_pages,
 		free_huge_pages,
 		resv_huge_pages,
+		surplus_huge_pages,
 		HPAGE_SIZE/1024);
 }
 
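
With the last hunk applied, the surplus count appears between HugePages_Rsvd and Hugepagesize in /proc/meminfo. A quick reader for the new field (only the field name comes from the patch; the parsing here is ad hoc):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Print just the HugePages_Surp line added by this patch. */
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "HugePages_Surp:", 15))
			fputs(line, stdout);
	fclose(f);
	return 0;
}

Run after oversubscribing a private mapping, it should report a non-zero count that drops back to zero as the surplus pages are released.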