@@ -2191,7 +2191,34 @@ static void khugepaged_alloc_sleep(void)
 			msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
 }

+static int khugepaged_node_load[MAX_NUMNODES];
+
 #ifdef CONFIG_NUMA
+static int khugepaged_find_target_node(void)
+{
+	static int last_khugepaged_target_node = NUMA_NO_NODE;
+	int nid, target_node = 0, max_value = 0;
+
+	/* find first node with max normal pages hit */
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		if (khugepaged_node_load[nid] > max_value) {
+			max_value = khugepaged_node_load[nid];
+			target_node = nid;
+		}
+
+	/* do some balance if several nodes have the same hit record */
+	if (target_node <= last_khugepaged_target_node)
+		for (nid = last_khugepaged_target_node + 1; nid < MAX_NUMNODES;
+				nid++)
+			if (max_value == khugepaged_node_load[nid]) {
+				target_node = nid;
+				break;
+			}
+
+	last_khugepaged_target_node = target_node;
+	return target_node;
+}
+
 static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 {
 	if (IS_ERR(*hpage)) {
@@ -2225,9 +2252,8 @@ static struct page
 	 * mmap_sem in read mode is good idea also to allow greater
 	 * scalability.
 	 */
-	*hpage = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
-				      node, __GFP_OTHER_NODE);
-
+	*hpage = alloc_pages_exact_node(node, alloc_hugepage_gfpmask(
+		khugepaged_defrag(), __GFP_OTHER_NODE), HPAGE_PMD_ORDER);
 	/*
 	 * After allocating the hugepage, release the mmap_sem read lock in
 	 * preparation for taking it in write mode.
@@ -2243,6 +2269,11 @@ static struct page
 	return *hpage;
 }
 #else
+static int khugepaged_find_target_node(void)
+{
+	return 0;
+}
+
 static inline struct page *alloc_hugepage(int defrag)
 {
 	return alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
@@ -2455,6 +2486,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 	if (pmd_trans_huge(*pmd))
 		goto out;

+	memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 	for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR;
 	     _pte++, _address += PAGE_SIZE) {
@@ -2471,12 +2503,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		if (unlikely(!page))
 			goto out_unmap;
 		/*
-		 * Chose the node of the first page. This could
-		 * be more sophisticated and look at more pages,
-		 * but isn't for now.
+		 * Record which node the original page is from and save this
+		 * information to khugepaged_node_load[].
+		 * Khugepaged will allocate a hugepage from the node that has
+		 * the max hit record.
 		 */
-		if (node == NUMA_NO_NODE)
-			node = page_to_nid(page);
+		node = page_to_nid(page);
+		khugepaged_node_load[node]++;
 		VM_BUG_ON(PageCompound(page));
 		if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
 			goto out_unmap;
@@ -2491,9 +2524,11 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		ret = 1;
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
-	if (ret)
+	if (ret) {
+		node = khugepaged_find_target_node();
 		/* collapse_huge_page will return with the mmap_sem released */
 		collapse_huge_page(mm, address, hpage, vma, node);
+	}
 out:
 	return ret;
 }
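
For reference, below is a minimal standalone sketch (not part of the patch) of the tie-breaking heuristic that khugepaged_find_target_node() introduces: pick the node with the most recorded page hits, and when several nodes tie, rotate past the node chosen last time so successive collapses spread across the tied nodes. The demo harness and the DEMO_* names are hypothetical; only the selection logic mirrors the function added above.

/*
 * Standalone userspace illustration of the target-node selection
 * heuristic: first pass picks the lowest-numbered node with the maximum
 * hit count; if that node was already chosen last time (or earlier),
 * a second pass advances to the next node with the same count.
 */
#include <stdio.h>

#define DEMO_MAX_NODES	4	/* stand-in for MAX_NUMNODES */
#define DEMO_NO_NODE	(-1)	/* stand-in for NUMA_NO_NODE */

static int demo_node_load[DEMO_MAX_NODES];

static int demo_find_target_node(void)
{
	static int last_target_node = DEMO_NO_NODE;
	int nid, target_node = 0, max_value = 0;

	/* find first node with max hits */
	for (nid = 0; nid < DEMO_MAX_NODES; nid++)
		if (demo_node_load[nid] > max_value) {
			max_value = demo_node_load[nid];
			target_node = nid;
		}

	/* balance among tied nodes: prefer one after last time's pick */
	if (target_node <= last_target_node)
		for (nid = last_target_node + 1; nid < DEMO_MAX_NODES; nid++)
			if (demo_node_load[nid] == max_value) {
				target_node = nid;
				break;
			}

	last_target_node = target_node;
	return target_node;
}

int main(void)
{
	int i;

	/* nodes 1 and 3 tie at 256 hits; successive calls alternate 1, 3, 1, 3 */
	demo_node_load[0] = 10;
	demo_node_load[1] = 256;
	demo_node_load[2] = 0;
	demo_node_load[3] = 256;

	for (i = 0; i < 4; i++)
		printf("collapse %d -> node %d\n", i, demo_find_target_node());

	return 0;
}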