@@ -32,6 +32,7 @@
 #include <linux/security.h>
 #include <linux/memcontrol.h>
 #include <linux/syscalls.h>
+#include <linux/hugetlb.h>
 #include <linux/gfp.h>
 
 #include "internal.h"
@@ -95,26 +96,34 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte_t *ptep, pte;
 	spinlock_t *ptl;
 
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_present(*pgd))
-		goto out;
+	if (unlikely(PageHuge(new))) {
+		ptep = huge_pte_offset(mm, addr);
+		if (!ptep)
+			goto out;
+		ptl = &mm->page_table_lock;
+	} else {
+		pgd = pgd_offset(mm, addr);
+		if (!pgd_present(*pgd))
+			goto out;
 
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		goto out;
+		pud = pud_offset(pgd, addr);
+		if (!pud_present(*pud))
+			goto out;
 
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		goto out;
+		pmd = pmd_offset(pud, addr);
+		if (!pmd_present(*pmd))
+			goto out;
 
-	ptep = pte_offset_map(pmd, addr);
+		ptep = pte_offset_map(pmd, addr);
 
-	if (!is_swap_pte(*ptep)) {
-		pte_unmap(ptep);
-		goto out;
-	}
+		if (!is_swap_pte(*ptep)) {
+			pte_unmap(ptep);
+			goto out;
+		}
+
+		ptl = pte_lockptr(mm, pmd);
+	}
 
-	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
@@ -130,10 +139,17 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
 		pte = pte_mkwrite(pte);
+	if (PageHuge(new))
+		pte = pte_mkhuge(pte);
 	flush_cache_page(vma, addr, pte_pfn(pte));
 	set_pte_at(mm, addr, ptep, pte);
 
-	if (PageAnon(new))
+	if (PageHuge(new)) {
+		if (PageAnon(new))
+			hugepage_add_anon_rmap(new, vma, addr);
+		else
+			page_dup_rmap(new);
+	} else if (PageAnon(new))
 		page_add_anon_rmap(new, vma, addr);
 	else
 		page_add_file_rmap(new);
@@ -275,12 +291,60 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	return 0;
 }
 
+/*
+ * The expected number of remaining references is the same as that
+ * of migrate_page_move_mapping().
+ */
+int migrate_huge_page_move_mapping(struct address_space *mapping,
+				   struct page *newpage, struct page *page)
+{
+	int expected_count;
+	void **pslot;
+
+	if (!mapping) {
+		if (page_count(page) != 1)
+			return -EAGAIN;
+		return 0;
+	}
+
+	spin_lock_irq(&mapping->tree_lock);
+
+	pslot = radix_tree_lookup_slot(&mapping->page_tree,
+					page_index(page));
+
+	expected_count = 2 + page_has_private(page);
+	if (page_count(page) != expected_count ||
+		(struct page *)radix_tree_deref_slot(pslot) != page) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	if (!page_freeze_refs(page, expected_count)) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	get_page(newpage);
+
+	radix_tree_replace_slot(pslot, newpage);
+
+	page_unfreeze_refs(page, expected_count);
+
+	__put_page(page);
+
+	spin_unlock_irq(&mapping->tree_lock);
+	return 0;
+}
+
 /*
  * Copy the page to its new location
  */
-static void migrate_page_copy(struct page *newpage, struct page *page)
+void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	copy_highpage(newpage, page);
+	if (PageHuge(page))
+		copy_huge_page(newpage, page);
+	else
+		copy_highpage(newpage, page);
 
 	if (PageError(page))
 		SetPageError(newpage);
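Since migrate_page_copy() loses its static qualifier here and migrate_huge_page_move_mapping() is a new entry point, callers outside mm/migrate.c will need matching declarations. A minimal sketch, assuming they belong in include/linux/migrate.h (that file is not shown in this hunk):

extern void migrate_page_copy(struct page *newpage, struct page *page);
extern int migrate_huge_page_move_mapping(struct address_space *mapping,
				struct page *newpage, struct page *page);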
@@ -723,6 +787,92 @@ move_newpage:
 	return rc;
 }
 
+/*
+ * Counterpart of unmap_and_move() for hugepage migration.
+ *
+ * This function doesn't wait for the completion of hugepage I/O
+ * because there is no race between I/O and migration for hugepages.
+ * Note that currently hugepage I/O occurs only in direct I/O
+ * where no lock is held and PG_writeback is irrelevant,
+ * and the writeback status of all subpages is counted in the reference
+ * count of the head page (i.e. if all subpages of a 2MB hugepage are
+ * under direct I/O, the reference count of the head page is 512 and a bit more.)
+ * This means that when we try to migrate a hugepage whose subpages are
+ * doing direct I/O, some references remain after try_to_unmap() and
+ * hugepage migration fails without data corruption.
+ *
+ * There is also no race when direct I/O is issued on a page under migration,
+ * because then the pte is replaced with a migration swap entry and the direct
+ * I/O code will wait in the page fault for migration to complete.
+ */
+static int unmap_and_move_huge_page(new_page_t get_new_page,
+				unsigned long private, struct page *hpage,
+				int force, int offlining)
+{
+	int rc = 0;
+	int *result = NULL;
+	struct page *new_hpage = get_new_page(hpage, private, &result);
+	int rcu_locked = 0;
+	struct anon_vma *anon_vma = NULL;
+
+	if (!new_hpage)
+		return -ENOMEM;
+
+	rc = -EAGAIN;
+
+	if (!trylock_page(hpage)) {
+		if (!force)
+			goto out;
+		lock_page(hpage);
+	}
+
+	if (PageAnon(hpage)) {
+		rcu_read_lock();
+		rcu_locked = 1;
+
+		if (page_mapped(hpage)) {
+			anon_vma = page_anon_vma(hpage);
+			atomic_inc(&anon_vma->external_refcount);
+		}
+	}
+
+	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+
+	if (!page_mapped(hpage))
+		rc = move_to_new_page(new_hpage, hpage, 1);
+
+	if (rc)
+		remove_migration_ptes(hpage, hpage);
+
+	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
+					    &anon_vma->lock)) {
+		int empty = list_empty(&anon_vma->head);
+		spin_unlock(&anon_vma->lock);
+		if (empty)
+			anon_vma_free(anon_vma);
+	}
+
+	if (rcu_locked)
+		rcu_read_unlock();
+out:
+	unlock_page(hpage);
+
+	if (rc != -EAGAIN) {
+		list_del(&hpage->lru);
+		put_page(hpage);
+	}
+
+	put_page(new_hpage);
+
+	if (result) {
+		if (rc)
+			*result = rc;
+		else
+			*result = page_to_nid(new_hpage);
+	}
+	return rc;
+}
+
 /*
  * migrate_pages
  *
@@ -788,6 +938,52 @@ out:
 	return nr_failed + retry;
 }
 
+int migrate_huge_pages(struct list_head *from,
+		new_page_t get_new_page, unsigned long private, int offlining)
+{
+	int retry = 1;
+	int nr_failed = 0;
+	int pass = 0;
+	struct page *page;
+	struct page *page2;
+	int rc;
+
+	for (pass = 0; pass < 10 && retry; pass++) {
+		retry = 0;
+
+		list_for_each_entry_safe(page, page2, from, lru) {
+			cond_resched();
+
+			rc = unmap_and_move_huge_page(get_new_page,
+					private, page, pass > 2, offlining);
+
+			switch(rc) {
+			case -ENOMEM:
+				goto out;
+			case -EAGAIN:
+				retry++;
+				break;
+			case 0:
+				break;
+			default:
+				/* Permanent failure */
+				nr_failed++;
+				break;
+			}
+		}
+	}
+	rc = 0;
+out:
+
+	list_for_each_entry_safe(page, page2, from, lru)
+		put_page(page);
+
+	if (rc)
+		return rc;
+
+	return nr_failed + retry;
+}
+
 #ifdef CONFIG_NUMA
 /*
  * Move a list of individual pages
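For reference, a caller of the new entry point would isolate the hugepage onto a private list and hand it to migrate_huge_pages() together with a new_page_t allocator callback. A minimal caller-side sketch, assuming an alloc_huge_page_node()-style helper; the helper and the isolation policy are assumptions, not part of this patch:

/* Illustrative only -- alloc_huge_page_node() is an assumed helper. */
static struct page *new_hugepage(struct page *page, unsigned long private,
				 int **result)
{
	/* Allocate a replacement hugepage of the same size on the same node. */
	return alloc_huge_page_node(page_hstate(compound_head(page)),
				    page_to_nid(page));
}

static int migrate_one_hugepage(struct page *hpage)
{
	LIST_HEAD(pagelist);

	/* The caller is assumed to hold a reference on hpage. */
	list_add(&hpage->lru, &pagelist);
	return migrate_huge_pages(&pagelist, new_hugepage, 0, 0);
}

migrate_huge_pages() drops every reference it was handed on the list, either in unmap_and_move_huge_page() or in its final put_page() loop, so the caller only needs to inspect the return value.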