@@ -36,6 +36,7 @@
 #include <linux/hugetlb_cgroup.h>
 #include <linux/gfp.h>
 #include <linux/balloon_compaction.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/tlbflush.h>
 
@@ -1654,6 +1655,18 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 	return 1;
 }
 
+bool pmd_trans_migrating(pmd_t pmd)
+{
+	struct page *page = pmd_page(pmd);
+	return PageLocked(page);
+}
+
+void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd)
+{
+	struct page *page = pmd_page(*pmd);
+	wait_on_page_locked(page);
+}
+
 /*
  * Attempt to migrate a misplaced page to the specified destination
  * node. Caller is expected to have an elevated reference count on
@@ -1716,12 +1729,14 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 				struct page *page, int node)
 {
 	spinlock_t *ptl;
-	unsigned long haddr = address & HPAGE_PMD_MASK;
 	pg_data_t *pgdat = NODE_DATA(node);
 	int isolated = 0;
 	struct page *new_page = NULL;
 	struct mem_cgroup *memcg = NULL;
 	int page_lru = page_is_file_cache(page);
+	unsigned long mmun_start = address & HPAGE_PMD_MASK;
+	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
+	pmd_t orig_entry;
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
@@ -1744,6 +1759,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		goto out_fail;
 	}
 
+	if (mm_tlb_flush_pending(mm))
+		flush_tlb_range(vma, mmun_start, mmun_end);
+
 	/* Prepare a page as a migration target */
 	__set_page_locked(new_page);
 	SetPageSwapBacked(new_page);
@@ -1755,9 +1773,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	WARN_ON(PageLRU(new_page));
 
 	/* Recheck the target PMD */
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	ptl = pmd_lock(mm, pmd);
-	if (unlikely(!pmd_same(*pmd, entry))) {
+	if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
+fail_putback:
 		spin_unlock(ptl);
+		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
 		/* Reverse changes made by migrate_page_copy() */
 		if (TestClearPageActive(new_page))
@@ -1774,7 +1795,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		putback_lru_page(page);
 		mod_zone_page_state(page_zone(page),
			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
-		goto out_fail;
+
+		goto out_unlock;
 	}
 
 	/*
@@ -1786,16 +1808,35 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	 */
 	mem_cgroup_prepare_migration(page, new_page, &memcg);
 
+	orig_entry = *pmd;
 	entry = mk_pmd(new_page, vma->vm_page_prot);
-	entry = pmd_mknonnuma(entry);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 	entry = pmd_mkhuge(entry);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
-	pmdp_clear_flush(vma, haddr, pmd);
-	set_pmd_at(mm, haddr, pmd, entry);
-	page_add_new_anon_rmap(new_page, vma, haddr);
+	/*
+	 * Clear the old entry under pagetable lock and establish the new PTE.
+	 * Any parallel GUP will either observe the old page blocking on the
+	 * page lock, block on the page table lock or observe the new page.
+	 * The SetPageUptodate on the new page and page_add_new_anon_rmap
+	 * guarantee the copy is visible before the pagetable update.
+	 */
+	flush_cache_range(vma, mmun_start, mmun_end);
+	page_add_new_anon_rmap(new_page, vma, mmun_start);
+	pmdp_clear_flush(vma, mmun_start, pmd);
+	set_pmd_at(mm, mmun_start, pmd, entry);
+	flush_tlb_range(vma, mmun_start, mmun_end);
 	update_mmu_cache_pmd(vma, address, &entry);
+
+	if (page_count(page) != 2) {
+		set_pmd_at(mm, mmun_start, pmd, orig_entry);
+		flush_tlb_range(vma, mmun_start, mmun_end);
+		update_mmu_cache_pmd(vma, address, &entry);
+		page_remove_rmap(new_page);
+		goto fail_putback;
+	}
+
 	page_remove_rmap(page);
+
 	/*
 	 * Finish the charge transaction under the page table lock to
 	 * prevent split_huge_page() from dividing up the charge
@@ -1803,6 +1844,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	 */
 	mem_cgroup_end_migration(memcg, page, new_page, true);
 	spin_unlock(ptl);
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
 	unlock_page(new_page);
 	unlock_page(page);
@@ -1820,10 +1862,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 out_fail:
 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
-	entry = pmd_mknonnuma(entry);
-	set_pmd_at(mm, haddr, pmd, entry);
-	update_mmu_cache_pmd(vma, address, &entry);
+	ptl = pmd_lock(mm, pmd);
+	if (pmd_same(*pmd, entry)) {
+		entry = pmd_mknonnuma(entry);
+		set_pmd_at(mm, mmun_start, pmd, entry);
+		update_mmu_cache_pmd(vma, address, &entry);
+	}
+	spin_unlock(ptl);
 
+out_unlock:
 	unlock_page(page);
 	put_page(page);
 	return 0;
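
For context, and not part of the mm/migrate.c diff above: the new pmd_trans_migrating() and wait_migrate_huge_page() helpers are intended for the huge-PMD NUMA hinting fault path, so that a fault racing with an in-flight THP migration waits on the old page's lock instead of operating on a half-migrated mapping. A minimal sketch of such a call site, assuming a do_huge_pmd_numa_page()-style handler where ptl holds the PMD lock and pmdp/vma are in scope (illustrative only, not taken from this hunk):

	/* Sketch only: back off and wait if this THP is being migrated. */
	if (unlikely(pmd_trans_migrating(*pmdp))) {
		spin_unlock(ptl);
		wait_migrate_huge_page(vma->anon_vma, pmdp);
		goto out;
	}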