|
@@ -695,11 +695,10 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
|
|
|
return pmd;
|
|
|
}
|
|
|
|
|
|
-static inline pmd_t mk_huge_pmd(struct page *page, struct vm_area_struct *vma)
|
|
|
+static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
|
|
|
{
|
|
|
pmd_t entry;
|
|
|
- entry = mk_pmd(page, vma->vm_page_prot);
|
|
|
- entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
|
|
|
+ entry = mk_pmd(page, prot);
|
|
|
entry = pmd_mkhuge(entry);
|
|
|
return entry;
|
|
|
}
|
|
@@ -732,7 +731,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
|
|
|
pte_free(mm, pgtable);
|
|
|
} else {
|
|
|
pmd_t entry;
|
|
|
- entry = mk_huge_pmd(page, vma);
|
|
|
+ entry = mk_huge_pmd(page, vma->vm_page_prot);
|
|
|
+ entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
|
|
|
page_add_new_anon_rmap(page, vma, haddr);
|
|
|
pgtable_trans_huge_deposit(mm, pmd, pgtable);
|
|
|
set_pmd_at(mm, haddr, pmd, entry);
|
|
@@ -788,77 +788,57 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
|
|
{
|
|
|
struct page *page;
|
|
|
unsigned long haddr = address & HPAGE_PMD_MASK;
|
|
|
- pte_t *pte;
|
|
|
|
|
|
- if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) {
|
|
|
- if (unlikely(anon_vma_prepare(vma)))
|
|
|
- return VM_FAULT_OOM;
|
|
|
- if (unlikely(khugepaged_enter(vma)))
|
|
|
+ if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
|
|
|
+ return VM_FAULT_FALLBACK;
|
|
|
+ if (unlikely(anon_vma_prepare(vma)))
|
|
|
+ return VM_FAULT_OOM;
|
|
|
+ if (unlikely(khugepaged_enter(vma)))
|
|
|
+ return VM_FAULT_OOM;
|
|
|
+ if (!(flags & FAULT_FLAG_WRITE) &&
|
|
|
+ transparent_hugepage_use_zero_page()) {
|
|
|
+ pgtable_t pgtable;
|
|
|
+ struct page *zero_page;
|
|
|
+ bool set;
|
|
|
+ pgtable = pte_alloc_one(mm, haddr);
|
|
|
+ if (unlikely(!pgtable))
|
|
|
return VM_FAULT_OOM;
|
|
|
- if (!(flags & FAULT_FLAG_WRITE) &&
|
|
|
- transparent_hugepage_use_zero_page()) {
|
|
|
- pgtable_t pgtable;
|
|
|
- struct page *zero_page;
|
|
|
- bool set;
|
|
|
- pgtable = pte_alloc_one(mm, haddr);
|
|
|
- if (unlikely(!pgtable))
|
|
|
- return VM_FAULT_OOM;
|
|
|
- zero_page = get_huge_zero_page();
|
|
|
- if (unlikely(!zero_page)) {
|
|
|
- pte_free(mm, pgtable);
|
|
|
- count_vm_event(THP_FAULT_FALLBACK);
|
|
|
- goto out;
|
|
|
- }
|
|
|
- spin_lock(&mm->page_table_lock);
|
|
|
- set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
|
|
|
- zero_page);
|
|
|
- spin_unlock(&mm->page_table_lock);
|
|
|
- if (!set) {
|
|
|
- pte_free(mm, pgtable);
|
|
|
- put_huge_zero_page();
|
|
|
- }
|
|
|
- return 0;
|
|
|
- }
|
|
|
- page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
|
|
|
- vma, haddr, numa_node_id(), 0);
|
|
|
- if (unlikely(!page)) {
|
|
|
+ zero_page = get_huge_zero_page();
|
|
|
+ if (unlikely(!zero_page)) {
|
|
|
+ pte_free(mm, pgtable);
|
|
|
count_vm_event(THP_FAULT_FALLBACK);
|
|
|
- goto out;
|
|
|
- }
|
|
|
- count_vm_event(THP_FAULT_ALLOC);
|
|
|
- if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
|
|
|
- put_page(page);
|
|
|
- goto out;
|
|
|
+ return VM_FAULT_FALLBACK;
|
|
|
}
|
|
|
- if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd,
|
|
|
- page))) {
|
|
|
- mem_cgroup_uncharge_page(page);
|
|
|
- put_page(page);
|
|
|
- goto out;
|
|
|
+ spin_lock(&mm->page_table_lock);
|
|
|
+ set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
|
|
|
+ zero_page);
|
|
|
+ spin_unlock(&mm->page_table_lock);
|
|
|
+ if (!set) {
|
|
|
+ pte_free(mm, pgtable);
|
|
|
+ put_huge_zero_page();
|
|
|
}
|
|
|
-
|
|
|
return 0;
|
|
|
}
|
|
|
-out:
|
|
|
- /*
|
|
|
- * Use __pte_alloc instead of pte_alloc_map, because we can't
|
|
|
- * run pte_offset_map on the pmd, if an huge pmd could
|
|
|
- * materialize from under us from a different thread.
|
|
|
- */
|
|
|
- if (unlikely(pmd_none(*pmd)) &&
|
|
|
- unlikely(__pte_alloc(mm, vma, pmd, address)))
|
|
|
- return VM_FAULT_OOM;
|
|
|
- /* if an huge pmd materialized from under us just retry later */
|
|
|
- if (unlikely(pmd_trans_huge(*pmd)))
|
|
|
- return 0;
|
|
|
- /*
|
|
|
- * A regular pmd is established and it can't morph into a huge pmd
|
|
|
- * from under us anymore at this point because we hold the mmap_sem
|
|
|
- * read mode and khugepaged takes it in write mode. So now it's
|
|
|
- * safe to run pte_offset_map().
|
|
|
- */
|
|
|
- pte = pte_offset_map(pmd, address);
|
|
|
- return handle_pte_fault(mm, vma, address, pte, pmd, flags);
|
|
|
+ page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
|
|
|
+ vma, haddr, numa_node_id(), 0);
|
|
|
+ if (unlikely(!page)) {
|
|
|
+ count_vm_event(THP_FAULT_FALLBACK);
|
|
|
+ return VM_FAULT_FALLBACK;
|
|
|
+ }
|
|
|
+ if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
|
|
|
+ put_page(page);
|
|
|
+ count_vm_event(THP_FAULT_FALLBACK);
|
|
|
+ return VM_FAULT_FALLBACK;
|
|
|
+ }
|
|
|
+ if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
|
|
|
+ mem_cgroup_uncharge_page(page);
|
|
|
+ put_page(page);
|
|
|
+ count_vm_event(THP_FAULT_FALLBACK);
|
|
|
+ return VM_FAULT_FALLBACK;
|
|
|
+ }
|
|
|
+
|
|
|
+ count_vm_event(THP_FAULT_ALLOC);
|
|
|
+ return 0;
|
|
|
}
|
|
|
|
|
|
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
|
@@ -1170,7 +1150,6 @@ alloc:
|
|
|
new_page = NULL;
|
|
|
|
|
|
if (unlikely(!new_page)) {
|
|
|
- count_vm_event(THP_FAULT_FALLBACK);
|
|
|
if (is_huge_zero_pmd(orig_pmd)) {
|
|
|
ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
|
|
|
address, pmd, orig_pmd, haddr);
|
|
@@ -1181,9 +1160,9 @@ alloc:
|
|
|
split_huge_page(page);
|
|
|
put_page(page);
|
|
|
}
|
|
|
+ count_vm_event(THP_FAULT_FALLBACK);
|
|
|
goto out;
|
|
|
}
|
|
|
- count_vm_event(THP_FAULT_ALLOC);
|
|
|
|
|
|
if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
|
|
|
put_page(new_page);
|
|
@@ -1191,10 +1170,13 @@ alloc:
|
|
|
split_huge_page(page);
|
|
|
put_page(page);
|
|
|
}
|
|
|
+ count_vm_event(THP_FAULT_FALLBACK);
|
|
|
ret |= VM_FAULT_OOM;
|
|
|
goto out;
|
|
|
}
|
|
|
|
|
|
+ count_vm_event(THP_FAULT_ALLOC);
|
|
|
+
|
|
|
if (is_huge_zero_pmd(orig_pmd))
|
|
|
clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
|
|
|
else
|
|
@@ -1215,7 +1197,8 @@ alloc:
|
|
|
goto out_mn;
|
|
|
} else {
|
|
|
pmd_t entry;
|
|
|
- entry = mk_huge_pmd(new_page, vma);
|
|
|
+ entry = mk_huge_pmd(new_page, vma->vm_page_prot);
|
|
|
+ entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
|
|
|
pmdp_clear_flush(vma, haddr, pmd);
|
|
|
page_add_new_anon_rmap(new_page, vma, haddr);
|
|
|
set_pmd_at(mm, haddr, pmd, entry);
|
|
@@ -1666,7 +1649,6 @@ static void __split_huge_page_refcount(struct page *page,
|
|
|
BUG_ON(atomic_read(&page->_count) <= 0);
|
|
|
|
|
|
__mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1);
|
|
|
- __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
|
|
|
|
|
|
ClearPageCompound(page);
|
|
|
compound_unlock(page);
|
|
@@ -2364,7 +2346,8 @@ static void collapse_huge_page(struct mm_struct *mm,
|
|
|
__SetPageUptodate(new_page);
|
|
|
pgtable = pmd_pgtable(_pmd);
|
|
|
|
|
|
- _pmd = mk_huge_pmd(new_page, vma);
|
|
|
+ _pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
|
|
|
+ _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
|
|
|
|
|
|
/*
|
|
|
* spin_lock() below is not the equivalent of smp_wmb(), so
|