@@ -2376,6 +2376,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 
 	for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
+		spinlock_t *src_ptl, *dst_ptl;
 		src_pte = huge_pte_offset(src, addr);
 		if (!src_pte)
 			continue;
@@ -2387,8 +2388,9 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		if (dst_pte == src_pte)
 			continue;
 
-		spin_lock(&dst->page_table_lock);
-		spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING);
+		dst_ptl = huge_pte_lock(h, dst, dst_pte);
+		src_ptl = huge_pte_lockptr(h, src, src_pte);
+		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 		if (!huge_pte_none(huge_ptep_get(src_pte))) {
 			if (cow)
 				huge_ptep_set_wrprotect(src, addr, src_pte);
@@ -2398,8 +2400,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			page_dup_rmap(ptepage);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
 		}
-		spin_unlock(&src->page_table_lock);
-		spin_unlock(&dst->page_table_lock);
+		spin_unlock(src_ptl);
+		spin_unlock(dst_ptl);
 	}
 	return 0;
 
@@ -2442,6 +2444,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	unsigned long address;
 	pte_t *ptep;
 	pte_t pte;
+	spinlock_t *ptl;
 	struct page *page;
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
@@ -2455,25 +2458,25 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	tlb_start_vma(tlb, vma);
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 again:
-	spin_lock(&mm->page_table_lock);
 	for (address = start; address < end; address += sz) {
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
 
+		ptl = huge_pte_lock(h, mm, ptep);
 		if (huge_pmd_unshare(mm, &address, ptep))
-			continue;
+			goto unlock;
 
 		pte = huge_ptep_get(ptep);
 		if (huge_pte_none(pte))
-			continue;
+			goto unlock;
 
 		/*
 		 * HWPoisoned hugepage is already unmapped and dropped reference
 		 */
 		if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) {
 			huge_pte_clear(mm, address, ptep);
-			continue;
+			goto unlock;
 		}
 
 		page = pte_page(pte);
@@ -2484,7 +2487,7 @@ again:
 		 */
 		if (ref_page) {
 			if (page != ref_page)
-				continue;
+				goto unlock;
 
 			/*
 			 * Mark the VMA as having unmapped its page so that
@@ -2501,13 +2504,18 @@ again:
 
 		page_remove_rmap(page);
 		force_flush = !__tlb_remove_page(tlb, page);
-		if (force_flush)
+		if (force_flush) {
+			spin_unlock(ptl);
 			break;
+		}
 		/* Bail out after unmapping reference page if supplied */
-		if (ref_page)
+		if (ref_page) {
+			spin_unlock(ptl);
 			break;
+		}
+unlock:
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 	/*
 	 * mmu_gather ran out of room to batch pages, we break out of
 	 * the PTE lock to avoid doing the potential expensive TLB invalidate
@@ -2613,7 +2621,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
  */
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, pte_t pte,
-			struct page *pagecache_page)
+			struct page *pagecache_page, spinlock_t *ptl)
 {
 	struct hstate *h = hstate_vma(vma);
 	struct page *old_page, *new_page;
@@ -2647,8 +2655,8 @@ retry_avoidcopy:
 
 	page_cache_get(old_page);
 
-	/* Drop page_table_lock as buddy allocator may be called */
-	spin_unlock(&mm->page_table_lock);
+	/* Drop page table lock as buddy allocator may be called */
+	spin_unlock(ptl);
 	new_page = alloc_huge_page(vma, address, outside_reserve);
 
 	if (IS_ERR(new_page)) {
@@ -2666,13 +2674,13 @@ retry_avoidcopy:
 			BUG_ON(huge_pte_none(pte));
 			if (unmap_ref_private(mm, vma, old_page, address)) {
 				BUG_ON(huge_pte_none(pte));
-				spin_lock(&mm->page_table_lock);
+				spin_lock(ptl);
 				ptep = huge_pte_offset(mm, address & huge_page_mask(h));
 				if (likely(pte_same(huge_ptep_get(ptep), pte)))
 					goto retry_avoidcopy;
 				/*
-				 * race occurs while re-acquiring page_table_lock, and
-				 * our job is done.
+				 * race occurs while re-acquiring page table
+				 * lock, and our job is done.
 				 */
 				return 0;
 			}
@@ -2680,7 +2688,7 @@ retry_avoidcopy:
 		}
 
 		/* Caller expects lock to be held */
-		spin_lock(&mm->page_table_lock);
+		spin_lock(ptl);
 		if (err == -ENOMEM)
 			return VM_FAULT_OOM;
 		else
@@ -2695,7 +2703,7 @@ retry_avoidcopy:
 		page_cache_release(new_page);
 		page_cache_release(old_page);
 		/* Caller expects lock to be held */
-		spin_lock(&mm->page_table_lock);
+		spin_lock(ptl);
 		return VM_FAULT_OOM;
 	}
 
@@ -2707,10 +2715,10 @@ retry_avoidcopy:
 	mmun_end = mmun_start + huge_page_size(h);
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	/*
-	 * Retake the page_table_lock to check for racing updates
+	 * Retake the page table lock to check for racing updates
 	 * before the page tables are altered
 	 */
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	ptep = huge_pte_offset(mm, address & huge_page_mask(h));
 	if (likely(pte_same(huge_ptep_get(ptep), pte))) {
 		ClearPagePrivate(new_page);
@@ -2724,13 +2732,13 @@ retry_avoidcopy:
 		/* Make the old page be freed below */
 		new_page = old_page;
 	}
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
 
 	/* Caller expects lock to be held */
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	return 0;
 }
 
@@ -2778,6 +2786,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page;
 	struct address_space *mapping;
 	pte_t new_pte;
+	spinlock_t *ptl;
 
 	/*
 	 * Currently, we are forced to kill the process in the event the
@@ -2864,7 +2873,8 @@ retry:
 			goto backout_unlocked;
 		}
 
-	spin_lock(&mm->page_table_lock);
+	ptl = huge_pte_lockptr(h, mm, ptep);
+	spin_lock(ptl);
 	size = i_size_read(mapping->host) >> huge_page_shift(h);
 	if (idx >= size)
 		goto backout;
@@ -2885,16 +2895,16 @@ retry:
 
 	if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
 		/* Optimization, do the COW without a second fault */
-		ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page);
+		ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl);
 	}
 
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	unlock_page(page);
 out:
 	return ret;
 
 backout:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 backout_unlocked:
 	unlock_page(page);
 	put_page(page);
@@ -2906,6 +2916,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	pte_t *ptep;
 	pte_t entry;
+	spinlock_t *ptl;
 	int ret;
 	struct page *page = NULL;
 	struct page *pagecache_page = NULL;
@@ -2918,7 +2929,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (ptep) {
 		entry = huge_ptep_get(ptep);
 		if (unlikely(is_hugetlb_entry_migration(entry))) {
-			migration_entry_wait_huge(mm, ptep);
+			migration_entry_wait_huge(vma, mm, ptep);
 			return 0;
 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
 			return VM_FAULT_HWPOISON_LARGE |
@@ -2974,17 +2985,18 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (page != pagecache_page)
 		lock_page(page);
 
-	spin_lock(&mm->page_table_lock);
+	ptl = huge_pte_lockptr(h, mm, ptep);
+	spin_lock(ptl);
 	/* Check for a racing update before calling hugetlb_cow */
 	if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
-		goto out_page_table_lock;
+		goto out_ptl;
 
 
 	if (flags & FAULT_FLAG_WRITE) {
 		if (!huge_pte_write(entry)) {
 			ret = hugetlb_cow(mm, vma, address, ptep, entry,
-					pagecache_page);
-			goto out_page_table_lock;
+					pagecache_page, ptl);
+			goto out_ptl;
 		}
 		entry = huge_pte_mkdirty(entry);
 	}
@@ -2993,8 +3005,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 						flags & FAULT_FLAG_WRITE))
 		update_mmu_cache(vma, address, ptep);
 
-out_page_table_lock:
-	spin_unlock(&mm->page_table_lock);
+out_ptl:
+	spin_unlock(ptl);
 
 	if (pagecache_page) {
 		unlock_page(pagecache_page);
@@ -3020,9 +3032,9 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long remainder = *nr_pages;
 	struct hstate *h = hstate_vma(vma);
 
-	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
 		pte_t *pte;
+		spinlock_t *ptl = NULL;
 		int absent;
 		struct page *page;
 
@@ -3030,8 +3042,12 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * Some archs (sparc64, sh*) have multiple pte_ts to
 		 * each hugepage. We have to make sure we get the
 		 * first, for the page indexing below to work.
+		 *
+		 * Note that page table lock is not held when pte is null.
 		 */
 		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+		if (pte)
+			ptl = huge_pte_lock(h, mm, pte);
 		absent = !pte || huge_pte_none(huge_ptep_get(pte));
 
 		/*
@@ -3043,6 +3059,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 */
 		if (absent && (flags & FOLL_DUMP) &&
 		    !hugetlbfs_pagecache_present(h, vma, vaddr)) {
+			if (pte)
+				spin_unlock(ptl);
 			remainder = 0;
 			break;
 		}
@@ -3062,10 +3080,10 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		      !huge_pte_write(huge_ptep_get(pte)))) {
 			int ret;
 
-			spin_unlock(&mm->page_table_lock);
+			if (pte)
+				spin_unlock(ptl);
 			ret = hugetlb_fault(mm, vma, vaddr,
 				(flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
-			spin_lock(&mm->page_table_lock);
 			if (!(ret & VM_FAULT_ERROR))
 				continue;
 
@@ -3096,8 +3114,8 @@ same_page:
 			 */
 			goto same_page;
 		}
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 	*nr_pages = remainder;
 	*position = vaddr;
 
@@ -3118,13 +3136,15 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 	flush_cache_range(vma, address, end);
 
 	mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
-	spin_lock(&mm->page_table_lock);
 	for (; address < end; address += huge_page_size(h)) {
+		spinlock_t *ptl;
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
+		ptl = huge_pte_lock(h, mm, ptep);
 		if (huge_pmd_unshare(mm, &address, ptep)) {
 			pages++;
+			spin_unlock(ptl);
 			continue;
 		}
 		if (!huge_pte_none(huge_ptep_get(ptep))) {
@@ -3134,8 +3154,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 			set_huge_pte_at(mm, address, ptep, pte);
 			pages++;
 		}
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 	/*
 	 * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare
 	 * may have cleared our pud entry and done put_page on the page table:
@@ -3298,6 +3318,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	unsigned long saddr;
 	pte_t *spte = NULL;
 	pte_t *pte;
+	spinlock_t *ptl;
 
 	if (!vma_shareable(vma, addr))
 		return (pte_t *)pmd_alloc(mm, pud, addr);
@@ -3320,13 +3341,14 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	if (!spte)
 		goto out;
 
-	spin_lock(&mm->page_table_lock);
+	ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte);
+	spin_lock(ptl);
 	if (pud_none(*pud))
 		pud_populate(mm, pud,
 				(pmd_t *)((unsigned long)spte & PAGE_MASK));
 	else
 		put_page(virt_to_page(spte));
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 out:
 	pte = (pte_t *)pmd_alloc(mm, pud, addr);
 	mutex_unlock(&mapping->i_mmap_mutex);
@@ -3340,7 +3362,7 @@ out:
  * indicated by page_count > 1, unmap is achieved by clearing pud and
  * decrementing the ref count. If count == 1, the pte page is not shared.
  *
- * called with vma->vm_mm->page_table_lock held.
+ * called with page table lock held.
  *
  * returns: 1 successfully unmapped a shared pte page
  *	    0 the underlying pte page is not shared, or it is the last user