@@ -986,13 +986,13 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	struct mm_struct *mm = tlb->mm;
 	int force_flush = 0;
-	pte_t *pte;
-	spinlock_t *ptl;
 	int rss[NR_MM_COUNTERS];
+	spinlock_t *ptl;
+	pte_t *pte;
 
 again:
 	init_rss_vec(rss);
@@ -1001,12 +1001,9 @@ again:
 	do {
 		pte_t ptent = *pte;
 		if (pte_none(ptent)) {
-			(*zap_work)--;
 			continue;
 		}
 
-		(*zap_work) -= PAGE_SIZE;
-
 		if (pte_present(ptent)) {
 			struct page *page;
 
@@ -1075,7 +1072,7 @@ again:
 			print_bad_pte(vma, addr, ptent, NULL);
 		}
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
-	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
+	} while (pte++, addr += PAGE_SIZE, addr != end);
 
 	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
@@ -1099,7 +1096,7 @@ again:
 static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -1111,19 +1108,15 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 			if (next-addr != HPAGE_PMD_SIZE) {
 				VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
 				split_huge_page_pmd(vma->vm_mm, pmd);
-			} else if (zap_huge_pmd(tlb, vma, pmd)) {
-				(*zap_work)--;
+			} else if (zap_huge_pmd(tlb, vma, pmd))
 				continue;
-			}
 			/* fall through */
 		}
-		if (pmd_none_or_clear_bad(pmd)) {
-			(*zap_work)--;
+		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		}
-		next = zap_pte_range(tlb, vma, pmd, addr, next,
-						zap_work, details);
-	} while (pmd++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pte_range(tlb, vma, pmd, addr, next, details);
+		cond_resched();
+	} while (pmd++, addr = next, addr != end);
 
 	return addr;
 }
@@ -1131,7 +1124,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -1139,13 +1132,10 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud)) {
-			(*zap_work)--;
+		if (pud_none_or_clear_bad(pud))
 			continue;
-		}
-		next = zap_pmd_range(tlb, vma, pud, addr, next,
-						zap_work, details);
-	} while (pud++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pmd_range(tlb, vma, pud, addr, next, details);
+	} while (pud++, addr = next, addr != end);
 
 	return addr;
 }
@@ -1153,7 +1143,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 static unsigned long unmap_page_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -1167,13 +1157,10 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd)) {
-			(*zap_work)--;
+		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		}
-		next = zap_pud_range(tlb, vma, pgd, addr, next,
-						zap_work, details);
-	} while (pgd++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pud_range(tlb, vma, pgd, addr, next, details);
+	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
 	mem_cgroup_uncharge_end();
 
@@ -1218,9 +1205,7 @@ unsigned long unmap_vmas(struct mmu_gather *tlb,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *details)
 {
-	long zap_work = ZAP_BLOCK_SIZE;
 	unsigned long start = start_addr;
-	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
 	struct mm_struct *mm = vma->vm_mm;
 
 	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
@@ -1253,33 +1238,15 @@ unsigned long unmap_vmas(struct mmu_gather *tlb,
 				 * Since no pte has actually been setup, it is
 				 * safe to do nothing in this case.
 				 */
-				if (vma->vm_file) {
+				if (vma->vm_file)
 					unmap_hugepage_range(vma, start, end, NULL);
-					zap_work -= (end - start) /
-					pages_per_huge_page(hstate_vma(vma));
-				}
 
 				start = end;
 			} else
-				start = unmap_page_range(tlb, vma,
-						start, end, &zap_work, details);
-
-			if (zap_work > 0) {
-				BUG_ON(start != end);
-				break;
-			}
-
-			if (need_resched() ||
-				(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
-				if (i_mmap_lock)
-					goto out;
-				cond_resched();
-			}
-
-			zap_work = ZAP_BLOCK_SIZE;
+				start = unmap_page_range(tlb, vma, start, end, details);
 		}
 	}
-out:
+
 	mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
 	return start;	/* which is now the end (or restart) address */
 }
@@ -2612,96 +2579,11 @@ unwritable_page:
 	return ret;
 }
 
-/*
- * Helper functions for unmap_mapping_range().
- *
- * __ Notes on dropping i_mmap_lock to reduce latency while unmapping __
- *
- * We have to restart searching the prio_tree whenever we drop the lock,
- * since the iterator is only valid while the lock is held, and anyway
- * a later vma might be split and reinserted earlier while lock dropped.
- *
- * The list of nonlinear vmas could be handled more efficiently, using
- * a placeholder, but handle it in the same way until a need is shown.
- * It is important to search the prio_tree before nonlinear list: a vma
- * may become nonlinear and be shifted from prio_tree to nonlinear list
- * while the lock is dropped; but never shifted from list to prio_tree.
- *
- * In order to make forward progress despite restarting the search,
- * vm_truncate_count is used to mark a vma as now dealt with, so we can
- * quickly skip it next time around.  Since the prio_tree search only
- * shows us those vmas affected by unmapping the range in question, we
- * can't efficiently keep all vmas in step with mapping->truncate_count:
- * so instead reset them all whenever it wraps back to 0 (then go to 1).
- * mapping->truncate_count and vma->vm_truncate_count are protected by
- * i_mmap_lock.
- *
- * In order to make forward progress despite repeatedly restarting some
- * large vma, note the restart_addr from unmap_vmas when it breaks out:
- * and restart from that address when we reach that vma again. It might
- * have been split or merged, shrunk or extended, but never shifted: so
- * restart_addr remains valid so long as it remains in the vma's range.
- * unmap_mapping_range forces truncate_count to leap over page-aligned
- * values so we can save vma's restart_addr in its truncate_count field.
- */
-#define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK))
-
-static void reset_vma_truncate_counts(struct address_space *mapping)
-{
-	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
-
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
-		vma->vm_truncate_count = 0;
-	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
-		vma->vm_truncate_count = 0;
-}
-
-static int unmap_mapping_range_vma(struct vm_area_struct *vma,
+static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 		unsigned long start_addr, unsigned long end_addr,
 		struct zap_details *details)
 {
-	unsigned long restart_addr;
-	int need_break;
-
-	/*
-	 * files that support invalidating or truncating portions of the
-	 * file from under mmaped areas must have their ->fault function
-	 * return a locked page (and set VM_FAULT_LOCKED in the return).
-	 * This provides synchronisation against concurrent unmapping here.
-	 */
-
-again:
-	restart_addr = vma->vm_truncate_count;
-	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
-		start_addr = restart_addr;
-		if (start_addr >= end_addr) {
-			/* Top of vma has been split off since last time */
-			vma->vm_truncate_count = details->truncate_count;
-			return 0;
-		}
-	}
-
-	restart_addr = zap_page_range(vma, start_addr,
-					end_addr - start_addr, details);
-	need_break = need_resched() || spin_needbreak(details->i_mmap_lock);
-
-	if (restart_addr >= end_addr) {
-		/* We have now completed this vma: mark it so */
-		vma->vm_truncate_count = details->truncate_count;
-		if (!need_break)
-			return 0;
-	} else {
-		/* Note restart_addr in vma's truncate_count field */
-		vma->vm_truncate_count = restart_addr;
-		if (!need_break)
-			goto again;
-	}
-
-	spin_unlock(details->i_mmap_lock);
-	cond_resched();
-	spin_lock(details->i_mmap_lock);
-	return -EINTR;
+	zap_page_range(vma, start_addr, end_addr - start_addr, details);
 }
 
 static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
@@ -2711,12 +2593,8 @@ static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
 	struct prio_tree_iter iter;
 	pgoff_t vba, vea, zba, zea;
 
-restart:
 	vma_prio_tree_foreach(vma, &iter, root,
 			details->first_index, details->last_index) {
-		/* Skip quickly over those we have already dealt with */
-		if (vma->vm_truncate_count == details->truncate_count)
-			continue;
 
 		vba = vma->vm_pgoff;
 		vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
@@ -2728,11 +2606,10 @@ restart:
 		if (zea > vea)
 			zea = vea;
 
-		if (unmap_mapping_range_vma(vma,
+		unmap_mapping_range_vma(vma,
 			((zba - vba) << PAGE_SHIFT) + vma->vm_start,
 			((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start,
-				details) < 0)
-			goto restart;
+				details);
 	}
 }
 
@@ -2747,15 +2624,9 @@ static inline void unmap_mapping_range_list(struct list_head *head,
 	 * across *all* the pages in each nonlinear VMA, not just the pages
 	 * whose virtual address lies outside the file truncation point.
 	 */
-restart:
 	list_for_each_entry(vma, head, shared.vm_set.list) {
-		/* Skip quickly over those we have already dealt with */
-		if (vma->vm_truncate_count == details->truncate_count)
-			continue;
 		details->nonlinear_vma = vma;
-		if (unmap_mapping_range_vma(vma, vma->vm_start,
-					vma->vm_end, details) < 0)
-			goto restart;
+		unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
 	}
 }
 
@@ -2794,26 +2665,14 @@ void unmap_mapping_range(struct address_space *mapping,
 	details.last_index = hba + hlen - 1;
 	if (details.last_index < details.first_index)
 		details.last_index = ULONG_MAX;
-	details.i_mmap_lock = &mapping->i_mmap_lock;
 
-	mutex_lock(&mapping->unmap_mutex);
-	spin_lock(&mapping->i_mmap_lock);
-
-	/* Protect against endless unmapping loops */
-	mapping->truncate_count++;
-	if (unlikely(is_restart_addr(mapping->truncate_count))) {
-		if (mapping->truncate_count == 0)
-			reset_vma_truncate_counts(mapping);
-		mapping->truncate_count++;
-	}
-	details.truncate_count = mapping->truncate_count;
-
+	spin_lock(&mapping->i_mmap_lock);
 	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
 	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
 		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
 	spin_unlock(&mapping->i_mmap_lock);
-	mutex_unlock(&mapping->unmap_mutex);
 }
 EXPORT_SYMBOL(unmap_mapping_range);
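
A note on the loop structure the zap hunks converge on: every level can now terminate on a plain `addr != end` because each `*_addr_end()` helper clamps `next` to the nearer of the next page-table boundary and `end`, so the walk visits each range exactly once. The removed `zap_work` budget contributed nothing to correctness; scheduling latency is instead bounded by the `cond_resched()` this patch adds in zap_pmd_range(). The same single-pass property is what lets unmap_mapping_range() hold i_mmap_lock across the whole prio_tree walk: with no lock-break the iterator can never be invalidated mid-walk, so the restart: labels and the vm_truncate_count bookkeeping can go.

For reference, here is a minimal, self-contained userspace sketch of that clamping idiom. The helper mirrors the kernel's generic pmd_addr_end() macro, but PMD_SHIFT and the sample range in main() are made-up illustration values, not kernel code:

	#include <stdio.h>

	#define PMD_SHIFT	21
	#define PMD_SIZE	(1UL << PMD_SHIFT)
	#define PMD_MASK	(~(PMD_SIZE - 1))

	/*
	 * Clamp to the next PMD boundary or to 'end', whichever comes
	 * first.  The "- 1" on both sides matches the kernel macro: it
	 * keeps the comparison correct even when 'end' is 0, i.e. when
	 * the range runs to the very top of the address space and wraps.
	 */
	static unsigned long pmd_addr_end(unsigned long addr, unsigned long end)
	{
		unsigned long boundary = (addr + PMD_SIZE) & PMD_MASK;

		return (boundary - 1 < end - 1) ? boundary : end;
	}

	int main(void)
	{
		unsigned long addr = 0x200ffcUL;	/* deliberately unaligned */
		unsigned long end  = 0x604000UL;
		unsigned long next;

		/*
		 * Same control flow as the post-patch zap loops: each
		 * chunk covers at most one PMD's worth of addresses, and
		 * the walk stops on a plain addr != end.
		 */
		do {
			next = pmd_addr_end(addr, end);
			printf("zap [%#lx, %#lx)\n", addr, next);
		} while (addr = next, addr != end);

		return 0;
	}

Running the sketch prints three chunks: the unaligned head up to the first 2MB boundary, one full PMD span, and the partial tail, which is exactly the decomposition zap_pmd_range() hands down to zap_pte_range().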