@@ -986,13 +986,13 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	struct mm_struct *mm = tlb->mm;
 	int force_flush = 0;
-	pte_t *pte;
-	spinlock_t *ptl;
 	int rss[NR_MM_COUNTERS];
+	spinlock_t *ptl;
+	pte_t *pte;
 
 again:
 	init_rss_vec(rss);
@@ -1001,12 +1001,9 @@ again:
 	do {
 		pte_t ptent = *pte;
 		if (pte_none(ptent)) {
-			(*zap_work)--;
 			continue;
 		}
 
-		(*zap_work) -= PAGE_SIZE;
-
 		if (pte_present(ptent)) {
 			struct page *page;
 
@@ -1075,7 +1072,7 @@ again:
 			print_bad_pte(vma, addr, ptent, NULL);
 		}
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
-	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
+	} while (pte++, addr += PAGE_SIZE, addr != end);
 
 	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
@@ -1099,7 +1096,7 @@ again:
 static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -1111,19 +1108,15 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 			if (next-addr != HPAGE_PMD_SIZE) {
 				VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
 				split_huge_page_pmd(vma->vm_mm, pmd);
-			} else if (zap_huge_pmd(tlb, vma, pmd)) {
-				(*zap_work)--;
+			} else if (zap_huge_pmd(tlb, vma, pmd))
 				continue;
-			}
 			/* fall through */
 		}
-		if (pmd_none_or_clear_bad(pmd)) {
-			(*zap_work)--;
+		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		}
-		next = zap_pte_range(tlb, vma, pmd, addr, next,
-						zap_work, details);
-	} while (pmd++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pte_range(tlb, vma, pmd, addr, next, details);
+		cond_resched();
+	} while (pmd++, addr = next, addr != end);
 
 	return addr;
 }
@@ -1131,7 +1124,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -1139,13 +1132,10 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud)) {
-			(*zap_work)--;
+		if (pud_none_or_clear_bad(pud))
 			continue;
-		}
-		next = zap_pmd_range(tlb, vma, pud, addr, next,
-						zap_work, details);
-	} while (pud++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pmd_range(tlb, vma, pud, addr, next, details);
+	} while (pud++, addr = next, addr != end);
 
 	return addr;
 }
@@ -1153,7 +1143,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 static unsigned long unmap_page_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -1167,13 +1157,10 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd)) {
-			(*zap_work)--;
+		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		}
-		next = zap_pud_range(tlb, vma, pgd, addr, next,
-						zap_work, details);
-	} while (pgd++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pud_range(tlb, vma, pgd, addr, next, details);
+	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
 	mem_cgroup_uncharge_end();
 
@@ -1218,9 +1205,7 @@ unsigned long unmap_vmas(struct mmu_gather *tlb,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *details)
 {
-	long zap_work = ZAP_BLOCK_SIZE;
 	unsigned long start = start_addr;
-	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
 	struct mm_struct *mm = vma->vm_mm;
 
 	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
@@ -1253,33 +1238,15 @@ unsigned long unmap_vmas(struct mmu_gather *tlb,
 				 * Since no pte has actually been setup, it is
 				 * safe to do nothing in this case.
 				 */
-				if (vma->vm_file) {
+				if (vma->vm_file)
 					unmap_hugepage_range(vma, start, end, NULL);
-					zap_work -= (end - start) /
-					pages_per_huge_page(hstate_vma(vma));
-				}
 
 				start = end;
 			} else
-				start = unmap_page_range(tlb, vma,
-						start, end, &zap_work, details);
-
-			if (zap_work > 0) {
-				BUG_ON(start != end);
-				break;
-			}
-
-			if (need_resched() ||
-				(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
-				if (i_mmap_lock)
-					goto out;
-				cond_resched();
-			}
-
-			zap_work = ZAP_BLOCK_SIZE;
+				start = unmap_page_range(tlb, vma, start, end, details);
 		}
 	}
-out:
+
 	mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
 	return start;	/* which is now the end (or restart) address */
 }
@@ -2612,96 +2579,11 @@ unwritable_page:
 	return ret;
 }
 
-/*
- * Helper functions for unmap_mapping_range().
- *
- * __ Notes on dropping i_mmap_lock to reduce latency while unmapping __
- *
- * We have to restart searching the prio_tree whenever we drop the lock,
- * since the iterator is only valid while the lock is held, and anyway
- * a later vma might be split and reinserted earlier while lock dropped.
- *
- * The list of nonlinear vmas could be handled more efficiently, using
- * a placeholder, but handle it in the same way until a need is shown.
- * It is important to search the prio_tree before nonlinear list: a vma
- * may become nonlinear and be shifted from prio_tree to nonlinear list
- * while the lock is dropped; but never shifted from list to prio_tree.
- *
- * In order to make forward progress despite restarting the search,
- * vm_truncate_count is used to mark a vma as now dealt with, so we can
- * quickly skip it next time around.  Since the prio_tree search only
- * shows us those vmas affected by unmapping the range in question, we
- * can't efficiently keep all vmas in step with mapping->truncate_count:
- * so instead reset them all whenever it wraps back to 0 (then go to 1).
- * mapping->truncate_count and vma->vm_truncate_count are protected by
- * i_mmap_lock.
- *
- * In order to make forward progress despite repeatedly restarting some
- * large vma, note the restart_addr from unmap_vmas when it breaks out:
- * and restart from that address when we reach that vma again. It might
- * have been split or merged, shrunk or extended, but never shifted: so
- * restart_addr remains valid so long as it remains in the vma's range.
- * unmap_mapping_range forces truncate_count to leap over page-aligned
- * values so we can save vma's restart_addr in its truncate_count field.
- */
-#define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK))
-
-static void reset_vma_truncate_counts(struct address_space *mapping)
-{
-	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
-
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
-		vma->vm_truncate_count = 0;
-	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
-		vma->vm_truncate_count = 0;
-}
-
-static int unmap_mapping_range_vma(struct vm_area_struct *vma,
+static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 		unsigned long start_addr, unsigned long end_addr,
 		struct zap_details *details)
 {
-	unsigned long restart_addr;
-	int need_break;
-
-	/*
-	 * files that support invalidating or truncating portions of the
-	 * file from under mmaped areas must have their ->fault function
-	 * return a locked page (and set VM_FAULT_LOCKED in the return).
-	 * This provides synchronisation against concurrent unmapping here.
-	 */
-
-again:
-	restart_addr = vma->vm_truncate_count;
-	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
-		start_addr = restart_addr;
-		if (start_addr >= end_addr) {
-			/* Top of vma has been split off since last time */
-			vma->vm_truncate_count = details->truncate_count;
-			return 0;
-		}
-	}
-
-	restart_addr = zap_page_range(vma, start_addr,
-					end_addr - start_addr, details);
-	need_break = need_resched() || spin_needbreak(details->i_mmap_lock);
-
-	if (restart_addr >= end_addr) {
-		/* We have now completed this vma: mark it so */
-		vma->vm_truncate_count = details->truncate_count;
-		if (!need_break)
-			return 0;
-	} else {
-		/* Note restart_addr in vma's truncate_count field */
-		vma->vm_truncate_count = restart_addr;
-		if (!need_break)
-			goto again;
-	}
-
-	spin_unlock(details->i_mmap_lock);
-	cond_resched();
-	spin_lock(details->i_mmap_lock);
-	return -EINTR;
+	zap_page_range(vma, start_addr, end_addr - start_addr, details);
 }
 
 static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
@@ -2711,12 +2593,8 @@ static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
 	struct prio_tree_iter iter;
 	pgoff_t vba, vea, zba, zea;
 
-restart:
 	vma_prio_tree_foreach(vma, &iter, root,
 			details->first_index, details->last_index) {
-		/* Skip quickly over those we have already dealt with */
-		if (vma->vm_truncate_count == details->truncate_count)
-			continue;
 
 		vba = vma->vm_pgoff;
 		vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
@@ -2728,11 +2606,10 @@ restart:
 		if (zea > vea)
 			zea = vea;
 
-		if (unmap_mapping_range_vma(vma,
+		unmap_mapping_range_vma(vma,
 			((zba - vba) << PAGE_SHIFT) + vma->vm_start,
 			((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start,
-				details) < 0)
-			goto restart;
+				details);
 	}
 }
 
@@ -2747,15 +2624,9 @@ static inline void unmap_mapping_range_list(struct list_head *head,
 	 * across *all* the pages in each nonlinear VMA, not just the pages
 	 * whose virtual address lies outside the file truncation point.
 	 */
-restart:
 	list_for_each_entry(vma, head, shared.vm_set.list) {
-		/* Skip quickly over those we have already dealt with */
-		if (vma->vm_truncate_count == details->truncate_count)
-			continue;
 		details->nonlinear_vma = vma;
-		if (unmap_mapping_range_vma(vma, vma->vm_start,
-					vma->vm_end, details) < 0)
-			goto restart;
+		unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
 	}
 }
 
@@ -2794,26 +2665,14 @@ void unmap_mapping_range(struct address_space *mapping,
 	details.last_index = hba + hlen - 1;
 	if (details.last_index < details.first_index)
 		details.last_index = ULONG_MAX;
-	details.i_mmap_lock = &mapping->i_mmap_lock;
 
-	mutex_lock(&mapping->unmap_mutex);
-	spin_lock(&mapping->i_mmap_lock);
-
-	/* Protect against endless unmapping loops */
-	mapping->truncate_count++;
-	if (unlikely(is_restart_addr(mapping->truncate_count))) {
-		if (mapping->truncate_count == 0)
-			reset_vma_truncate_counts(mapping);
-		mapping->truncate_count++;
-	}
-	details.truncate_count = mapping->truncate_count;
-
+	spin_lock(&mapping->i_mmap_lock);
 	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
 	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
 		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
 	spin_unlock(&mapping->i_mmap_lock);
-	mutex_unlock(&mapping->unmap_mutex);
 }
 EXPORT_SYMBOL(unmap_mapping_range);
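
A note on the loop structure the zap hunks converge on: every level can now terminate on a plain `addr != end` because each `*_addr_end()` helper clamps `next` to the nearer of the next page-table boundary and `end`, so the walk visits each range exactly once. The removed `zap_work` budget contributed nothing to correctness; scheduling latency is instead bounded by the `cond_resched()` this patch adds in zap_pmd_range(). The same single-pass property is what lets unmap_mapping_range() hold i_mmap_lock across the whole prio_tree walk: with no lock-break the iterator can never be invalidated mid-walk, so the restart: labels and the vm_truncate_count bookkeeping can go.

For reference, here is a minimal, self-contained userspace sketch of that clamping idiom. The helper mirrors the kernel's generic pmd_addr_end() macro, but PMD_SHIFT and the sample range in main() are made-up illustration values, not kernel code:

	#include <stdio.h>

	#define PMD_SHIFT	21
	#define PMD_SIZE	(1UL << PMD_SHIFT)
	#define PMD_MASK	(~(PMD_SIZE - 1))

	/*
	 * Clamp to the next PMD boundary or to 'end', whichever comes
	 * first.  The "- 1" on both sides matches the kernel macro: it
	 * keeps the comparison correct even when 'end' is 0, i.e. when
	 * the range runs to the very top of the address space and wraps.
	 */
	static unsigned long pmd_addr_end(unsigned long addr, unsigned long end)
	{
		unsigned long boundary = (addr + PMD_SIZE) & PMD_MASK;

		return (boundary - 1 < end - 1) ? boundary : end;
	}

	int main(void)
	{
		unsigned long addr = 0x200ffcUL;	/* deliberately unaligned */
		unsigned long end  = 0x604000UL;
		unsigned long next;

		/*
		 * Same control flow as the post-patch zap loops: each
		 * chunk covers at most one PMD's worth of addresses, and
		 * the walk stops on a plain addr != end.
		 */
		do {
			next = pmd_addr_end(addr, end);
			printf("zap [%#lx, %#lx)\n", addr, next);
		} while (addr = next, addr != end);

		return 0;
	}

Running the sketch prints three chunks: the unaligned head up to the first 2MB boundary, one full PMD span, and the partial tail, which is exactly the decomposition zap_pmd_range() hands down to zap_pte_range().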