@@ -2268,3 +2268,161 @@ int install_special_mapping(struct mm_struct *mm,
 
 	return 0;
 }
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct anon_vma *anon_vma)
+{
+	if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_lock(&anon_vma->lock);
+		/*
+		 * We can safely modify head.next after taking the
+		 * anon_vma->lock. If some other vma in this mm shares
+		 * the same anon_vma we won't take it again.
+		 *
+		 * No need for atomic instructions here, head.next
+		 * can't change from under us thanks to the
+		 * anon_vma->lock.
+		 */
+		if (__test_and_set_bit(0, (unsigned long *)
+				       &anon_vma->head.next))
+			BUG();
+	}
+}
+
+static void vm_lock_mapping(struct address_space *mapping)
+{
+	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change from under us because
+		 * we hold the mm_all_locks_mutex.
+		 *
+		 * Operations on ->flags have to be atomic because
+		 * even if AS_MM_ALL_LOCKS is stable thanks to the
+		 * mm_all_locks_mutex, there may be other cpus
+		 * changing other bitflags in parallel to us.
+		 */
+		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+			BUG();
+		spin_lock(&mapping->i_mmap_lock);
+	}
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without need of
+ * altering the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to prevent new
+ * anon_vmas from being associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid taking the same lock twice, if more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly, we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
+ * that may have to take thousands of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	int ret = -EINTR;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+	mutex_lock(&mm_all_locks_mutex);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (signal_pending(current))
+			goto out_unlock;
+		if (vma->anon_vma)
+			vm_lock_anon_vma(vma->anon_vma);
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_lock_mapping(vma->vm_file->f_mapping);
+	}
+	ret = 0;
+
+out_unlock:
+	if (ret)
+		mm_drop_all_locks(mm);
+
+	return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+	if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change to 0 from under
+		 * us because we hold the mm_all_locks_mutex.
+		 *
+		 * We must however clear the bitflag before unlocking
+		 * the vma so that users of anon_vma->head will
+		 * never see our bitflag.
+		 *
+		 * No need for atomic instructions here, head.next
+		 * can't change from under us until we release the
+		 * anon_vma->lock.
+		 */
+		if (!__test_and_clear_bit(0, (unsigned long *)
+					  &anon_vma->head.next))
+			BUG();
+		spin_unlock(&anon_vma->lock);
+	}
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change to 0 from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_unlock(&mapping->i_mmap_lock);
+		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+					&mapping->flags))
+			BUG();
+	}
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->anon_vma)
+			vm_unlock_anon_vma(vma->anon_vma);
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_unlock_mapping(vma->vm_file->f_mapping);
+	}
+
+	mutex_unlock(&mm_all_locks_mutex);
+}
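
For reference, the sketch below shows how a caller would be expected to use the new API under the rules documented in the big comment above: mmap_sem held for writing across the whole region, no release of mmap_sem before mm_drop_all_locks() returns, and -EINTR handled as a possible outcome. This snippet is illustrative only and not part of the patch; sample_register_against_mm() is a hypothetical caller (something along the lines of an mmu notifier registration path), and it assumes the mm_take_all_locks()/mm_drop_all_locks() prototypes are visible via linux/mm.h.

#include <linux/mm.h>		/* mm_take_all_locks(), mm_drop_all_locks() (assumed visible here) */
#include <linux/sched.h>	/* struct mm_struct, mmap_sem */

/*
 * Hypothetical caller: quiesce every pte/vma/mm operation on "mm"
 * (page faults, try_to_unmap, vmtruncate) while publishing some
 * per-mm state, then release everything in the required order.
 */
static int sample_register_against_mm(struct mm_struct *mm)
{
	int ret;

	/* The caller must hold mmap_sem for writing first. */
	down_write(&mm->mmap_sem);

	/*
	 * Takes anon_vma->lock and i_mmap_lock for every vma in the mm;
	 * fails with -EINTR if a signal arrives while walking the list.
	 */
	ret = mm_take_all_locks(mm);
	if (ret)
		/*
		 * The failure path inside mm_take_all_locks() already
		 * dropped any locks it managed to take, so don't call
		 * mm_drop_all_locks() again here.
		 */
		goto out;

	/* ... publish state that must not race with faults/unmap ... */

	mm_drop_all_locks(mm);
out:
	/* mmap_sem may only be released after mm_drop_all_locks(). */
	up_write(&mm->mmap_sem);
	return ret;
}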