14 years ago · d39dd11c3e
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -128,7 +128,7 @@ alloc_inode:
 
				 destroy_inode:
			
 
				 dirty_inode:				(must not sleep)
			
 
				 write_inode:
			
 
				-drop_inode:				!!!inode_lock!!!
			
 
				+drop_inode:				!!!inode->i_lock!!!
			
 
				 evict_inode:
			
 
				 put_super:		write
			
 
				 write_super:		read
			
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -298,11 +298,14 @@ be used instead.  It gets called whenever the inode is evicted, whether it has
 
				 remaining links or not.  Caller does *not* evict the pagecache or inode-associated
			
 
				 metadata buffers; getting rid of those is responsibility of method, as it had
			
 
				 been for ->delete_inode().
			
 
				-	->drop_inode() returns int now; it's called on final iput() with inode_lock
			
 
				-held and it returns true if filesystems wants the inode to be dropped.  As before,
			
 
				-generic_drop_inode() is still the default and it's been updated appropriately.
			
 
				-generic_delete_inode() is also alive and it consists simply of return 1.  Note that
			
 
				-all actual eviction work is done by caller after ->drop_inode() returns.
			
 
				+
			
 
				+	->drop_inode() returns int now; it's called on final iput() with
			
 
				+inode->i_lock held and it returns true if the filesystem wants the inode to be
			
 
				+dropped.  As before, generic_drop_inode() is still the default and it's been
			
 
				+updated appropriately.  generic_delete_inode() is also alive and it consists
			
 
				+simply of return 1.  Note that all actual eviction work is done by caller after
			
 
				+->drop_inode() returns.
			
 
				+
			
 
				 	clear_inode() is gone; use end_writeback() instead.  As before, it must
			
 
				 be called exactly once on each call of ->evict_inode() (as it used to be for
			
 
				 each call of ->delete_inode()).  Unlike before, if you are using inode-associated
			
@@ -395,6 +398,9 @@ Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set,
 
				 so the i_size should not change when hole punching, even when puching the end of
			
 
				 a file off.
			
 
				 
			
 
				+--
			
 
				+[mandatory]
			
 
				+
			
 
				 --
			
 
				 [mandatory]
			
 
				 	->get_sb() is gone.  Switch to use of ->mount().  Typically it's just
			
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -254,7 +254,7 @@ or bottom half).
 
				 	should be synchronous or not, not all filesystems check this flag.
			
 
				 
			
 
				   drop_inode: called when the last access to the inode is dropped,
			
 
				-	with the inode_lock spinlock held.
			
 
				+	with the inode->i_lock spinlock held.
			
 
				 
			
 
				 	This method should be either NULL (normal UNIX filesystem
			
 
				 	semantics) or "generic_delete_inode" (for filesystems that do not
			
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -61,8 +61,6 @@ do {							\
 
				 		current->pid, __func__, ##args);	\
			
 
				 } while (0)
			
 
				 
			
 
				-extern spinlock_t autofs4_lock;
			
 
				-
			
 
				 /* Unified info structure.  This is pointed to by both the dentry and
			
 
				    inode structures.  Each file in the filesystem has an instance of this
			
 
				    structure.  It holds a reference to the dentry, so dentries are never
			
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -372,6 +372,10 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 
				 		return -EBUSY;
			
 
				 	} else {
			
 
				 		struct file *pipe = fget(pipefd);
			
 
				+		if (!pipe) {
			
 
				+			err = -EBADF;
			
 
				+			goto out;
			
 
				+		}
			
 
				 		if (!pipe->f_op || !pipe->f_op->write) {
			
 
				 			err = -EPIPE;
			
 
				 			fput(pipe);
			
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -86,19 +86,71 @@ done:
 
				 	return status;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Calculate and dget next positive entry in the subdirs list under root.
			
 
				+ */
			
 
				+static struct dentry *get_next_positive_subdir(struct dentry *prev,
			
 
				+						struct dentry *root)
			
 
				+{
			
 
				+	struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
			
 
				+	struct list_head *next;
			
 
				+	struct dentry *p, *q;
			
 
				+
			
 
				+	spin_lock(&sbi->lookup_lock);
			
 
				+
			
 
				+	if (prev == NULL) {
			
 
				+		spin_lock(&root->d_lock);
			
 
				+		prev = dget_dlock(root);
			
 
				+		next = prev->d_subdirs.next;
			
 
				+		p = prev;
			
 
				+		goto start;
			
 
				+	}
			
 
				+
			
 
				+	p = prev;
			
 
				+	spin_lock(&p->d_lock);
			
 
				+again:
			
 
				+	next = p->d_u.d_child.next;
			
 
				+start:
			
 
				+	if (next == &root->d_subdirs) {
			
 
				+		spin_unlock(&p->d_lock);
			
 
				+		spin_unlock(&sbi->lookup_lock);
			
 
				+		dput(prev);
			
 
				+		return NULL;
			
 
				+	}
			
 
				+
			
 
				+	q = list_entry(next, struct dentry, d_u.d_child);
			
 
				+
			
 
				+	spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
			
 
				+	/* Negative dentry - try next */
			
 
				+	if (!simple_positive(q)) {
			
 
				+		spin_unlock(&p->d_lock);
			
 
				+		p = q;
			
 
				+		goto again;
			
 
				+	}
			
 
				+	dget_dlock(q);
			
 
				+	spin_unlock(&q->d_lock);
			
 
				+	spin_unlock(&p->d_lock);
			
 
				+	spin_unlock(&sbi->lookup_lock);
			
 
				+
			
 
				+	dput(prev);
			
 
				+
			
 
				+	return q;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Calculate and dget next entry in top down tree traversal.
			
 
				  */
			
 
				 static struct dentry *get_next_positive_dentry(struct dentry *prev,
			
 
				 						struct dentry *root)
			
 
				 {
			
 
				+	struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
			
 
				 	struct list_head *next;
			
 
				 	struct dentry *p, *ret;
			
 
				 
			
 
				 	if (prev == NULL)
			
 
				 		return dget(root);
			
 
				 
			
 
				-	spin_lock(&autofs4_lock);
			
 
				+	spin_lock(&sbi->lookup_lock);
			
 
				 relock:
			
 
				 	p = prev;
			
 
				 	spin_lock(&p->d_lock);
			
@@ -110,7 +162,7 @@ again:
 
				 
			
 
				 			if (p == root) {
			
 
				 				spin_unlock(&p->d_lock);
			
 
				-				spin_unlock(&autofs4_lock);
			
 
				+				spin_unlock(&sbi->lookup_lock);
			
 
				 				dput(prev);
			
 
				 				return NULL;
			
 
				 			}
			
@@ -140,7 +192,7 @@ again:
 
				 	dget_dlock(ret);
			
 
				 	spin_unlock(&ret->d_lock);
			
 
				 	spin_unlock(&p->d_lock);
			
 
				-	spin_unlock(&autofs4_lock);
			
 
				+	spin_unlock(&sbi->lookup_lock);
			
 
				 
			
 
				 	dput(prev);
			
 
				 
			
@@ -290,11 +342,8 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
 
				 	spin_lock(&sbi->fs_lock);
			
 
				 	ino = autofs4_dentry_ino(root);
			
 
				 	/* No point expiring a pending mount */
			
 
				-	if (ino->flags & AUTOFS_INF_PENDING) {
			
 
				-		spin_unlock(&sbi->fs_lock);
			
 
				-		return NULL;
			
 
				-	}
			
 
				-	managed_dentry_set_transit(root);
			
 
				+	if (ino->flags & AUTOFS_INF_PENDING)
			
 
				+		goto out;
			
 
				 	if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
			
 
				 		struct autofs_info *ino = autofs4_dentry_ino(root);
			
 
				 		ino->flags |= AUTOFS_INF_EXPIRING;
			
@@ -302,7 +351,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
 
				 		spin_unlock(&sbi->fs_lock);
			
 
				 		return root;
			
 
				 	}
			
 
				-	managed_dentry_clear_transit(root);
			
 
				+out:
			
 
				 	spin_unlock(&sbi->fs_lock);
			
 
				 	dput(root);
			
 
				 
			
@@ -336,13 +385,12 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 
				 	timeout = sbi->exp_timeout;
			
 
				 
			
 
				 	dentry = NULL;
			
 
				-	while ((dentry = get_next_positive_dentry(dentry, root))) {
			
 
				+	while ((dentry = get_next_positive_subdir(dentry, root))) {
			
 
				 		spin_lock(&sbi->fs_lock);
			
 
				 		ino = autofs4_dentry_ino(dentry);
			
 
				 		/* No point expiring a pending mount */
			
 
				 		if (ino->flags & AUTOFS_INF_PENDING)
			
 
				-			goto cont;
			
 
				-		managed_dentry_set_transit(dentry);
			
 
				+			goto next;
			
 
				 
			
 
				 		/*
			
 
				 		 * Case 1: (i) indirect mount or top level pseudo direct mount
			
@@ -402,8 +450,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 
				 			}
			
 
				 		}
			
 
				 next:
			
 
				-		managed_dentry_clear_transit(dentry);
			
 
				-cont:
			
 
				 		spin_unlock(&sbi->fs_lock);
			
 
				 	}
			
 
				 	return NULL;
			
@@ -415,13 +461,13 @@ found:
 
				 	ino->flags |= AUTOFS_INF_EXPIRING;
			
 
				 	init_completion(&ino->expire_complete);
			
 
				 	spin_unlock(&sbi->fs_lock);
			
 
				-	spin_lock(&autofs4_lock);
			
 
				+	spin_lock(&sbi->lookup_lock);
			
 
				 	spin_lock(&expired->d_parent->d_lock);
			
 
				 	spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
			
 
				 	list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
			
 
				 	spin_unlock(&expired->d_lock);
			
 
				 	spin_unlock(&expired->d_parent->d_lock);
			
 
				-	spin_unlock(&autofs4_lock);
			
 
				+	spin_unlock(&sbi->lookup_lock);
			
 
				 	return expired;
			
 
				 }
			
 
				 
			
@@ -484,8 +530,6 @@ int autofs4_expire_run(struct super_block *sb,
 
				 	spin_lock(&sbi->fs_lock);
			
 
				 	ino = autofs4_dentry_ino(dentry);
			
 
				 	ino->flags &= ~AUTOFS_INF_EXPIRING;
			
 
				-	if (!d_unhashed(dentry))
			
 
				-		managed_dentry_clear_transit(dentry);
			
 
				 	complete_all(&ino->expire_complete);
			
 
				 	spin_unlock(&sbi->fs_lock);
			
 
				 
			
@@ -513,9 +557,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 
				 		spin_lock(&sbi->fs_lock);
			
 
				 		ino->flags &= ~AUTOFS_INF_EXPIRING;
			
 
				 		spin_lock(&dentry->d_lock);
			
 
				-		if (ret)
			
 
				-			__managed_dentry_clear_transit(dentry);
			
 
				-		else {
			
 
				+		if (!ret) {
			
 
				 			if ((IS_ROOT(dentry) ||
			
 
				 			    (autofs_type_indirect(sbi->type) &&
			
 
				 			     IS_ROOT(dentry->d_parent))) &&
			
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -23,8 +23,6 @@
 
				 
			
 
				 #include "autofs_i.h"
			
 
				 
			
 
				-DEFINE_SPINLOCK(autofs4_lock);
			
 
				-
			
 
				 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
			
 
				 static int autofs4_dir_unlink(struct inode *,struct dentry *);
			
 
				 static int autofs4_dir_rmdir(struct inode *,struct dentry *);
			
@@ -125,15 +123,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 
				 	 * autofs file system so just let the libfs routines handle
			
 
				 	 * it.
			
 
				 	 */
			
 
				-	spin_lock(&autofs4_lock);
			
 
				+	spin_lock(&sbi->lookup_lock);
			
 
				 	spin_lock(&dentry->d_lock);
			
 
				 	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
			
 
				 		spin_unlock(&dentry->d_lock);
			
 
				-		spin_unlock(&autofs4_lock);
			
 
				+		spin_unlock(&sbi->lookup_lock);
			
 
				 		return -ENOENT;
			
 
				 	}
			
 
				 	spin_unlock(&dentry->d_lock);
			
 
				-	spin_unlock(&autofs4_lock);
			
 
				+	spin_unlock(&sbi->lookup_lock);
			
 
				 
			
 
				 out:
			
 
				 	return dcache_dir_open(inode, file);
			
@@ -171,7 +169,6 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 
				 	const unsigned char *str = name->name;
			
 
				 	struct list_head *p, *head;
			
 
				 
			
 
				-	spin_lock(&autofs4_lock);
			
 
				 	spin_lock(&sbi->lookup_lock);
			
 
				 	head = &sbi->active_list;
			
 
				 	list_for_each(p, head) {
			
@@ -204,14 +201,12 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 
				 			dget_dlock(active);
			
 
				 			spin_unlock(&active->d_lock);
			
 
				 			spin_unlock(&sbi->lookup_lock);
			
 
				-			spin_unlock(&autofs4_lock);
			
 
				 			return active;
			
 
				 		}
			
 
				 next:
			
 
				 		spin_unlock(&active->d_lock);
			
 
				 	}
			
 
				 	spin_unlock(&sbi->lookup_lock);
			
 
				-	spin_unlock(&autofs4_lock);
			
 
				 
			
 
				 	return NULL;
			
 
				 }
			
@@ -226,7 +221,6 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 
				 	const unsigned char *str = name->name;
			
 
				 	struct list_head *p, *head;
			
 
				 
			
 
				-	spin_lock(&autofs4_lock);
			
 
				 	spin_lock(&sbi->lookup_lock);
			
 
				 	head = &sbi->expiring_list;
			
 
				 	list_for_each(p, head) {
			
@@ -259,14 +253,12 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 
				 			dget_dlock(expiring);
			
 
				 			spin_unlock(&expiring->d_lock);
			
 
				 			spin_unlock(&sbi->lookup_lock);
			
 
				-			spin_unlock(&autofs4_lock);
			
 
				 			return expiring;
			
 
				 		}
			
 
				 next:
			
 
				 		spin_unlock(&expiring->d_lock);
			
 
				 	}
			
 
				 	spin_unlock(&sbi->lookup_lock);
			
 
				-	spin_unlock(&autofs4_lock);
			
 
				 
			
 
				 	return NULL;
			
 
				 }
			
@@ -275,17 +267,16 @@ static int autofs4_mount_wait(struct dentry *dentry)
 
				 {
			
 
				 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
			
 
				 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
			
 
				-	int status;
			
 
				+	int status = 0;
			
 
				 
			
 
				 	if (ino->flags & AUTOFS_INF_PENDING) {
			
 
				 		DPRINTK("waiting for mount name=%.*s",
			
 
				 			dentry->d_name.len, dentry->d_name.name);
			
 
				 		status = autofs4_wait(sbi, dentry, NFY_MOUNT);
			
 
				 		DPRINTK("mount wait done status=%d", status);
			
 
				-		ino->last_used = jiffies;
			
 
				-		return status;
			
 
				 	}
			
 
				-	return 0;
			
 
				+	ino->last_used = jiffies;
			
 
				+	return status;
			
 
				 }
			
 
				 
			
 
				 static int do_expire_wait(struct dentry *dentry)
			
@@ -319,9 +310,12 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
 
				 	 */
			
 
				 	if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
			
 
				 		struct dentry *parent = dentry->d_parent;
			
 
				+		struct autofs_info *ino;
			
 
				 		struct dentry *new = d_lookup(parent, &dentry->d_name);
			
 
				 		if (!new)
			
 
				 			return NULL;
			
 
				+		ino = autofs4_dentry_ino(new);
			
 
				+		ino->last_used = jiffies;
			
 
				 		dput(path->dentry);
			
 
				 		path->dentry = new;
			
 
				 	}
			
@@ -338,18 +332,6 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
 
				 	DPRINTK("dentry=%p %.*s",
			
 
				 		dentry, dentry->d_name.len, dentry->d_name.name);
			
 
				 
			
 
				-	/*
			
 
				-	 * Someone may have manually umounted this or it was a submount
			
 
				-	 * that has gone away.
			
 
				-	 */
			
 
				-	spin_lock(&dentry->d_lock);
			
 
				-	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
			
 
				-		if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
			
 
				-		     (dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
			
 
				-			__managed_dentry_set_transit(path->dentry);
			
 
				-	}
			
 
				-	spin_unlock(&dentry->d_lock);
			
 
				-
			
 
				 	/* The daemon never triggers a mount. */
			
 
				 	if (autofs4_oz_mode(sbi))
			
 
				 		return NULL;
			
@@ -418,18 +400,17 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
 
				 done:
			
 
				 	if (!(ino->flags & AUTOFS_INF_EXPIRING)) {
			
 
				 		/*
			
 
				-		 * Any needed mounting has been completed and the path updated
			
 
				-		 * so turn this into a normal dentry so we don't continually
			
 
				-		 * call ->d_automount() and ->d_manage().
			
 
				-		 */
			
 
				-		spin_lock(&dentry->d_lock);
			
 
				-		__managed_dentry_clear_transit(dentry);
			
 
				-		/*
			
 
				+		 * Any needed mounting has been completed and the path
			
 
				+		 * updated so clear DCACHE_NEED_AUTOMOUNT so we don't
			
 
				+		 * call ->d_automount() on rootless multi-mounts since
			
 
				+		 * it can lead to an incorrect ELOOP error return.
			
 
				+		 *
			
 
				 		 * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and
			
 
				 		 * symlinks as in all other cases the dentry will be covered by
			
 
				 		 * an actual mount so ->d_automount() won't be called during
			
 
				 		 * the follow.
			
 
				 		 */
			
 
				+		spin_lock(&dentry->d_lock);
			
 
				 		if ((!d_mountpoint(dentry) &&
			
 
				 		    !list_empty(&dentry->d_subdirs)) ||
			
 
				 		    (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)))
			
@@ -455,6 +436,8 @@ int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 
				 
			
 
				 	/* The daemon never waits. */
			
 
				 	if (autofs4_oz_mode(sbi)) {
			
 
				+		if (rcu_walk)
			
 
				+			return 0;
			
 
				 		if (!d_mountpoint(dentry))
			
 
				 			return -EISDIR;
			
 
				 		return 0;
			
@@ -612,12 +595,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 
				 
			
 
				 	dir->i_mtime = CURRENT_TIME;
			
 
				 
			
 
				-	spin_lock(&autofs4_lock);
			
 
				-	autofs4_add_expiring(dentry);
			
 
				+	spin_lock(&sbi->lookup_lock);
			
 
				+	__autofs4_add_expiring(dentry);
			
 
				 	spin_lock(&dentry->d_lock);
			
 
				 	__d_drop(dentry);
			
 
				 	spin_unlock(&dentry->d_lock);
			
 
				-	spin_unlock(&autofs4_lock);
			
 
				+	spin_unlock(&sbi->lookup_lock);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -686,20 +669,17 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 
				 	if (!autofs4_oz_mode(sbi))
			
 
				 		return -EACCES;
			
 
				 
			
 
				-	spin_lock(&autofs4_lock);
			
 
				 	spin_lock(&sbi->lookup_lock);
			
 
				 	spin_lock(&dentry->d_lock);
			
 
				 	if (!list_empty(&dentry->d_subdirs)) {
			
 
				 		spin_unlock(&dentry->d_lock);
			
 
				 		spin_unlock(&sbi->lookup_lock);
			
 
				-		spin_unlock(&autofs4_lock);
			
 
				 		return -ENOTEMPTY;
			
 
				 	}
			
 
				 	__autofs4_add_expiring(dentry);
			
 
				-	spin_unlock(&sbi->lookup_lock);
			
 
				 	__d_drop(dentry);
			
 
				 	spin_unlock(&dentry->d_lock);
			
 
				-	spin_unlock(&autofs4_lock);
			
 
				+	spin_unlock(&sbi->lookup_lock);
			
 
				 
			
 
				 	if (sbi->version < 5)
			
 
				 		autofs_clear_leaf_automount_flags(dentry);
			
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -197,12 +197,12 @@ rename_retry:
 
				 
			
 
				 	seq = read_seqbegin(&rename_lock);
			
 
				 	rcu_read_lock();
			
 
				-	spin_lock(&autofs4_lock);
			
 
				+	spin_lock(&sbi->fs_lock);
			
 
				 	for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
			
 
				 		len += tmp->d_name.len + 1;
			
 
				 
			
 
				 	if (!len || --len > NAME_MAX) {
			
 
				-		spin_unlock(&autofs4_lock);
			
 
				+		spin_unlock(&sbi->fs_lock);
			
 
				 		rcu_read_unlock();
			
 
				 		if (read_seqretry(&rename_lock, seq))
			
 
				 			goto rename_retry;
			
@@ -218,7 +218,7 @@ rename_retry:
 
				 		p -= tmp->d_name.len;
			
 
				 		strncpy(p, tmp->d_name.name, tmp->d_name.len);
			
 
				 	}
			
 
				-	spin_unlock(&autofs4_lock);
			
 
				+	spin_unlock(&sbi->fs_lock);
			
 
				 	rcu_read_unlock();
			
 
				 	if (read_seqretry(&rename_lock, seq))
			
 
				 		goto rename_retry;
			
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -55,11 +55,13 @@ EXPORT_SYMBOL(I_BDEV);
 
				 static void bdev_inode_switch_bdi(struct inode *inode,
			
 
				 			struct backing_dev_info *dst)
			
 
				 {
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode_wb_list_lock);
			
 
				+	spin_lock(&inode->i_lock);
			
 
				 	inode->i_data.backing_dev_info = dst;
			
 
				 	if (inode->i_state & I_DIRTY)
			
 
				 		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
			
 
				-	spin_unlock(&inode_lock);
			
 
				+	spin_unlock(&inode->i_lock);
			
 
				+	spin_unlock(&inode_wb_list_lock);
			
 
				 }
			
 
				 
			
 
				 static sector_t max_block(struct block_device *bdev)
			
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1138,7 +1138,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
 
				  * inode list.
			
 
				  *
			
 
				  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
			
 
				- * mapping->tree_lock and the global inode_lock.
			
 
				+ * mapping->tree_lock and mapping->host->i_lock.
			
 
				  */
			
 
				 void mark_buffer_dirty(struct buffer_head *bh)
			
 
				 {
			
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -8,6 +8,7 @@
 
				 #include <linux/writeback.h>
			
 
				 #include <linux/sysctl.h>
			
 
				 #include <linux/gfp.h>
			
 
				+#include "internal.h"
			
 
				 
			
 
				 /* A global variable is a bit ugly, but it keeps the code simple */
			
 
				 int sysctl_drop_caches;
			
@@ -16,20 +17,23 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 
				 {
			
 
				 	struct inode *inode, *toput_inode = NULL;
			
 
				 
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode_sb_list_lock);
			
 
				 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
			
 
				-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
			
 
				-			continue;
			
 
				-		if (inode->i_mapping->nrpages == 0)
			
 
				+		spin_lock(&inode->i_lock);
			
 
				+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
			
 
				+		    (inode->i_mapping->nrpages == 0)) {
			
 
				+			spin_unlock(&inode->i_lock);
			
 
				 			continue;
			
 
				+		}
			
 
				 		__iget(inode);
			
 
				-		spin_unlock(&inode_lock);
			
 
				+		spin_unlock(&inode->i_lock);
			
 
				+		spin_unlock(&inode_sb_list_lock);
			
 
				 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
			
 
				 		iput(toput_inode);
			
 
				 		toput_inode = inode;
			
 
				-		spin_lock(&inode_lock);
			
 
				+		spin_lock(&inode_sb_list_lock);
			
 
				 	}
			
 
				-	spin_unlock(&inode_lock);
			
 
				+	spin_unlock(&inode_sb_list_lock);
			
 
				 	iput(toput_inode);
			
 
				 }
			
 
				 
			
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -175,6 +175,17 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
 
				 	spin_unlock_bh(&bdi->wb_lock);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Remove the inode from the writeback list it is on.
			
 
				+ */
			
 
				+void inode_wb_list_del(struct inode *inode)
			
 
				+{
			
 
				+	spin_lock(&inode_wb_list_lock);
			
 
				+	list_del_init(&inode->i_wb_list);
			
 
				+	spin_unlock(&inode_wb_list_lock);
			
 
				+}
			
 
				+
			
 
				+
			
 
				 /*
			
 
				  * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
			
 
				  * furthest end of its superblock's dirty-inode list.
			
@@ -188,6 +199,7 @@ static void redirty_tail(struct inode *inode)
 
				 {
			
 
				 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
			
 
				 
			
 
				+	assert_spin_locked(&inode_wb_list_lock);
			
 
				 	if (!list_empty(&wb->b_dirty)) {
			
 
				 		struct inode *tail;
			
 
				 
			
@@ -205,14 +217,17 @@ static void requeue_io(struct inode *inode)
 
				 {
			
 
				 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
			
 
				 
			
 
				+	assert_spin_locked(&inode_wb_list_lock);
			
 
				 	list_move(&inode->i_wb_list, &wb->b_more_io);
			
 
				 }
			
 
				 
			
 
				 static void inode_sync_complete(struct inode *inode)
			
 
				 {
			
 
				 	/*
			
 
				-	 * Prevent speculative execution through spin_unlock(&inode_lock);
			
 
				+	 * Prevent speculative execution through
			
 
				+	 * spin_unlock(&inode_wb_list_lock);
			
 
				 	 */
			
 
				+
			
 
				 	smp_mb();
			
 
				 	wake_up_bit(&inode->i_state, __I_SYNC);
			
 
				 }
			
@@ -286,6 +301,7 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 
				  */
			
 
				 static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
			
 
				 {
			
 
				+	assert_spin_locked(&inode_wb_list_lock);
			
 
				 	list_splice_init(&wb->b_more_io, &wb->b_io);
			
 
				 	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
			
 
				 }
			
@@ -306,25 +322,25 @@ static void inode_wait_for_writeback(struct inode *inode)
 
				 	wait_queue_head_t *wqh;
			
 
				 
			
 
				 	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
			
 
				-	 while (inode->i_state & I_SYNC) {
			
 
				-		spin_unlock(&inode_lock);
			
 
				+	while (inode->i_state & I_SYNC) {
			
 
				+		spin_unlock(&inode->i_lock);
			
 
				+		spin_unlock(&inode_wb_list_lock);
			
 
				 		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
			
 
				-		spin_lock(&inode_lock);
			
 
				+		spin_lock(&inode_wb_list_lock);
			
 
				+		spin_lock(&inode->i_lock);
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Write out an inode's dirty pages.  Called under inode_lock.  Either the
			
 
				- * caller has ref on the inode (either via __iget or via syscall against an fd)
			
 
				- * or the inode has I_WILL_FREE set (via generic_forget_inode)
			
 
				+ * Write out an inode's dirty pages.  Called under inode_wb_list_lock and
			
 
				+ * inode->i_lock.  Either the caller has an active reference on the inode or
			
 
				+ * the inode has I_WILL_FREE set.
			
 
				  *
			
 
				  * If `wait' is set, wait on the writeout.
			
 
				  *
			
 
				  * The whole writeout design is quite complex and fragile.  We want to avoid
			
 
				  * starvation of particular inodes when others are being redirtied, prevent
			
 
				  * livelocks, etc.
			
 
				- *
			
 
				- * Called under inode_lock.
			
 
				  */
			
 
				 static int
			
 
				 writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
			
@@ -333,6 +349,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
				 	unsigned dirty;
			
 
				 	int ret;
			
 
				 
			
 
				+	assert_spin_locked(&inode_wb_list_lock);
			
 
				+	assert_spin_locked(&inode->i_lock);
			
 
				+
			
 
				 	if (!atomic_read(&inode->i_count))
			
 
				 		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
			
 
				 	else
			
@@ -363,7 +382,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
				 	/* Set I_SYNC, reset I_DIRTY_PAGES */
			
 
				 	inode->i_state |= I_SYNC;
			
 
				 	inode->i_state &= ~I_DIRTY_PAGES;
			
 
				-	spin_unlock(&inode_lock);
			
 
				+	spin_unlock(&inode->i_lock);
			
 
				+	spin_unlock(&inode_wb_list_lock);
			
 
				 
			
 
				 	ret = do_writepages(mapping, wbc);
			
 
				 
			
@@ -383,10 +403,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
				 	 * due to delalloc, clear dirty metadata flags right before
			
 
				 	 * write_inode()
			
 
				 	 */
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode->i_lock);
			
 
				 	dirty = inode->i_state & I_DIRTY;
			
 
				 	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
			
 
				-	spin_unlock(&inode_lock);
			
 
				+	spin_unlock(&inode->i_lock);
			
 
				 	/* Don't write the inode if only I_DIRTY_PAGES was set */
			
 
				 	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
			
 
				 		int err = write_inode(inode, wbc);
			
@@ -394,7 +414,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
				 			ret = err;
			
 
				 	}
			
 
				 
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode_wb_list_lock);
			
 
				+	spin_lock(&inode->i_lock);
			
 
				 	inode->i_state &= ~I_SYNC;
			
 
				 	if (!(inode->i_state & I_FREEING)) {
			
 
				 		if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
			
@@ -506,7 +527,9 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 
				 		 * kind does not need peridic writeout yet, and for the latter
			
 
				 		 * kind writeout is handled by the freer.
			
 
				 		 */
			
 
				+		spin_lock(&inode->i_lock);
			
 
				 		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			
 
				+			spin_unlock(&inode->i_lock);
			
 
				 			requeue_io(inode);
			
 
				 			continue;
			
 
				 		}
			
@@ -515,10 +538,13 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 
				 		 * Was this inode dirtied after sync_sb_inodes was called?
			
 
				 		 * This keeps sync from extra jobs and livelock.
			
 
				 		 */
			
 
				-		if (inode_dirtied_after(inode, wbc->wb_start))
			
 
				+		if (inode_dirtied_after(inode, wbc->wb_start)) {
			
 
				+			spin_unlock(&inode->i_lock);
			
 
				 			return 1;
			
 
				+		}
			
 
				 
			
 
				 		__iget(inode);
			
 
				+
			
 
				 		pages_skipped = wbc->pages_skipped;
			
 
				 		writeback_single_inode(inode, wbc);
			
 
				 		if (wbc->pages_skipped != pages_skipped) {
			
@@ -528,10 +554,11 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 
				 			 */
			
 
				 			redirty_tail(inode);
			
 
				 		}
			
 
				-		spin_unlock(&inode_lock);
			
 
				+		spin_unlock(&inode->i_lock);
			
 
				+		spin_unlock(&inode_wb_list_lock);
			
 
				 		iput(inode);
			
 
				 		cond_resched();
			
 
				-		spin_lock(&inode_lock);
			
 
				+		spin_lock(&inode_wb_list_lock);
			
 
				 		if (wbc->nr_to_write <= 0) {
			
 
				 			wbc->more_io = 1;
			
 
				 			return 1;
			
@@ -550,7 +577,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 
				 
			
 
				 	if (!wbc->wb_start)
			
 
				 		wbc->wb_start = jiffies; /* livelock avoidance */
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode_wb_list_lock);
			
 
				 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
			
 
				 		queue_io(wb, wbc->older_than_this);
			
 
				 
			
@@ -568,7 +595,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 
				 		if (ret)
			
 
				 			break;
			
 
				 	}
			
 
				-	spin_unlock(&inode_lock);
			
 
				+	spin_unlock(&inode_wb_list_lock);
			
 
				 	/* Leave any unwritten inodes on b_io */
			
 
				 }
			
 
				 
			
@@ -577,11 +604,11 @@ static void __writeback_inodes_sb(struct super_block *sb,
 
				 {
			
 
				 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
			
 
				 
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode_wb_list_lock);
			
 
				 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
			
 
				 		queue_io(wb, wbc->older_than_this);
			
 
				 	writeback_sb_inodes(sb, wb, wbc, true);
			
 
				-	spin_unlock(&inode_lock);
			
 
				+	spin_unlock(&inode_wb_list_lock);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -720,13 +747,15 @@ static long wb_writeback(struct bdi_writeback *wb,
 
				 		 * become available for writeback. Otherwise
			
 
				 		 * we'll just busyloop.
			
 
				 		 */
			
 
				-		spin_lock(&inode_lock);
			
 
				+		spin_lock(&inode_wb_list_lock);
			
 
				 		if (!list_empty(&wb->b_more_io))  {
			
 
				 			inode = wb_inode(wb->b_more_io.prev);
			
 
				 			trace_wbc_writeback_wait(&wbc, wb->bdi);
			
 
				+			spin_lock(&inode->i_lock);
			
 
				 			inode_wait_for_writeback(inode);
			
 
				+			spin_unlock(&inode->i_lock);
			
 
				 		}
			
 
				-		spin_unlock(&inode_lock);
			
 
				+		spin_unlock(&inode_wb_list_lock);
			
 
				 	}
			
 
				 
			
 
				 	return wrote;
			
@@ -992,7 +1021,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 
				 {
			
 
				 	struct super_block *sb = inode->i_sb;
			
 
				 	struct backing_dev_info *bdi = NULL;
			
 
				-	bool wakeup_bdi = false;
			
 
				 
			
 
				 	/*
			
 
				 	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
			
@@ -1016,7 +1044,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 
				 	if (unlikely(block_dump))
			
 
				 		block_dump___mark_inode_dirty(inode);
			
 
				 
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode->i_lock);
			
 
				 	if ((inode->i_state & flags) != flags) {
			
 
				 		const int was_dirty = inode->i_state & I_DIRTY;
			
 
				 
			
@@ -1028,7 +1056,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 
				 		 * superblock list, based upon its state.
			
 
				 		 */
			
 
				 		if (inode->i_state & I_SYNC)
			
 
				-			goto out;
			
 
				+			goto out_unlock_inode;
			
 
				 
			
 
				 		/*
			
 
				 		 * Only add valid (hashed) inodes to the superblock's
			
@@ -1036,16 +1064,17 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 
				 		 */
			
 
				 		if (!S_ISBLK(inode->i_mode)) {
			
 
				 			if (inode_unhashed(inode))
			
 
				-				goto out;
			
 
				+				goto out_unlock_inode;
			
 
				 		}
			
 
				 		if (inode->i_state & I_FREEING)
			
 
				-			goto out;
			
 
				+			goto out_unlock_inode;
			
 
				 
			
 
				 		/*
			
 
				 		 * If the inode was already on b_dirty/b_io/b_more_io, don't
			
 
				 		 * reposition it (that would break b_dirty time-ordering).
			
 
				 		 */
			
 
				 		if (!was_dirty) {
			
 
				+			bool wakeup_bdi = false;
			
 
				 			bdi = inode_to_bdi(inode);
			
 
				 
			
 
				 			if (bdi_cap_writeback_dirty(bdi)) {
			
@@ -1062,15 +1091,20 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 
				 					wakeup_bdi = true;
			
 
				 			}
			
 
				 
			
 
				+			spin_unlock(&inode->i_lock);
			
 
				+			spin_lock(&inode_wb_list_lock);
			
 
				 			inode->dirtied_when = jiffies;
			
 
				 			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
			
 
				+			spin_unlock(&inode_wb_list_lock);
			
 
				+
			
 
				+			if (wakeup_bdi)
			
 
				+				bdi_wakeup_thread_delayed(bdi);
			
 
				+			return;
			
 
				 		}
			
 
				 	}
			
 
				-out:
			
 
				-	spin_unlock(&inode_lock);
			
 
				+out_unlock_inode:
			
 
				+	spin_unlock(&inode->i_lock);
			
 
				 
			
 
				-	if (wakeup_bdi)
			
 
				-		bdi_wakeup_thread_delayed(bdi);
			
 
				 }
			
 
				 EXPORT_SYMBOL(__mark_inode_dirty);
			
 
				 
			
@@ -1101,7 +1135,7 @@ static void wait_sb_inodes(struct super_block *sb)
 
				 	 */
			
 
				 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
			
 
				 
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode_sb_list_lock);
			
 
				 
			
 
				 	/*
			
 
				 	 * Data integrity sync. Must wait for all pages under writeback,
			
@@ -1111,22 +1145,25 @@ static void wait_sb_inodes(struct super_block *sb)
 
				 	 * we still have to wait for that writeout.
			
 
				 	 */
			
 
				 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
			
 
				-		struct address_space *mapping;
			
 
				+		struct address_space *mapping = inode->i_mapping;
			
 
				 
			
 
				-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
			
 
				-			continue;
			
 
				-		mapping = inode->i_mapping;
			
 
				-		if (mapping->nrpages == 0)
			
 
				+		spin_lock(&inode->i_lock);
			
 
				+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
			
 
				+		    (mapping->nrpages == 0)) {
			
 
				+			spin_unlock(&inode->i_lock);
			
 
				 			continue;
			
 
				+		}
			
 
				 		__iget(inode);
			
 
				-		spin_unlock(&inode_lock);
			
 
				+		spin_unlock(&inode->i_lock);
			
 
				+		spin_unlock(&inode_sb_list_lock);
			
 
				+
			
 
				 		/*
			
 
				-		 * We hold a reference to 'inode' so it couldn't have
			
 
				-		 * been removed from s_inodes list while we dropped the
			
 
				-		 * inode_lock.  We cannot iput the inode now as we can
			
 
				-		 * be holding the last reference and we cannot iput it
			
 
				-		 * under inode_lock. So we keep the reference and iput
			
 
				-		 * it later.
			
 
				+		 * We hold a reference to 'inode' so it couldn't have been
			
 
				+		 * removed from s_inodes list while we dropped the
			
 
				+		 * inode_sb_list_lock.  We cannot iput the inode now as we can
			
 
				+		 * be holding the last reference and we cannot iput it under
			
 
				+		 * inode_sb_list_lock. So we keep the reference and iput it
			
 
				+		 * later.
			
 
				 		 */
			
 
				 		iput(old_inode);
			
 
				 		old_inode = inode;
			
@@ -1135,9 +1172,9 @@ static void wait_sb_inodes(struct super_block *sb)
 
				 
			
 
				 		cond_resched();
			
 
				 
			
 
				-		spin_lock(&inode_lock);
			
 
				+		spin_lock(&inode_sb_list_lock);
			
 
				 	}
			
 
				-	spin_unlock(&inode_lock);
			
 
				+	spin_unlock(&inode_sb_list_lock);
			
 
				 	iput(old_inode);
			
 
				 }
			
 
				 
			
@@ -1271,9 +1308,11 @@ int write_inode_now(struct inode *inode, int sync)
 
				 		wbc.nr_to_write = 0;
			
 
				 
			
 
				 	might_sleep();
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode_wb_list_lock);
			
 
				+	spin_lock(&inode->i_lock);
			
 
				 	ret = writeback_single_inode(inode, &wbc);
			
 
				-	spin_unlock(&inode_lock);
			
 
				+	spin_unlock(&inode->i_lock);
			
 
				+	spin_unlock(&inode_wb_list_lock);
			
 
				 	if (sync)
			
 
				 		inode_sync_wait(inode);
			
 
				 	return ret;
			
@@ -1295,9 +1334,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode_wb_list_lock);
			
 
				+	spin_lock(&inode->i_lock);
			
 
				 	ret = writeback_single_inode(inode, wbc);
			
 
				-	spin_unlock(&inode_lock);
			
 
				+	spin_unlock(&inode->i_lock);
			
 
				+	spin_unlock(&inode_wb_list_lock);
			
 
				 	return ret;
			
 
				 }
			
 
				 EXPORT_SYMBOL(sync_inode);
			
--- a/fs/inode.c
+++ b/fs/inode.c
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -125,6 +125,13 @@ extern long do_handle_open(int mountdirfd,
 
				 /*
			
 
				  * inode.c
			
 
				  */
			
 
				+extern spinlock_t inode_sb_list_lock;
			
 
				+
			
 
				+/*
			
 
				+ * fs-writeback.c
			
 
				+ */
			
 
				+extern void inode_wb_list_del(struct inode *inode);
			
 
				+
			
 
				 extern int get_nr_dirty_inodes(void);
			
 
				 extern void evict_inodes(struct super_block *);
			
 
				 extern int invalidate_inodes(struct super_block *, bool);
			
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -293,7 +293,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-/* called with inode_lock held */
			
 
				+/* called with inode->i_lock held */
			
 
				 static int logfs_drop_inode(struct inode *inode)
			
 
				 {
			
 
				 	struct logfs_super *super = logfs_super(inode->i_sb);
			
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -992,6 +992,12 @@ int follow_down_one(struct path *path)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static inline bool managed_dentry_might_block(struct dentry *dentry)
			
 
				+{
			
 
				+	return (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
			
 
				+		dentry->d_op->d_manage(dentry, true) < 0);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Skip to top of mountpoint pile in rcuwalk mode.  We abort the rcu-walk if we
			
 
				  * meet a managed dentry and we're not walking to "..".  True is returned to
			
@@ -1000,19 +1006,26 @@ int follow_down_one(struct path *path)
 
				 static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
			
 
				 			       struct inode **inode, bool reverse_transit)
			
 
				 {
			
 
				-	while (d_mountpoint(path->dentry)) {
			
 
				+	for (;;) {
			
 
				 		struct vfsmount *mounted;
			
 
				-		if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
			
 
				-		    !reverse_transit &&
			
 
				-		    path->dentry->d_op->d_manage(path->dentry, true) < 0)
			
 
				+		/*
			
 
				+		 * Don't forget we might have a non-mountpoint managed dentry
			
 
				+		 * that wants to block transit.
			
 
				+		 */
			
 
				+		*inode = path->dentry->d_inode;
			
 
				+		if (!reverse_transit &&
			
 
				+		     unlikely(managed_dentry_might_block(path->dentry)))
			
 
				 			return false;
			
 
				+
			
 
				+		if (!d_mountpoint(path->dentry))
			
 
				+			break;
			
 
				+
			
 
				 		mounted = __lookup_mnt(path->mnt, path->dentry, 1);
			
 
				 		if (!mounted)
			
 
				 			break;
			
 
				 		path->mnt = mounted;
			
 
				 		path->dentry = mounted->mnt_root;
			
 
				 		nd->seq = read_seqcount_begin(&path->dentry->d_seq);
			
 
				-		*inode = path->dentry->d_inode;
			
 
				 	}
			
 
				 
			
 
				 	if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
			
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -22,13 +22,14 @@
 
				 #include <linux/module.h>
			
 
				 #include <linux/mutex.h>
			
 
				 #include <linux/spinlock.h>
			
 
				-#include <linux/writeback.h> /* for inode_lock */
			
 
				 
			
 
				 #include <asm/atomic.h>
			
 
				 
			
 
				 #include <linux/fsnotify_backend.h>
			
 
				 #include "fsnotify.h"
			
 
				 
			
 
				+#include "../internal.h"
			
 
				+
			
 
				 /*
			
 
				  * Recalculate the mask of events relevant to a given inode locked.
			
 
				  */
			
@@ -237,15 +238,14 @@ out:
 
				  * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
			
 
				  * @list: list of inodes being unmounted (sb->s_inodes)
			
 
				  *
			
 
				- * Called with inode_lock held, protecting the unmounting super block's list
			
 
				- * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
			
 
				- * We temporarily drop inode_lock, however, and CAN block.
			
 
				+ * Called during unmount with no locks held, so needs to be safe against
			
 
				+ * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
			
 
				  */
			
 
				 void fsnotify_unmount_inodes(struct list_head *list)
			
 
				 {
			
 
				 	struct inode *inode, *next_i, *need_iput = NULL;
			
 
				 
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode_sb_list_lock);
			
 
				 	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
			
 
				 		struct inode *need_iput_tmp;
			
 
				 
			
@@ -254,8 +254,11 @@ void fsnotify_unmount_inodes(struct list_head *list)
 
				 		 * I_WILL_FREE, or I_NEW which is fine because by that point
			
 
				 		 * the inode cannot have any associated watches.
			
 
				 		 */
			
 
				-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
			
 
				+		spin_lock(&inode->i_lock);
			
 
				+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
			
 
				+			spin_unlock(&inode->i_lock);
			
 
				 			continue;
			
 
				+		}
			
 
				 
			
 
				 		/*
			
 
				 		 * If i_count is zero, the inode cannot have any watches and
			
@@ -263,8 +266,10 @@ void fsnotify_unmount_inodes(struct list_head *list)
 
				 		 * evict all inodes with zero i_count from icache which is
			
 
				 		 * unnecessarily violent and may in fact be illegal to do.
			
 
				 		 */
			
 
				-		if (!atomic_read(&inode->i_count))
			
 
				+		if (!atomic_read(&inode->i_count)) {
			
 
				+			spin_unlock(&inode->i_lock);
			
 
				 			continue;
			
 
				+		}
			
 
				 
			
 
				 		need_iput_tmp = need_iput;
			
 
				 		need_iput = NULL;
			
@@ -274,22 +279,25 @@ void fsnotify_unmount_inodes(struct list_head *list)
 
				 			__iget(inode);
			
 
				 		else
			
 
				 			need_iput_tmp = NULL;
			
 
				+		spin_unlock(&inode->i_lock);
			
 
				 
			
 
				 		/* In case the dropping of a reference would nuke next_i. */
			
 
				 		if ((&next_i->i_sb_list != list) &&
			
 
				-		    atomic_read(&next_i->i_count) &&
			
 
				-		    !(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
			
 
				-			__iget(next_i);
			
 
				-			need_iput = next_i;
			
 
				+		    atomic_read(&next_i->i_count)) {
			
 
				+			spin_lock(&next_i->i_lock);
			
 
				+			if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
			
 
				+				__iget(next_i);
			
 
				+				need_iput = next_i;
			
 
				+			}
			
 
				+			spin_unlock(&next_i->i_lock);
			
 
				 		}
			
 
				 
			
 
				 		/*
			
 
				-		 * We can safely drop inode_lock here because we hold
			
 
				+		 * We can safely drop inode_sb_list_lock here because we hold
			
 
				 		 * references on both inode and next_i.  Also no new inodes
			
 
				-		 * will be added since the umount has begun.  Finally,
			
 
				-		 * iprune_mutex keeps shrink_icache_memory() away.
			
 
				+		 * will be added since the umount has begun.
			
 
				 		 */
			
 
				-		spin_unlock(&inode_lock);
			
 
				+		spin_unlock(&inode_sb_list_lock);
			
 
				 
			
 
				 		if (need_iput_tmp)
			
 
				 			iput(need_iput_tmp);
			
@@ -301,7 +309,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
 
				 
			
 
				 		iput(inode);
			
 
				 
			
 
				-		spin_lock(&inode_lock);
			
 
				+		spin_lock(&inode_sb_list_lock);
			
 
				 	}
			
 
				-	spin_unlock(&inode_lock);
			
 
				+	spin_unlock(&inode_sb_list_lock);
			
 
				 }
			
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -91,7 +91,6 @@
 
				 #include <linux/slab.h>
			
 
				 #include <linux/spinlock.h>
			
 
				 #include <linux/srcu.h>
			
 
				-#include <linux/writeback.h> /* for inode_lock */
			
 
				 
			
 
				 #include <asm/atomic.h>
			
 
				 
			
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -23,7 +23,6 @@
 
				 #include <linux/mount.h>
			
 
				 #include <linux/mutex.h>
			
 
				 #include <linux/spinlock.h>
			
 
				-#include <linux/writeback.h> /* for inode_lock */
			
 
				 
			
 
				 #include <asm/atomic.h>
			
 
				 
			
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -54,7 +54,7 @@
 
				  *
			
 
				  * Return 1 if the attributes match and 0 if not.
			
 
				  *
			
 
				- * NOTE: This function runs with the inode_lock spin lock held so it is not
			
 
				+ * NOTE: This function runs with the inode->i_lock spin lock held so it is not
			
 
				  * allowed to sleep.
			
 
				  */
			
 
				 int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
			
@@ -98,7 +98,7 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
 
				  *
			
 
				  * Return 0 on success and -errno on error.
			
 
				  *
			
 
				- * NOTE: This function runs with the inode_lock spin lock held so it is not
			
 
				+ * NOTE: This function runs with the inode->i_lock spin lock held so it is not
			
 
				  * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
			
 
				  */
			
 
				 static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
			
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -76,7 +76,7 @@
 
				 #include <linux/buffer_head.h>
			
 
				 #include <linux/capability.h>
			
 
				 #include <linux/quotaops.h>
			
 
				-#include <linux/writeback.h> /* for inode_lock, oddly enough.. */
			
 
				+#include "../internal.h" /* ugh */
			
 
				 
			
 
				 #include <asm/uaccess.h>
			
 
				 
			
@@ -900,33 +900,38 @@ static void add_dquot_ref(struct super_block *sb, int type)
 
				 	int reserved = 0;
			
 
				 #endif
			
 
				 
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode_sb_list_lock);
			
 
				 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
			
 
				-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
			
 
				+		spin_lock(&inode->i_lock);
			
 
				+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
			
 
				+		    !atomic_read(&inode->i_writecount) ||
			
 
				+		    !dqinit_needed(inode, type)) {
			
 
				+			spin_unlock(&inode->i_lock);
			
 
				 			continue;
			
 
				+		}
			
 
				 #ifdef CONFIG_QUOTA_DEBUG
			
 
				 		if (unlikely(inode_get_rsv_space(inode) > 0))
			
 
				 			reserved = 1;
			
 
				 #endif
			
 
				-		if (!atomic_read(&inode->i_writecount))
			
 
				-			continue;
			
 
				-		if (!dqinit_needed(inode, type))
			
 
				-			continue;
			
 
				-
			
 
				 		__iget(inode);
			
 
				-		spin_unlock(&inode_lock);
			
 
				+		spin_unlock(&inode->i_lock);
			
 
				+		spin_unlock(&inode_sb_list_lock);
			
 
				 
			
 
				 		iput(old_inode);
			
 
				 		__dquot_initialize(inode, type);
			
 
				-		/* We hold a reference to 'inode' so it couldn't have been
			
 
				-		 * removed from s_inodes list while we dropped the inode_lock.
			
 
				-		 * We cannot iput the inode now as we can be holding the last
			
 
				-		 * reference and we cannot iput it under inode_lock. So we
			
 
				-		 * keep the reference and iput it later. */
			
 
				+
			
 
				+		/*
			
 
				+		 * We hold a reference to 'inode' so it couldn't have been
			
 
				+		 * removed from s_inodes list while we dropped the
			
 
				+		 * inode_sb_list_lock. We cannot iput the inode now as we can be
			
 
				+		 * holding the last reference and we cannot iput it under
			
 
				+		 * inode_sb_list_lock. So we keep the reference and iput it
			
 
				+		 * later.
			
 
				+		 */
			
 
				 		old_inode = inode;
			
 
				-		spin_lock(&inode_lock);
			
 
				+		spin_lock(&inode_sb_list_lock);
			
 
				 	}
			
 
				-	spin_unlock(&inode_lock);
			
 
				+	spin_unlock(&inode_sb_list_lock);
			
 
				 	iput(old_inode);
			
 
				 
			
 
				 #ifdef CONFIG_QUOTA_DEBUG
			
@@ -1007,7 +1012,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
 
				 	struct inode *inode;
			
 
				 	int reserved = 0;
			
 
				 
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode_sb_list_lock);
			
 
				 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
			
 
				 		/*
			
 
				 		 *  We have to scan also I_NEW inodes because they can already
			
@@ -1021,7 +1026,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
 
				 			remove_inode_dquot_ref(inode, type, tofree_head);
			
 
				 		}
			
 
				 	}
			
 
				-	spin_unlock(&inode_lock);
			
 
				+	spin_unlock(&inode_sb_list_lock);
			
 
				 #ifdef CONFIG_QUOTA_DEBUG
			
 
				 	if (reserved) {
			
 
				 		printk(KERN_WARNING "VFS (%s): Writes happened after quota"
			
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1636,7 +1636,7 @@ struct super_operations {
 
				 };
			
 
				 
			
 
				 /*
			
 
				- * Inode state bits.  Protected by inode_lock.
			
 
				+ * Inode state bits.  Protected by inode->i_lock.
			
 
				  *
			
 
				  * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
			
 
				  * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
			
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -277,7 +277,7 @@ static inline int dquot_alloc_space(struct inode *inode, qsize_t nr)
 
				 		/*
			
 
				 		 * Mark inode fully dirty. Since we are allocating blocks, inode
			
 
				 		 * would become fully dirty soon anyway and it reportedly
			
 
				-		 * reduces inode_lock contention.
			
 
				+		 * reduces lock contention.
			
 
				 		 */
			
 
				 		mark_inode_dirty(inode);
			
 
				 	}
			
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -9,7 +9,7 @@
 
				 
			
 
				 struct backing_dev_info;
			
 
				 
			
 
				-extern spinlock_t inode_lock;
			
 
				+extern spinlock_t inode_wb_list_lock;
			
 
				 
			
 
				 /*
			
 
				  * fs/fs-writeback.c
			
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -67,14 +67,14 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 
				 	struct inode *inode;
			
 
				 
			
 
				 	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
			
 
				-	spin_lock(&inode_lock);
			
 
				+	spin_lock(&inode_wb_list_lock);
			
 
				 	list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
			
 
				 		nr_dirty++;
			
 
				 	list_for_each_entry(inode, &wb->b_io, i_wb_list)
			
 
				 		nr_io++;
			
 
				 	list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
			
 
				 		nr_more_io++;
			
 
				-	spin_unlock(&inode_lock);
			
 
				+	spin_unlock(&inode_wb_list_lock);
			
 
				 
			
 
				 	global_dirty_limits(&background_thresh, &dirty_thresh);
			
 
				 	bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
			
@@ -676,11 +676,11 @@ void bdi_destroy(struct backing_dev_info *bdi)
 
				 	if (bdi_has_dirty_io(bdi)) {
			
 
				 		struct bdi_writeback *dst = &default_backing_dev_info.wb;
			
 
				 
			
 
				-		spin_lock(&inode_lock);
			
 
				+		spin_lock(&inode_wb_list_lock);
			
 
				 		list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
			
 
				 		list_splice(&bdi->wb.b_io, &dst->b_io);
			
 
				 		list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
			
 
				-		spin_unlock(&inode_lock);
			
 
				+		spin_unlock(&inode_wb_list_lock);
			
 
				 	}
			
 
				 
			
 
				 	bdi_unregister(bdi);
			
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -80,8 +80,8 @@
 
				  *  ->i_mutex
			
 
				  *    ->i_alloc_sem             (various)
			
 
				  *
			
 
				- *  ->inode_lock
			
 
				- *    ->sb_lock			(fs/fs-writeback.c)
			
 
				+ *  inode_wb_list_lock
			
 
				+ *    sb_lock			(fs/fs-writeback.c)
			
 
				  *    ->mapping->tree_lock	(__sync_single_inode)
			
 
				  *
			
 
				  *  ->i_mmap_lock
			
@@ -98,8 +98,10 @@
 
				  *    ->zone.lru_lock		(check_pte_range->isolate_lru_page)
			
 
				  *    ->private_lock		(page_remove_rmap->set_page_dirty)
			
 
				  *    ->tree_lock		(page_remove_rmap->set_page_dirty)
			
 
				- *    ->inode_lock		(page_remove_rmap->set_page_dirty)
			
 
				- *    ->inode_lock		(zap_pte_range->set_page_dirty)
			
 
				+ *    inode_wb_list_lock	(page_remove_rmap->set_page_dirty)
			
 
				+ *    ->inode->i_lock		(page_remove_rmap->set_page_dirty)
			
 
				+ *    inode_wb_list_lock	(zap_pte_range->set_page_dirty)
			
 
				+ *    ->inode->i_lock		(zap_pte_range->set_page_dirty)
			
 
				  *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers)
			
 
				  *
			
 
				  *  (code doesn't rely on that order, so you could switch it around)
			
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -31,11 +31,12 @@
 
				  *             swap_lock (in swap_duplicate, swap_info_get)
			
 
				  *               mmlist_lock (in mmput, drain_mmlist and others)
			
 
				  *               mapping->private_lock (in __set_page_dirty_buffers)
			
 
				- *               inode_lock (in set_page_dirty's __mark_inode_dirty)
			
 
				+ *               inode->i_lock (in set_page_dirty's __mark_inode_dirty)
			
 
				+ *               inode_wb_list_lock (in set_page_dirty's __mark_inode_dirty)
			
 
				  *                 sb_lock (within inode_lock in fs/fs-writeback.c)
			
 
				  *                 mapping->tree_lock (widely used, in set_page_dirty,
			
 
				  *                           in arch-dependent flush_dcache_mmap_lock,
			
 
				- *                           within inode_lock in __sync_single_inode)
			
 
				+ *                           within inode_wb_list_lock in __sync_single_inode)
			
 
				  *
			
 
				  * (code doesn't rely on that order so it could be switched around)
			
 
				  * ->tasklist_lock