19 years ago · b7a818e4fc
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2370,7 +2370,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 
				 		dput(new_dentry);
			
 
				 	}
			
 
				 	if (!error)
			
 
				-		d_move(old_dentry,new_dentry);
			
 
				+		if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
			
 
				+			d_move(old_dentry,new_dentry);
			
 
				 	return error;
			
 
				 }
			
 
				 
			
@@ -2393,8 +2394,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 
				 	else
			
 
				 		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
			
 
				 	if (!error) {
			
 
				-		/* The following d_move() should become unconditional */
			
 
				-		if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
			
 
				+		if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
			
 
				 			d_move(old_dentry, new_dentry);
			
 
				 	}
			
 
				 	if (target)
			
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1669,8 +1669,7 @@ out:
 
				 	if (rehash)
			
 
				 		d_rehash(rehash);
			
 
				 	if (!error) {
			
 
				-		if (!S_ISDIR(old_inode->i_mode))
			
 
				-			d_move(old_dentry, new_dentry);
			
 
				+		d_move(old_dentry, new_dentry);
			
 
				 		nfs_renew_times(new_dentry);
			
 
				 		nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir));
			
 
				 	}
			
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -71,7 +71,7 @@ static struct file_system_type nfs_fs_type = {
 
				 	.name		= "nfs",
			
 
				 	.get_sb		= nfs_get_sb,
			
 
				 	.kill_sb	= nfs_kill_super,
			
 
				-	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
			
 
				+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
			
 
				 };
			
 
				 
			
 
				 struct file_system_type nfs_xdev_fs_type = {
			
@@ -79,7 +79,7 @@ struct file_system_type nfs_xdev_fs_type = {
 
				 	.name		= "nfs",
			
 
				 	.get_sb		= nfs_xdev_get_sb,
			
 
				 	.kill_sb	= nfs_kill_super,
			
 
				-	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
			
 
				+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
			
 
				 };
			
 
				 
			
 
				 static struct super_operations nfs_sops = {
			
@@ -107,7 +107,7 @@ static struct file_system_type nfs4_fs_type = {
 
				 	.name		= "nfs4",
			
 
				 	.get_sb		= nfs4_get_sb,
			
 
				 	.kill_sb	= nfs4_kill_super,
			
 
				-	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
			
 
				+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
			
 
				 };
			
 
				 
			
 
				 struct file_system_type nfs4_xdev_fs_type = {
			
@@ -115,7 +115,7 @@ struct file_system_type nfs4_xdev_fs_type = {
 
				 	.name		= "nfs4",
			
 
				 	.get_sb		= nfs4_xdev_get_sb,
			
 
				 	.kill_sb	= nfs4_kill_super,
			
 
				-	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
			
 
				+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
			
 
				 };
			
 
				 
			
 
				 struct file_system_type nfs4_referral_fs_type = {
			
@@ -123,7 +123,7 @@ struct file_system_type nfs4_referral_fs_type = {
 
				 	.name		= "nfs4",
			
 
				 	.get_sb		= nfs4_referral_get_sb,
			
 
				 	.kill_sb	= nfs4_kill_super,
			
 
				-	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
			
 
				+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
			
 
				 };
			
 
				 
			
 
				 static struct super_operations nfs4_sops = {
			
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,11 +44,17 @@
 
				  * locking semantics of the file system using the protocol.  It should 
			
 
				  * be somewhere else, I'm sure, but right now it isn't.
			
 
				  *
			
 
				+ * New in version 4:
			
 
				+ * 	- Remove i_generation from lock names for better stat performance.
			
 
				+ *
			
 
				+ * New in version 3:
			
 
				+ * 	- Replace dentry votes with a cluster lock
			
 
				+ *
			
 
				  * New in version 2:
			
 
				  * 	- full 64 bit i_size in the metadata lock lvbs
			
 
				  * 	- introduction of "rw" lock and pushing meta/data locking down
			
 
				  */
			
 
				-#define O2NET_PROTOCOL_VERSION 2ULL
			
 
				+#define O2NET_PROTOCOL_VERSION 4ULL
			
 
				 struct o2net_handshake {
			
 
				 	__be64	protocol_version;
			
 
				 	__be64	connector_id;
			
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -35,15 +35,17 @@
 
				 
			
 
				 #include "alloc.h"
			
 
				 #include "dcache.h"
			
 
				+#include "dlmglue.h"
			
 
				 #include "file.h"
			
 
				 #include "inode.h"
			
 
				 
			
 
				+
			
 
				 static int ocfs2_dentry_revalidate(struct dentry *dentry,
			
 
				 				   struct nameidata *nd)
			
 
				 {
			
 
				 	struct inode *inode = dentry->d_inode;
			
 
				 	int ret = 0;    /* if all else fails, just return false */
			
 
				-	struct ocfs2_super *osb;
			
 
				+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
			
 
				 
			
 
				 	mlog_entry("(0x%p, '%.*s')\n", dentry,
			
 
				 		   dentry->d_name.len, dentry->d_name.name);
			
@@ -55,28 +57,31 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				-	osb = OCFS2_SB(inode->i_sb);
			
 
				-
			
 
				 	BUG_ON(!osb);
			
 
				 
			
 
				-	if (inode != osb->root_inode) {
			
 
				-		spin_lock(&OCFS2_I(inode)->ip_lock);
			
 
				-		/* did we or someone else delete this inode? */
			
 
				-		if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
			
 
				-			spin_unlock(&OCFS2_I(inode)->ip_lock);
			
 
				-			mlog(0, "inode (%llu) deleted, returning false\n",
			
 
				-			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
			
 
				-			goto bail;
			
 
				-		}
			
 
				+	if (inode == osb->root_inode || is_bad_inode(inode))
			
 
				+		goto bail;
			
 
				+
			
 
				+	spin_lock(&OCFS2_I(inode)->ip_lock);
			
 
				+	/* did we or someone else delete this inode? */
			
 
				+	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
			
 
				 		spin_unlock(&OCFS2_I(inode)->ip_lock);
			
 
				+		mlog(0, "inode (%llu) deleted, returning false\n",
			
 
				+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
			
 
				+		goto bail;
			
 
				+	}
			
 
				+	spin_unlock(&OCFS2_I(inode)->ip_lock);
			
 
				 
			
 
				-		if (!inode->i_nlink) {
			
 
				-			mlog(0, "Inode %llu orphaned, returning false "
			
 
				-			     "dir = %d\n",
			
 
				-			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			
 
				-			     S_ISDIR(inode->i_mode));
			
 
				-			goto bail;
			
 
				-		}
			
 
				+	/*
			
 
				+	 * We don't need a cluster lock to test this because once an
			
 
				+	 * inode nlink hits zero, it never goes back.
			
 
				+	 */
			
 
				+	if (inode->i_nlink == 0) {
			
 
				+		mlog(0, "Inode %llu orphaned, returning false "
			
 
				+		     "dir = %d\n",
			
 
				+		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			
 
				+		     S_ISDIR(inode->i_mode));
			
 
				+		goto bail;
			
 
				 	}
			
 
				 
			
 
				 	ret = 1;
			
@@ -87,6 +92,322 @@ bail:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static int ocfs2_match_dentry(struct dentry *dentry,
			
 
				+			      u64 parent_blkno,
			
 
				+			      int skip_unhashed)
			
 
				+{
			
 
				+	struct inode *parent;
			
 
				+
			
 
				+	/*
			
 
				+	 * ocfs2_lookup() does a d_splice_alias() _before_ attaching
			
 
				+	 * to the lock data, so we skip those here, otherwise
			
 
				+	 * ocfs2_dentry_attach_lock() will get its original dentry
			
 
				+	 * back.
			
 
				+	 */
			
 
				+	if (!dentry->d_fsdata)
			
 
				+		return 0;
			
 
				+
			
 
				+	if (!dentry->d_parent)
			
 
				+		return 0;
			
 
				+
			
 
				+	if (skip_unhashed && d_unhashed(dentry))
			
 
				+		return 0;
			
 
				+
			
 
				+	parent = dentry->d_parent->d_inode;
			
 
				+	/* Negative parent dentry? */
			
 
				+	if (!parent)
			
 
				+		return 0;
			
 
				+
			
 
				+	/* Name is in a different directory. */
			
 
				+	if (OCFS2_I(parent)->ip_blkno != parent_blkno)
			
 
				+		return 0;
			
 
				+
			
 
				+	return 1;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Walk the inode alias list, and find a dentry which has a given
			
 
				+ * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
			
 
				+ * is looking for a dentry_lock reference. The vote thread is looking
			
 
				+ * to unhash aliases, so we allow it to skip any that already have
			
 
				+ * that property.
			
 
				+ */
			
 
				+struct dentry *ocfs2_find_local_alias(struct inode *inode,
			
 
				+				      u64 parent_blkno,
			
 
				+				      int skip_unhashed)
			
 
				+{
			
 
				+	struct list_head *p;
			
 
				+	struct dentry *dentry = NULL;
			
 
				+
			
 
				+	spin_lock(&dcache_lock);
			
 
				+
			
 
				+	list_for_each(p, &inode->i_dentry) {
			
 
				+		dentry = list_entry(p, struct dentry, d_alias);
			
 
				+
			
 
				+		if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
			
 
				+			mlog(0, "dentry found: %.*s\n",
			
 
				+			     dentry->d_name.len, dentry->d_name.name);
			
 
				+
			
 
				+			dget_locked(dentry);
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				+		dentry = NULL;
			
 
				+	}
			
 
				+
			
 
				+	spin_unlock(&dcache_lock);
			
 
				+
			
 
				+	return dentry;
			
 
				+}
			
 
				+
			
 
				+DEFINE_SPINLOCK(dentry_attach_lock);
			
 
				+
			
 
				+/*
			
 
				+ * Attach this dentry to a cluster lock.
			
 
				+ *
			
 
				+ * Dentry locks cover all links in a given directory to a particular
			
 
				+ * inode. We do this so that ocfs2 can build a lock name which all
			
 
				+ * nodes in the cluster can agree on at all times. Shoving full names
			
 
				+ * in the cluster lock won't work due to size restrictions. Covering
			
 
				+ * links inside of a directory is a good compromise because it still
			
 
				+ * allows us to use the parent directory lock to synchronize
			
 
				+ * operations.
			
 
				+ *
			
 
				+ * Call this function with the parent dir semaphore and the parent dir
			
 
				+ * cluster lock held.
			
 
				+ *
			
 
				+ * The dir semaphore will protect us from having to worry about
			
 
				+ * concurrent processes on our node trying to attach a lock at the
			
 
				+ * same time.
			
 
				+ *
			
 
				+ * The dir cluster lock (held at either PR or EX mode) protects us
			
 
				+ * from unlink and rename on other nodes.
			
 
				+ *
			
 
				+ * A dput() can happen asynchronously due to pruning, so we cover
			
 
				+ * attaching and detaching the dentry lock with a
			
 
				+ * dentry_attach_lock.
			
 
				+ *
			
 
				+ * A node which has done lookup on a name retains a protected read
			
 
				+ * lock until final dput. If the user requests an unlink or rename,
			
 
				+ * the protected read is upgraded to an exclusive lock. Other nodes
			
 
				+ * who have seen the dentry will then be informed that they need to
			
 
				+ * downgrade their lock, which will involve d_delete on the
			
 
				+ * dentry. This happens in ocfs2_dentry_convert_worker().
			
 
				+ */
			
 
				+int ocfs2_dentry_attach_lock(struct dentry *dentry,
			
 
				+			     struct inode *inode,
			
 
				+			     u64 parent_blkno)
			
 
				+{
			
 
				+	int ret;
			
 
				+	struct dentry *alias;
			
 
				+	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
			
 
				+
			
 
				+	mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n",
			
 
				+	     dentry->d_name.len, dentry->d_name.name,
			
 
				+	     (unsigned long long)parent_blkno, dl);
			
 
				+
			
 
				+	/*
			
 
				+	 * Negative dentry. We ignore these for now.
			
 
				+	 *
			
 
				+	 * XXX: Could we improve ocfs2_dentry_revalidate() by
			
 
				+	 * tracking these?
			
 
				+	 */
			
 
				+	if (!inode)
			
 
				+		return 0;
			
 
				+
			
 
				+	if (dl) {
			
 
				+		mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
			
 
				+				" \"%.*s\": old parent: %llu, new: %llu\n",
			
 
				+				dentry->d_name.len, dentry->d_name.name,
			
 
				+				(unsigned long long)parent_blkno,
			
 
				+				(unsigned long long)dl->dl_parent_blkno);
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
			
 
				+	if (alias) {
			
 
				+		/*
			
 
				+		 * Great, an alias exists, which means we must have a
			
 
				+		 * dentry lock already. We can just grab the lock off
			
 
				+		 * the alias and add it to the list.
			
 
				+		 *
			
 
				+		 * We're depending here on the fact that this dentry
			
 
				+		 * was found and exists in the dcache and so must have
			
 
				+		 * a reference to the dentry_lock because we can't
			
 
				+		 * race creates. Final dput() cannot happen on it
			
 
				+		 * since we have it pinned, so our reference is safe.
			
 
				+		 */
			
 
				+		dl = alias->d_fsdata;
			
 
				+		mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n",
			
 
				+				(unsigned long long)parent_blkno,
			
 
				+				(unsigned long long)OCFS2_I(inode)->ip_blkno);
			
 
				+
			
 
				+		mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
			
 
				+				" \"%.*s\": old parent: %llu, new: %llu\n",
			
 
				+				dentry->d_name.len, dentry->d_name.name,
			
 
				+				(unsigned long long)parent_blkno,
			
 
				+				(unsigned long long)dl->dl_parent_blkno);
			
 
				+
			
 
				+		mlog(0, "Found: %s\n", dl->dl_lockres.l_name);
			
 
				+
			
 
				+		goto out_attach;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * There are no other aliases
			
 
				+	 */
			
 
				+	dl = kmalloc(sizeof(*dl), GFP_NOFS);
			
 
				+	if (!dl) {
			
 
				+		ret = -ENOMEM;
			
 
				+		mlog_errno(ret);
			
 
				+		return ret;
			
 
				+	}
			
 
				+
			
 
				+	dl->dl_count = 0;
			
 
				+	/*
			
 
				+	 * Does this have to happen below, for all attaches, in case
			
 
				+	 * the struct inode gets blown away by votes?
			
 
				+	 */
			
 
				+	dl->dl_inode = igrab(inode);
			
 
				+	dl->dl_parent_blkno = parent_blkno;
			
 
				+	ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
			
 
				+
			
 
				+out_attach:
			
 
				+	spin_lock(&dentry_attach_lock);
			
 
				+	dentry->d_fsdata = dl;
			
 
				+	dl->dl_count++;
			
 
				+	spin_unlock(&dentry_attach_lock);
			
 
				+
			
 
				+	/*
			
 
				+	 * This actually gets us our PRMODE level lock. From now on,
			
 
				+	 * we'll have a notification if one of these names is
			
 
				+	 * destroyed on another node.
			
 
				+	 */
			
 
				+	ret = ocfs2_dentry_lock(dentry, 0);
			
 
				+	if (!ret)
			
 
				+		ocfs2_dentry_unlock(dentry, 0);
			
 
				+	else
			
 
				+		mlog_errno(ret);
			
 
				+
			
 
				+	dput(alias);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * ocfs2_dentry_iput() and friends.
			
 
				+ *
			
 
				+ * At this point, our particular dentry is detached from the inodes
			
 
				+ * alias list, so there's no way that the locking code can find it.
			
 
				+ *
			
 
				+ * The interesting stuff happens when we determine that our lock needs
			
 
				+ * to go away because this is the last subdir alias in the
			
 
				+ * system. This function needs to handle a couple things:
			
 
				+ *
			
 
				+ * 1) Synchronizing lock shutdown with the downconvert threads. This
			
 
				+ *    is already handled for us via the lockres release drop function
			
 
				+ *    called in ocfs2_release_dentry_lock()
			
 
				+ *
			
 
				+ * 2) A race may occur when we're doing our lock shutdown and
			
 
				+ *    another process wants to create a new dentry lock. Right now we
			
 
				+ *    let them race, which means that for a very short while, this
			
 
				+ *    node might have two locks on a lock resource. This shouldn't be a
			
 
				+ *    problem though because one of them is in the process of being
			
 
				+ *    thrown out.
			
 
				+ */
			
 
				+static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
			
 
				+				   struct ocfs2_dentry_lock *dl)
			
 
				+{
			
 
				+	ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
			
 
				+	ocfs2_lock_res_free(&dl->dl_lockres);
			
 
				+	iput(dl->dl_inode);
			
 
				+	kfree(dl);
			
 
				+}
			
 
				+
			
 
				+void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
			
 
				+			   struct ocfs2_dentry_lock *dl)
			
 
				+{
			
 
				+	int unlock = 0;
			
 
				+
			
 
				+	BUG_ON(dl->dl_count == 0);
			
 
				+
			
 
				+	spin_lock(&dentry_attach_lock);
			
 
				+	dl->dl_count--;
			
 
				+	unlock = !dl->dl_count;
			
 
				+	spin_unlock(&dentry_attach_lock);
			
 
				+
			
 
				+	if (unlock)
			
 
				+		ocfs2_drop_dentry_lock(osb, dl);
			
 
				+}
			
 
				+
			
 
				+static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
			
 
				+{
			
 
				+	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
			
 
				+
			
 
				+	mlog_bug_on_msg(!dl && !(dentry->d_flags & DCACHE_DISCONNECTED),
			
 
				+			"dentry: %.*s\n", dentry->d_name.len,
			
 
				+			dentry->d_name.name);
			
 
				+
			
 
				+	if (!dl)
			
 
				+		goto out;
			
 
				+
			
 
				+	mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
			
 
				+			dentry->d_name.len, dentry->d_name.name,
			
 
				+			dl->dl_count);
			
 
				+
			
 
				+	ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
			
 
				+
			
 
				+out:
			
 
				+	iput(inode);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * d_move(), but keep the locks in sync.
			
 
				+ *
			
 
				+ * When we are done, "dentry" will have the parent dir and name of
			
 
				+ * "target", which will be thrown away.
			
 
				+ *
			
 
				+ * We manually update the lock of "dentry" if need be.
			
 
				+ *
			
 
				+ * "target" doesn't have its dentry lock touched - we allow the later
			
 
				+ * dput() to handle this for us.
			
 
				+ *
			
 
				+ * This is called during ocfs2_rename(), while holding parent
			
 
				+ * directory locks. The dentries have already been deleted on other
			
 
				+ * nodes via ocfs2_remote_dentry_delete().
			
 
				+ *
			
 
				+ * Normally, the VFS handles the d_move() for the file system, after
			
 
				+ * the ->rename() callback. OCFS2 wants to handle this internally, so
			
 
				+ * the new lock can be created atomically with respect to the cluster.
			
 
				+ */
			
 
				+void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
			
 
				+		       struct inode *old_dir, struct inode *new_dir)
			
 
				+{
			
 
				+	int ret;
			
 
				+	struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
			
 
				+	struct inode *inode = dentry->d_inode;
			
 
				+
			
 
				+	/*
			
 
				+	 * Move within the same directory, so the actual lock info won't
			
 
				+	 * change.
			
 
				+	 *
			
 
				+	 * XXX: Is there any advantage to dropping the lock here?
			
 
				+	 */
			
 
				+	if (old_dir == new_dir)
			
 
				+		goto out_move;
			
 
				+
			
 
				+	ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
			
 
				+
			
 
				+	dentry->d_fsdata = NULL;
			
 
				+	ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno);
			
 
				+	if (ret)
			
 
				+		mlog_errno(ret);
			
 
				+
			
 
				+out_move:
			
 
				+	d_move(dentry, target);
			
 
				+}
			
 
				+
			
 
				 struct dentry_operations ocfs2_dentry_ops = {
			
 
				 	.d_revalidate		= ocfs2_dentry_revalidate,
			
 
				+	.d_iput			= ocfs2_dentry_iput,
			
 
				 };
			
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -28,4 +28,31 @@
 
				 
			
 
				 extern struct dentry_operations ocfs2_dentry_ops;
			
 
				 
			
 
				+struct ocfs2_dentry_lock {
			
 
				+	unsigned int		dl_count;
			
 
				+	u64			dl_parent_blkno;
			
 
				+
			
 
				+	/*
			
 
				+	 * The ocfs2_dentry_lock keeps an inode reference until
			
 
				+	 * dl_lockres has been destroyed. This is usually done in
			
 
				+	 * ->d_iput() anyway, so there should be minimal impact.
			
 
				+	 */
			
 
				+	struct inode		*dl_inode;
			
 
				+	struct ocfs2_lock_res	dl_lockres;
			
 
				+};
			
 
				+
			
 
				+int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
			
 
				+			     u64 parent_blkno);
			
 
				+
			
 
				+void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
			
 
				+			   struct ocfs2_dentry_lock *dl);
			
 
				+
			
 
				+struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
			
 
				+				      int skip_unhashed);
			
 
				+
			
 
				+void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
			
 
				+		       struct inode *old_dir, struct inode *new_dir);
			
 
				+
			
 
				+extern spinlock_t dentry_attach_lock;
			
 
				+
			
 
				 #endif /* OCFS2_DCACHE_H */
			
--- a/fs/ocfs2/dlm/dlmapi.h
+++ b/fs/ocfs2/dlm/dlmapi.h
@@ -182,6 +182,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm,
 
				 			struct dlm_lockstatus *lksb,
			
 
				 			int flags,
			
 
				 			const char *name,
			
 
				+			int namelen,
			
 
				 			dlm_astlockfunc_t *ast,
			
 
				 			void *data,
			
 
				 			dlm_bastlockfunc_t *bast);
			
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -320,8 +320,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
 
				 
			
 
				 	res = dlm_lookup_lockres(dlm, name, locklen);
			
 
				 	if (!res) {
			
 
				-		mlog(ML_ERROR, "got %sast for unknown lockres! "
			
 
				-			       "cookie=%u:%llu, name=%.*s, namelen=%u\n",
			
 
				+		mlog(0, "got %sast for unknown lockres! "
			
 
				+		     "cookie=%u:%llu, name=%.*s, namelen=%u\n",
			
 
				 		     past->type == DLM_AST ? "" : "b",
			
 
				 		     dlm_get_lock_cookie_node(cookie),
			
 
				 		     dlm_get_lock_cookie_seq(cookie),
			
@@ -462,7 +462,7 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 
				 			mlog(ML_ERROR, "sent AST to node %u, it returned "
			
 
				 			     "DLM_MIGRATING!\n", lock->ml.node);
			
 
				 			BUG();
			
 
				-		} else if (status != DLM_NORMAL) {
			
 
				+		} else if (status != DLM_NORMAL && status != DLM_IVLOCKID) {
			
 
				 			mlog(ML_ERROR, "AST to node %u returned %d!\n",
			
 
				 			     lock->ml.node, status);
			
 
				 			/* ignore it */
			
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -747,6 +747,7 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
 
				 			      u8 owner);
			
 
				 struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
			
 
				 						 const char *lockid,
			
 
				+						 int namelen,
			
 
				 						 int flags);
			
 
				 struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
			
 
				 					  const char *name,
			
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -540,8 +540,8 @@ static inline void dlm_get_next_cookie(u8 node_num, u64 *cookie)
 
				 
			
 
				 enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
			
 
				 			struct dlm_lockstatus *lksb, int flags,
			
 
				-			const char *name, dlm_astlockfunc_t *ast, void *data,
			
 
				-			dlm_bastlockfunc_t *bast)
			
 
				+			const char *name, int namelen, dlm_astlockfunc_t *ast,
			
 
				+			void *data, dlm_bastlockfunc_t *bast)
			
 
				 {
			
 
				 	enum dlm_status status;
			
 
				 	struct dlm_lock_resource *res = NULL;
			
@@ -571,7 +571,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
 
				 	recovery = (flags & LKM_RECOVERY);
			
 
				 
			
 
				 	if (recovery &&
			
 
				-	    (!dlm_is_recovery_lock(name, strlen(name)) || convert) ) {
			
 
				+	    (!dlm_is_recovery_lock(name, namelen) || convert) ) {
			
 
				 		dlm_error(status);
			
 
				 		goto error;
			
 
				 	}
			
@@ -643,7 +643,7 @@ retry_convert:
 
				 		}
			
 
				 
			
 
				 		status = DLM_IVBUFLEN;
			
 
				-		if (strlen(name) > DLM_LOCKID_NAME_MAX || strlen(name) < 1) {
			
 
				+		if (namelen > DLM_LOCKID_NAME_MAX || namelen < 1) {
			
 
				 			dlm_error(status);
			
 
				 			goto error;
			
 
				 		}
			
@@ -659,7 +659,7 @@ retry_convert:
 
				 			dlm_wait_for_recovery(dlm);
			
 
				 
			
 
				 		/* find or create the lock resource */
			
 
				-		res = dlm_get_lock_resource(dlm, name, flags);
			
 
				+		res = dlm_get_lock_resource(dlm, name, namelen, flags);
			
 
				 		if (!res) {
			
 
				 			status = DLM_IVLOCKID;
			
 
				 			dlm_error(status);
			
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -740,6 +740,7 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
 
				  */
			
 
				 struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
			
 
				 					  const char *lockid,
			
 
				+					  int namelen,
			
 
				 					  int flags)
			
 
				 {
			
 
				 	struct dlm_lock_resource *tmpres=NULL, *res=NULL;
			
@@ -748,13 +749,12 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
 
				 	int blocked = 0;
			
 
				 	int ret, nodenum;
			
 
				 	struct dlm_node_iter iter;
			
 
				-	unsigned int namelen, hash;
			
 
				+	unsigned int hash;
			
 
				 	int tries = 0;
			
 
				 	int bit, wait_on_recovery = 0;
			
 
				 
			
 
				 	BUG_ON(!lockid);
			
 
				 
			
 
				-	namelen = strlen(lockid);
			
 
				 	hash = dlm_lockid_hash(lockid, namelen);
			
 
				 
			
 
				 	mlog(0, "get lockres %s (len %d)\n", lockid, namelen);
			
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2285,7 +2285,8 @@ again:
 
				 	memset(&lksb, 0, sizeof(lksb));
			
 
				 
			
 
				 	ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
			
 
				-		      DLM_RECOVERY_LOCK_NAME, dlm_reco_ast, dlm, dlm_reco_bast);
			
 
				+		      DLM_RECOVERY_LOCK_NAME, DLM_RECOVERY_LOCK_NAME_LEN,
			
 
				+		      dlm_reco_ast, dlm, dlm_reco_bast);
			
 
				 
			
 
				 	mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n",
			
 
				 	     dlm->name, ret, lksb.status);
			
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -102,10 +102,10 @@ static inline void user_recover_from_dlm_error(struct user_lock_res *lockres)
 
				 	spin_unlock(&lockres->l_lock);
			
 
				 }
			
 
				 
			
 
				-#define user_log_dlm_error(_func, _stat, _lockres) do {		\
			
 
				-	mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on "	\
			
 
				-		"resource %s: %s\n", dlm_errname(_stat), _func,	\
			
 
				-		_lockres->l_name, dlm_errmsg(_stat));		\
			
 
				+#define user_log_dlm_error(_func, _stat, _lockres) do {			\
			
 
				+	mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on "		\
			
 
				+		"resource %.*s: %s\n", dlm_errname(_stat), _func,	\
			
 
				+		_lockres->l_namelen, _lockres->l_name, dlm_errmsg(_stat)); \
			
 
				 } while (0)
			
 
				 
			
 
				 /* WARNING: This function lives in a world where the only three lock
			
@@ -127,21 +127,22 @@ static void user_ast(void *opaque)
 
				 	struct user_lock_res *lockres = opaque;
			
 
				 	struct dlm_lockstatus *lksb;
			
 
				 
			
 
				-	mlog(0, "AST fired for lockres %s\n", lockres->l_name);
			
 
				+	mlog(0, "AST fired for lockres %.*s\n", lockres->l_namelen,
			
 
				+	     lockres->l_name);
			
 
				 
			
 
				 	spin_lock(&lockres->l_lock);
			
 
				 
			
 
				 	lksb = &(lockres->l_lksb);
			
 
				 	if (lksb->status != DLM_NORMAL) {
			
 
				-		mlog(ML_ERROR, "lksb status value of %u on lockres %s\n",
			
 
				-		     lksb->status, lockres->l_name);
			
 
				+		mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n",
			
 
				+		     lksb->status, lockres->l_namelen, lockres->l_name);
			
 
				 		spin_unlock(&lockres->l_lock);
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				 	mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
			
 
				-			"Lockres %s, requested ivmode. flags 0x%x\n",
			
 
				-			lockres->l_name, lockres->l_flags);
			
 
				+			"Lockres %.*s, requested ivmode. flags 0x%x\n",
			
 
				+			lockres->l_namelen, lockres->l_name, lockres->l_flags);
			
 
				 
			
 
				 	/* we're downconverting. */
			
 
				 	if (lockres->l_requested < lockres->l_level) {
			
@@ -213,8 +214,8 @@ static void user_bast(void *opaque, int level)
 
				 {
			
 
				 	struct user_lock_res *lockres = opaque;
			
 
				 
			
 
				-	mlog(0, "Blocking AST fired for lockres %s. Blocking level %d\n",
			
 
				-		lockres->l_name, level);
			
 
				+	mlog(0, "Blocking AST fired for lockres %.*s. Blocking level %d\n",
			
 
				+	     lockres->l_namelen, lockres->l_name, level);
			
 
				 
			
 
				 	spin_lock(&lockres->l_lock);
			
 
				 	lockres->l_flags |= USER_LOCK_BLOCKED;
			
@@ -231,7 +232,8 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
 
				 {
			
 
				 	struct user_lock_res *lockres = opaque;
			
 
				 
			
 
				-	mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
			
 
				+	mlog(0, "UNLOCK AST called on lock %.*s\n", lockres->l_namelen,
			
 
				+	     lockres->l_name);
			
 
				 
			
 
				 	if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
			
 
				 		mlog(ML_ERROR, "Dlm returns status %d\n", status);
			
@@ -244,8 +246,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
 
				 	    && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
			
 
				 		lockres->l_level = LKM_IVMODE;
			
 
				 	} else if (status == DLM_CANCELGRANT) {
			
 
				-		mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
			
 
				-		     lockres->l_name, lockres->l_flags);
			
 
				 		/* We tried to cancel a convert request, but it was
			
 
				 		 * already granted. Don't clear the busy flag - the
			
 
				 		 * ast should've done this already. */
			
@@ -255,8 +255,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
 
				 	} else {
			
 
				 		BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
			
 
				 		/* Cancel succeeded, we want to re-queue */
			
 
				-		mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
			
 
				-		     lockres->l_name, lockres->l_flags);
			
 
				 		lockres->l_requested = LKM_IVMODE; /* cancel an
			
 
				 						    * upconvert
			
 
				 						    * request. */
			
@@ -287,13 +285,14 @@ static void user_dlm_unblock_lock(void *opaque)
 
				 	struct user_lock_res *lockres = (struct user_lock_res *) opaque;
			
 
				 	struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
			
 
				 
			
 
				-	mlog(0, "processing lockres %s\n", lockres->l_name);
			
 
				+	mlog(0, "processing lockres %.*s\n", lockres->l_namelen,
			
 
				+	     lockres->l_name);
			
 
				 
			
 
				 	spin_lock(&lockres->l_lock);
			
 
				 
			
 
				 	mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
			
 
				-			"Lockres %s, flags 0x%x\n",
			
 
				-			lockres->l_name, lockres->l_flags);
			
 
				+			"Lockres %.*s, flags 0x%x\n",
			
 
				+			lockres->l_namelen, lockres->l_name, lockres->l_flags);
			
 
				 
			
 
				 	/* notice that we don't clear USER_LOCK_BLOCKED here. If it's
			
 
				 	 * set, we want user_ast clear it. */
			
@@ -305,22 +304,16 @@ static void user_dlm_unblock_lock(void *opaque)
 
				 	 * flag, and finally we might get another bast which re-queues
			
 
				 	 * us before our ast for the downconvert is called. */
			
 
				 	if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
			
 
				-		mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
			
 
				-			lockres->l_name, lockres->l_flags);
			
 
				 		spin_unlock(&lockres->l_lock);
			
 
				 		goto drop_ref;
			
 
				 	}
			
 
				 
			
 
				 	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
			
 
				-		mlog(0, "lock is in teardown so we do nothing\n");
			
 
				 		spin_unlock(&lockres->l_lock);
			
 
				 		goto drop_ref;
			
 
				 	}
			
 
				 
			
 
				 	if (lockres->l_flags & USER_LOCK_BUSY) {
			
 
				-		mlog(0, "Cancel lock %s, flags 0x%x\n",
			
 
				-		     lockres->l_name, lockres->l_flags);
			
 
				-
			
 
				 		if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
			
 
				 			spin_unlock(&lockres->l_lock);
			
 
				 			goto drop_ref;
			
@@ -372,6 +365,7 @@ static void user_dlm_unblock_lock(void *opaque)
 
				 			 &lockres->l_lksb,
			
 
				 			 LKM_CONVERT|LKM_VALBLK,
			
 
				 			 lockres->l_name,
			
 
				+			 lockres->l_namelen,
			
 
				 			 user_ast,
			
 
				 			 lockres,
			
 
				 			 user_bast);
			
@@ -420,16 +414,16 @@ int user_dlm_cluster_lock(struct user_lock_res *lockres,
 
				 
			
 
				 	if (level != LKM_EXMODE &&
			
 
				 	    level != LKM_PRMODE) {
			
 
				-		mlog(ML_ERROR, "lockres %s: invalid request!\n",
			
 
				-		     lockres->l_name);
			
 
				+		mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
			
 
				+		     lockres->l_namelen, lockres->l_name);
			
 
				 		status = -EINVAL;
			
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				-	mlog(0, "lockres %s: asking for %s lock, passed flags = 0x%x\n",
			
 
				-		lockres->l_name,
			
 
				-		(level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE",
			
 
				-		lkm_flags);
			
 
				+	mlog(0, "lockres %.*s: asking for %s lock, passed flags = 0x%x\n",
			
 
				+	     lockres->l_namelen, lockres->l_name,
			
 
				+	     (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE",
			
 
				+	     lkm_flags);
			
 
				 
			
 
				 again:
			
 
				 	if (signal_pending(current)) {
			
@@ -474,15 +468,13 @@ again:
 
				 		BUG_ON(level == LKM_IVMODE);
			
 
				 		BUG_ON(level == LKM_NLMODE);
			
 
				 
			
 
				-		mlog(0, "lock %s, get lock from %d to level = %d\n",
			
 
				-			lockres->l_name, lockres->l_level, level);
			
 
				-
			
 
				 		/* call dlm_lock to upgrade lock now */
			
 
				 		status = dlmlock(dlm,
			
 
				 				 level,
			
 
				 				 &lockres->l_lksb,
			
 
				 				 local_flags,
			
 
				 				 lockres->l_name,
			
 
				+				 lockres->l_namelen,
			
 
				 				 user_ast,
			
 
				 				 lockres,
			
 
				 				 user_bast);
			
@@ -498,9 +490,6 @@ again:
 
				 			goto bail;
			
 
				 		}
			
 
				 
			
 
				-		mlog(0, "lock %s, successfull return from dlmlock\n",
			
 
				-			lockres->l_name);
			
 
				-
			
 
				 		user_wait_on_busy_lock(lockres);
			
 
				 		goto again;
			
 
				 	}
			
@@ -508,9 +497,6 @@ again:
 
				 	user_dlm_inc_holders(lockres, level);
			
 
				 	spin_unlock(&lockres->l_lock);
			
 
				 
			
 
				-	mlog(0, "lockres %s: Got %s lock!\n", lockres->l_name,
			
 
				-		(level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
			
 
				-
			
 
				 	status = 0;
			
 
				 bail:
			
 
				 	return status;
			
@@ -538,13 +524,11 @@ void user_dlm_cluster_unlock(struct user_lock_res *lockres,
 
				 {
			
 
				 	if (level != LKM_EXMODE &&
			
 
				 	    level != LKM_PRMODE) {
			
 
				-		mlog(ML_ERROR, "lockres %s: invalid request!\n", lockres->l_name);
			
 
				+		mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
			
 
				+		     lockres->l_namelen, lockres->l_name);
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	mlog(0, "lockres %s: dropping %s lock\n", lockres->l_name,
			
 
				-		(level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
			
 
				-
			
 
				 	spin_lock(&lockres->l_lock);
			
 
				 	user_dlm_dec_holders(lockres, level);
			
 
				 	__user_dlm_cond_queue_lockres(lockres);
			
@@ -602,6 +586,7 @@ void user_dlm_lock_res_init(struct user_lock_res *lockres,
 
				 	memcpy(lockres->l_name,
			
 
				 	       dentry->d_name.name,
			
 
				 	       dentry->d_name.len);
			
 
				+	lockres->l_namelen = dentry->d_name.len;
			
 
				 }
			
 
				 
			
 
				 int user_dlm_destroy_lock(struct user_lock_res *lockres)
			
@@ -609,11 +594,10 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
 
				 	int status = -EBUSY;
			
 
				 	struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
			
 
				 
			
 
				-	mlog(0, "asked to destroy %s\n", lockres->l_name);
			
 
				+	mlog(0, "asked to destroy %.*s\n", lockres->l_namelen, lockres->l_name);
			
 
				 
			
 
				 	spin_lock(&lockres->l_lock);
			
 
				 	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
			
 
				-		mlog(0, "Lock is already torn down\n");
			
 
				 		spin_unlock(&lockres->l_lock);
			
 
				 		return 0;
			
 
				 	}
			
@@ -623,8 +607,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
 
				 	while (lockres->l_flags & USER_LOCK_BUSY) {
			
 
				 		spin_unlock(&lockres->l_lock);
			
 
				 
			
 
				-		mlog(0, "lock %s is busy\n", lockres->l_name);
			
 
				-
			
 
				 		user_wait_on_busy_lock(lockres);
			
 
				 
			
 
				 		spin_lock(&lockres->l_lock);
			
@@ -632,14 +614,12 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
 
				 
			
 
				 	if (lockres->l_ro_holders || lockres->l_ex_holders) {
			
 
				 		spin_unlock(&lockres->l_lock);
			
 
				-		mlog(0, "lock %s has holders\n", lockres->l_name);
			
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				 	status = 0;
			
 
				 	if (!(lockres->l_flags & USER_LOCK_ATTACHED)) {
			
 
				 		spin_unlock(&lockres->l_lock);
			
 
				-		mlog(0, "lock %s is not attached\n", lockres->l_name);
			
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
@@ -647,7 +627,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
 
				 	lockres->l_flags |= USER_LOCK_BUSY;
			
 
				 	spin_unlock(&lockres->l_lock);
			
 
				 
			
 
				-	mlog(0, "unlocking lockres %s\n", lockres->l_name);
			
 
				 	status = dlmunlock(dlm,
			
 
				 			   &lockres->l_lksb,
			
 
				 			   LKM_VALBLK,
			
--- a/fs/ocfs2/dlm/userdlm.h
+++ b/fs/ocfs2/dlm/userdlm.h
@@ -53,6 +53,7 @@ struct user_lock_res {
 
				 
			
 
				 #define USER_DLM_LOCK_ID_MAX_LEN  32
			
 
				 	char                     l_name[USER_DLM_LOCK_ID_MAX_LEN];
			
 
				+	int                      l_namelen;
			
 
				 	int                      l_level;
			
 
				 	unsigned int             l_ro_holders;
			
 
				 	unsigned int             l_ex_holders;
			
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -27,10 +27,14 @@
 
				 #ifndef DLMGLUE_H
			
 
				 #define DLMGLUE_H
			
 
				 
			
 
				-#define OCFS2_LVB_VERSION 3
			
 
				+#include "dcache.h"
			
 
				+
			
 
				+#define OCFS2_LVB_VERSION 4
			
 
				 
			
 
				 struct ocfs2_meta_lvb {
			
 
				-	__be32       lvb_version;
			
 
				+	__u8         lvb_version;
			
 
				+	__u8         lvb_reserved0;
			
 
				+	__be16       lvb_reserved1;
			
 
				 	__be32       lvb_iclusters;
			
 
				 	__be32       lvb_iuid;
			
 
				 	__be32       lvb_igid;
			
@@ -41,7 +45,8 @@ struct ocfs2_meta_lvb {
 
				 	__be16       lvb_imode;
			
 
				 	__be16       lvb_inlink;
			
 
				 	__be32       lvb_iattr;
			
 
				-	__be32       lvb_reserved[2];
			
 
				+	__be32       lvb_igeneration;
			
 
				+	__be32       lvb_reserved2;
			
 
				 };
			
 
				 
			
 
				 /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
			
@@ -57,9 +62,14 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb);
 
				 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
			
 
				 void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
			
 
				 			       enum ocfs2_lock_type type,
			
 
				+			       unsigned int generation,
			
 
				 			       struct inode *inode);
			
 
				+void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
			
 
				+				u64 parent, struct inode *inode);
			
 
				 void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
			
 
				 int ocfs2_create_new_inode_locks(struct inode *inode);
			
 
				+int ocfs2_create_new_lock(struct ocfs2_super *osb,
			
 
				+			  struct ocfs2_lock_res *lockres, int ex, int local);
			
 
				 int ocfs2_drop_inode_locks(struct inode *inode);
			
 
				 int ocfs2_data_lock_full(struct inode *inode,
			
 
				 			 int write,
			
@@ -93,7 +103,12 @@ void ocfs2_super_unlock(struct ocfs2_super *osb,
 
				 			int ex);
			
 
				 int ocfs2_rename_lock(struct ocfs2_super *osb);
			
 
				 void ocfs2_rename_unlock(struct ocfs2_super *osb);
			
 
				+int ocfs2_dentry_lock(struct dentry *dentry, int ex);
			
 
				+void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
			
 
				+
			
 
				 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
			
 
				+void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			
 
				+			       struct ocfs2_lock_res *lockres);
			
 
				 
			
 
				 /* for the vote thread */
			
 
				 void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
			
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -33,6 +33,7 @@
 
				 
			
 
				 #include "dir.h"
			
 
				 #include "dlmglue.h"
			
 
				+#include "dcache.h"
			
 
				 #include "export.h"
			
 
				 #include "inode.h"
			
 
				 
			
@@ -57,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
 
				 		return ERR_PTR(-ESTALE);
			
 
				 	}
			
 
				 
			
 
				-	inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno);
			
 
				+	inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0);
			
 
				 
			
 
				 	if (IS_ERR(inode)) {
			
 
				 		mlog_errno(PTR_ERR(inode));
			
@@ -77,6 +78,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
 
				 		mlog_errno(-ENOMEM);
			
 
				 		return ERR_PTR(-ENOMEM);
			
 
				 	}
			
 
				+	result->d_op = &ocfs2_dentry_ops;
			
 
				 
			
 
				 	mlog_exit_ptr(result);
			
 
				 	return result;
			
@@ -113,7 +115,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 
				 		goto bail_unlock;
			
 
				 	}
			
 
				 
			
 
				-	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
			
 
				+	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
			
 
				 	if (IS_ERR(inode)) {
			
 
				 		mlog(ML_ERROR, "Unable to create inode %llu\n",
			
 
				 		     (unsigned long long)blkno);
			
@@ -127,6 +129,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 
				 		parent = ERR_PTR(-ENOMEM);
			
 
				 	}
			
 
				 
			
 
				+	parent->d_op = &ocfs2_dentry_ops;
			
 
				+
			
 
				 bail_unlock:
			
 
				 	ocfs2_meta_unlock(dir, 0);
			
 
				 
			
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -54,8 +54,6 @@
 
				 
			
 
				 #include "buffer_head_io.h"
			
 
				 
			
 
				-#define OCFS2_FI_FLAG_NOWAIT	0x1
			
 
				-#define OCFS2_FI_FLAG_DELETE	0x2
			
 
				 struct ocfs2_find_inode_args
			
 
				 {
			
 
				 	u64		fi_blkno;
			
@@ -109,7 +107,7 @@ struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
 
				 	return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args);
			
 
				 }
			
 
				 
			
 
				-struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
			
 
				+struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
			
 
				 {
			
 
				 	struct inode *inode = NULL;
			
 
				 	struct super_block *sb = osb->sb;
			
@@ -127,7 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
 
				 	}
			
 
				 
			
 
				 	args.fi_blkno = blkno;
			
 
				-	args.fi_flags = 0;
			
 
				+	args.fi_flags = flags;
			
 
				 	args.fi_ino = ino_from_blkno(sb, blkno);
			
 
				 
			
 
				 	inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor,
			
@@ -297,15 +295,11 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
				 	OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
			
 
				 	OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
			
 
				 
			
 
				-	if (create_ino)
			
 
				-		inode->i_ino = ino_from_blkno(inode->i_sb,
			
 
				-			       le64_to_cpu(fe->i_blkno));
			
 
				-
			
 
				-	mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n",
			
 
				-	     (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false");
			
 
				-
			
 
				 	inode->i_nlink = le16_to_cpu(fe->i_links_count);
			
 
				 
			
 
				+	if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
			
 
				+		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
			
 
				+
			
 
				 	if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
			
 
				 		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
			
 
				 		mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
			
@@ -343,12 +337,28 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
				 		    break;
			
 
				 	}
			
 
				 
			
 
				+	if (create_ino) {
			
 
				+		inode->i_ino = ino_from_blkno(inode->i_sb,
			
 
				+			       le64_to_cpu(fe->i_blkno));
			
 
				+
			
 
				+		/*
			
 
				+		 * If we ever want to create system files from kernel,
			
 
				+		 * the generation argument to
			
 
				+		 * ocfs2_inode_lock_res_init() will have to change.
			
 
				+		 */
			
 
				+		BUG_ON(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL));
			
 
				+
			
 
				+		ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
			
 
				+					  OCFS2_LOCK_TYPE_META, 0, inode);
			
 
				+	}
			
 
				+
			
 
				 	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres,
			
 
				-				  OCFS2_LOCK_TYPE_RW, inode);
			
 
				-	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
			
 
				-				  OCFS2_LOCK_TYPE_META, inode);
			
 
				+				  OCFS2_LOCK_TYPE_RW, inode->i_generation,
			
 
				+				  inode);
			
 
				+
			
 
				 	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
			
 
				-				  OCFS2_LOCK_TYPE_DATA, inode);
			
 
				+				  OCFS2_LOCK_TYPE_DATA, inode->i_generation,
			
 
				+				  inode);
			
 
				 
			
 
				 	ocfs2_set_inode_flags(inode);
			
 
				 	inode->i_flags |= S_NOATIME;
			
@@ -366,15 +376,15 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 
				 	struct ocfs2_super *osb;
			
 
				 	struct ocfs2_dinode *fe;
			
 
				 	struct buffer_head *bh = NULL;
			
 
				-	int status;
			
 
				-	int sysfile = 0;
			
 
				+	int status, can_lock;
			
 
				+	u32 generation = 0;
			
 
				 
			
 
				 	mlog_entry("(0x%p, 0x%p)\n", inode, args);
			
 
				 
			
 
				 	status = -EINVAL;
			
 
				 	if (inode == NULL || inode->i_sb == NULL) {
			
 
				 		mlog(ML_ERROR, "bad inode\n");
			
 
				-		goto bail;
			
 
				+		return status;
			
 
				 	}
			
 
				 	sb = inode->i_sb;
			
 
				 	osb = OCFS2_SB(sb);
			
@@ -382,50 +392,110 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 
				 	if (!args) {
			
 
				 		mlog(ML_ERROR, "bad inode args\n");
			
 
				 		make_bad_inode(inode);
			
 
				-		goto bail;
			
 
				+		return status;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * To improve performance of cold-cache inode stats, we take
			
 
				+	 * the cluster lock here if possible.
			
 
				+	 *
			
 
				+	 * Generally, OCFS2 never trusts the contents of an inode
			
 
				+	 * unless it's holding a cluster lock, so taking it here isn't
			
 
				+	 * a correctness issue as much as it is a performance
			
 
				+	 * improvement.
			
 
				+	 *
			
 
				+	 * There are three times when taking the lock is not a good idea:
			
 
				+	 *
			
 
				+	 * 1) During startup, before we have initialized the DLM.
			
 
				+	 *
			
 
				+	 * 2) If we are reading certain system files which never get
			
 
				+	 *    cluster locks (local alloc, truncate log).
			
 
				+	 *
			
 
				+	 * 3) If the process doing the iget() is responsible for
			
 
				+	 *    orphan dir recovery. We're holding the orphan dir lock and
			
 
				+	 *    can get into a deadlock with another process on another
			
 
				+	 *    node in ->delete_inode().
			
 
				+	 *
			
 
				+	 * #1 and #2 can be simply solved by never taking the lock
			
 
				+	 * here for system files (which are the only type we read
			
 
				+	 * during mount). It's a heavier approach, but our main
			
 
				+	 * concern is user-accessible files anyway.
			
 
				+	 *
			
 
				+	 * #3 works itself out because we'll eventually take the
			
 
				+	 * cluster lock before trusting anything anyway.
			
 
				+	 */
			
 
				+	can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
			
 
				+		&& !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK);
			
 
				+
			
 
				+	/*
			
 
				+	 * To maintain backwards compatibility with older versions of
			
 
				+	 * ocfs2-tools, we still store the generation value for system
			
 
				+	 * files. The only ones that actually matter to userspace are
			
 
				+	 * the journals, but it's easier and inexpensive to just flag
			
 
				+	 * all system files similarly.
			
 
				+	 */
			
 
				+	if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
			
 
				+		generation = osb->fs_generation;
			
 
				+
			
 
				+	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
			
 
				+				  OCFS2_LOCK_TYPE_META,
			
 
				+				  generation, inode);
			
 
				+
			
 
				+	if (can_lock) {
			
 
				+		status = ocfs2_meta_lock(inode, NULL, NULL, 0);
			
 
				+		if (status) {
			
 
				+			make_bad_inode(inode);
			
 
				+			mlog_errno(status);
			
 
				+			return status;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				-	/* Read the FE off disk. This is safe because the kernel only
			
 
				-	 * does one read_inode2 for a new inode, and if it doesn't
			
 
				-	 * exist yet then nobody can be working on it! */
			
 
				-	status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, NULL);
			
 
				+	status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
			
 
				+				  can_lock ? inode : NULL);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				-		make_bad_inode(inode);
			
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				+	status = -EINVAL;
			
 
				 	fe = (struct ocfs2_dinode *) bh->b_data;
			
 
				 	if (!OCFS2_IS_VALID_DINODE(fe)) {
			
 
				 		mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
			
 
				 		     (unsigned long long)fe->i_blkno, 7, fe->i_signature);
			
 
				-		make_bad_inode(inode);
			
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				-	if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
			
 
				-		sysfile = 1;
			
 
				+	/*
			
 
				+	 * This is a code bug. Right now the caller needs to
			
 
				+	 * understand whether it is asking for a system file inode or
			
 
				+	 * not so the proper lock names can be built.
			
 
				+	 */
			
 
				+	mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
			
 
				+			!!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
			
 
				+			"Inode %llu: system file state is ambigous\n",
			
 
				+			(unsigned long long)args->fi_blkno);
			
 
				 
			
 
				 	if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
			
 
				 	    S_ISBLK(le16_to_cpu(fe->i_mode)))
			
 
				     		inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
			
 
				 
			
 
				-	status = -EINVAL;
			
 
				 	if (ocfs2_populate_inode(inode, fe, 0) < 0) {
			
 
				 		mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n",
			
 
				 		     (unsigned long long)fe->i_blkno, inode->i_ino);
			
 
				-		make_bad_inode(inode);
			
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				 	BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
			
 
				 
			
 
				-	if (sysfile)
			
 
				-	       OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
			
 
				-
			
 
				 	status = 0;
			
 
				 
			
 
				 bail:
			
 
				+	if (can_lock)
			
 
				+		ocfs2_meta_unlock(inode, 0);
			
 
				+
			
 
				+	if (status < 0)
			
 
				+		make_bad_inode(inode);
			
 
				+
			
 
				 	if (args && bh)
			
 
				 		brelse(bh);
			
 
				 
			
@@ -898,9 +968,15 @@ void ocfs2_delete_inode(struct inode *inode)
 
				 		goto bail_unlock_inode;
			
 
				 	}
			
 
				 
			
 
				-	/* Mark the inode as successfully deleted. This is important
			
 
				-	 * for ocfs2_clear_inode as it will check this flag and skip
			
 
				-	 * any checkpointing work */
			
 
				+	/*
			
 
				+	 * Mark the inode as successfully deleted.
			
 
				+	 *
			
 
				+	 * This is important for ocfs2_clear_inode() as it will check
			
 
				+	 * this flag and skip any checkpointing work
			
 
				+	 *
			
 
				+	 * ocfs2_stuff_meta_lvb() also uses this flag to invalidate
			
 
				+	 * the LVB for other nodes.
			
 
				+	 */
			
 
				 	OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
			
 
				 
			
 
				 bail_unlock_inode:
			
@@ -1025,12 +1101,10 @@ void ocfs2_drop_inode(struct inode *inode)
 
				 	/* Testing ip_orphaned_slot here wouldn't work because we may
			
 
				 	 * not have gotten a delete_inode vote from any other nodes
			
 
				 	 * yet. */
			
 
				-	if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) {
			
 
				-		mlog(0, "Inode was orphaned on another node, clearing nlink.\n");
			
 
				-		inode->i_nlink = 0;
			
 
				-	}
			
 
				-
			
 
				-	generic_drop_inode(inode);
			
 
				+	if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
			
 
				+		generic_delete_inode(inode);
			
 
				+	else
			
 
				+		generic_drop_inode(inode);
			
 
				 
			
 
				 	mlog_exit_void();
			
 
				 }
			
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -122,7 +122,13 @@ struct buffer_head *ocfs2_bread(struct inode *inode, int block,
 
				 void ocfs2_clear_inode(struct inode *inode);
			
 
				 void ocfs2_delete_inode(struct inode *inode);
			
 
				 void ocfs2_drop_inode(struct inode *inode);
			
 
				-struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff);
			
 
				+
			
 
				+/* Flags for ocfs2_iget() */
			
 
				+#define OCFS2_FI_FLAG_NOWAIT	0x1
			
 
				+#define OCFS2_FI_FLAG_DELETE	0x2
			
 
				+#define OCFS2_FI_FLAG_SYSFILE	0x4
			
 
				+#define OCFS2_FI_FLAG_NOLOCK	0x8
			
 
				+struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags);
			
 
				 struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
			
 
				 				     u64 blkno,
			
 
				 				     int delete_vote);
			
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1493,7 +1493,8 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 
				 			if (de->name_len == 2 && !strncmp("..", de->name, 2))
			
 
				 				continue;
			
 
				 
			
 
				-			iter = ocfs2_iget(osb, le64_to_cpu(de->inode));
			
 
				+			iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
			
 
				+					  OCFS2_FI_FLAG_NOLOCK);
			
 
				 			if (IS_ERR(iter))
			
 
				 				continue;
			
 
				 
			
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -179,7 +179,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 
				 	if (status < 0)
			
 
				 		goto bail_add;
			
 
				 
			
 
				-	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
			
 
				+	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
			
 
				 	if (IS_ERR(inode)) {
			
 
				 		mlog(ML_ERROR, "Unable to create inode %llu\n",
			
 
				 		     (unsigned long long)blkno);
			
@@ -199,10 +199,32 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 
				 	spin_unlock(&oi->ip_lock);
			
 
				 
			
 
				 bail_add:
			
 
				-
			
 
				 	dentry->d_op = &ocfs2_dentry_ops;
			
 
				 	ret = d_splice_alias(inode, dentry);
			
 
				 
			
 
				+	if (inode) {
			
 
				+		/*
			
 
				+		 * If d_splice_alias() finds a DCACHE_DISCONNECTED
			
 
				+		 * dentry, it will d_move() it on top of ours. The
			
 
				+		 * return value will indicate this however, so in
			
 
				+		 * those cases, we switch them around for the locking
			
 
				+		 * code.
			
 
				+		 *
			
 
				+		 * NOTE: This dentry already has ->d_op set from
			
 
				+		 * ocfs2_get_parent() and ocfs2_get_dentry()
			
 
				+		 */
			
 
				+		if (ret)
			
 
				+			dentry = ret;
			
 
				+
			
 
				+		status = ocfs2_dentry_attach_lock(dentry, inode,
			
 
				+						  OCFS2_I(dir)->ip_blkno);
			
 
				+		if (status) {
			
 
				+			mlog_errno(status);
			
 
				+			ret = ERR_PTR(status);
			
 
				+			goto bail_unlock;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 bail_unlock:
			
 
				 	/* Don't drop the cluster lock until *after* the d_add --
			
 
				 	 * unlink on another node will message us to remove that
			
@@ -418,6 +440,13 @@ static int ocfs2_mknod(struct inode *dir,
 
				 		goto leave;
			
 
				 	}
			
 
				 
			
 
				+	status = ocfs2_dentry_attach_lock(dentry, inode,
			
 
				+					  OCFS2_I(dir)->ip_blkno);
			
 
				+	if (status) {
			
 
				+		mlog_errno(status);
			
 
				+		goto leave;
			
 
				+	}
			
 
				+
			
 
				 	insert_inode_hash(inode);
			
 
				 	dentry->d_op = &ocfs2_dentry_ops;
			
 
				 	d_instantiate(dentry, inode);
			
@@ -725,6 +754,12 @@ static int ocfs2_link(struct dentry *old_dentry,
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				+	err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
			
 
				+	if (err) {
			
 
				+		mlog_errno(err);
			
 
				+		goto bail;
			
 
				+	}
			
 
				+
			
 
				 	atomic_inc(&inode->i_count);
			
 
				 	dentry->d_op = &ocfs2_dentry_ops;
			
 
				 	d_instantiate(dentry, inode);
			
@@ -743,6 +778,23 @@ bail:
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Takes and drops an exclusive lock on the given dentry. This will
			
 
				+ * force other nodes to drop it.
			
 
				+ */
			
 
				+static int ocfs2_remote_dentry_delete(struct dentry *dentry)
			
 
				+{
			
 
				+	int ret;
			
 
				+
			
 
				+	ret = ocfs2_dentry_lock(dentry, 1);
			
 
				+	if (ret)
			
 
				+		mlog_errno(ret);
			
 
				+	else
			
 
				+		ocfs2_dentry_unlock(dentry, 1);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 static int ocfs2_unlink(struct inode *dir,
			
 
				 			struct dentry *dentry)
			
 
				 {
			
@@ -832,8 +884,7 @@ static int ocfs2_unlink(struct inode *dir,
 
				 	else
			
 
				 		inode->i_nlink--;
			
 
				 
			
 
				-	status = ocfs2_request_unlink_vote(inode, dentry,
			
 
				-					   (unsigned int) inode->i_nlink);
			
 
				+	status = ocfs2_remote_dentry_delete(dentry);
			
 
				 	if (status < 0) {
			
 
				 		/* This vote should succeed under all normal
			
 
				 		 * circumstances. */
			
@@ -1019,7 +1070,6 @@ static int ocfs2_rename(struct inode *old_dir,
 
				 	struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
			
 
				 						    // this is the 1st dirent bh
			
 
				 	nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
			
 
				-	unsigned int links_count;
			
 
				 
			
 
				 	/* At some point it might be nice to break this function up a
			
 
				 	 * bit. */
			
@@ -1093,23 +1143,26 @@ static int ocfs2_rename(struct inode *old_dir,
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if (S_ISDIR(old_inode->i_mode)) {
			
 
				-		/* Directories actually require metadata updates to
			
 
				-		 * the directory info so we can't get away with not
			
 
				-		 * doing node locking on it. */
			
 
				-		status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
			
 
				-		if (status < 0) {
			
 
				-			if (status != -ENOENT)
			
 
				-				mlog_errno(status);
			
 
				-			goto bail;
			
 
				-		}
			
 
				-
			
 
				-		status = ocfs2_request_rename_vote(old_inode, old_dentry);
			
 
				-		if (status < 0) {
			
 
				+	/*
			
 
				+	 * Though we don't require an inode meta data update if
			
 
				+	 * old_inode is not a directory, we lock anyway here to ensure
			
 
				+	 * the vote thread on other nodes won't have to concurrently
			
 
				+	 * downconvert the inode and the dentry locks.
			
 
				+	 */
			
 
				+	status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
			
 
				+	if (status < 0) {
			
 
				+		if (status != -ENOENT)
			
 
				 			mlog_errno(status);
			
 
				-			goto bail;
			
 
				-		}
			
 
				+		goto bail;
			
 
				+	}
			
 
				+
			
 
				+	status = ocfs2_remote_dentry_delete(old_dentry);
			
 
				+	if (status < 0) {
			
 
				+		mlog_errno(status);
			
 
				+		goto bail;
			
 
				+	}
			
 
				 
			
 
				+	if (S_ISDIR(old_inode->i_mode)) {
			
 
				 		status = -EIO;
			
 
				 		old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0);
			
 
				 		if (!old_inode_de_bh)
			
@@ -1123,14 +1176,6 @@ static int ocfs2_rename(struct inode *old_dir,
 
				 		if (!new_inode && new_dir!=old_dir &&
			
 
				 		    new_dir->i_nlink >= OCFS2_LINK_MAX)
			
 
				 			goto bail;
			
 
				-	} else {
			
 
				-		/* Ah, the simple case - we're a file so just send a
			
 
				-		 * message. */
			
 
				-		status = ocfs2_request_rename_vote(old_inode, old_dentry);
			
 
				-		if (status < 0) {
			
 
				-			mlog_errno(status);
			
 
				-			goto bail;
			
 
				-		}
			
 
				 	}
			
 
				 
			
 
				 	status = -ENOENT;
			
@@ -1202,13 +1247,7 @@ static int ocfs2_rename(struct inode *old_dir,
 
				 			goto bail;
			
 
				 		}
			
 
				 
			
 
				-		if (S_ISDIR(new_inode->i_mode))
			
 
				-			links_count = 0;
			
 
				-		else
			
 
				-			links_count = (unsigned int) (new_inode->i_nlink - 1);
			
 
				-
			
 
				-		status = ocfs2_request_unlink_vote(new_inode, new_dentry,
			
 
				-						   links_count);
			
 
				+		status = ocfs2_remote_dentry_delete(new_dentry);
			
 
				 		if (status < 0) {
			
 
				 			mlog_errno(status);
			
 
				 			goto bail;
			
@@ -1387,6 +1426,7 @@ static int ocfs2_rename(struct inode *old_dir,
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
			
 
				 	status = 0;
			
 
				 bail:
			
 
				 	if (rename_lock)
			
@@ -1675,6 +1715,12 @@ static int ocfs2_symlink(struct inode *dir,
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				+	status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
			
 
				+	if (status) {
			
 
				+		mlog_errno(status);
			
 
				+		goto bail;
			
 
				+	}
			
 
				+
			
 
				 	insert_inode_hash(inode);
			
 
				 	dentry->d_op = &ocfs2_dentry_ops;
			
 
				 	d_instantiate(dentry, inode);
			
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -35,12 +35,15 @@
 
				 #define OCFS2_LOCK_ID_MAX_LEN  32
			
 
				 #define OCFS2_LOCK_ID_PAD "000000"
			
 
				 
			
 
				+#define OCFS2_DENTRY_LOCK_INO_START 18
			
 
				+
			
 
				 enum ocfs2_lock_type {
			
 
				 	OCFS2_LOCK_TYPE_META = 0,
			
 
				 	OCFS2_LOCK_TYPE_DATA,
			
 
				 	OCFS2_LOCK_TYPE_SUPER,
			
 
				 	OCFS2_LOCK_TYPE_RENAME,
			
 
				 	OCFS2_LOCK_TYPE_RW,
			
 
				+	OCFS2_LOCK_TYPE_DENTRY,
			
 
				 	OCFS2_NUM_LOCK_TYPES
			
 
				 };
			
 
				 
			
@@ -63,6 +66,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
 
				 		case OCFS2_LOCK_TYPE_RW:
			
 
				 			c = 'W';
			
 
				 			break;
			
 
				+		case OCFS2_LOCK_TYPE_DENTRY:
			
 
				+			c = 'N';
			
 
				+			break;
			
 
				 		default:
			
 
				 			c = '\0';
			
 
				 	}
			
@@ -70,4 +76,23 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
 
				 	return c;
			
 
				 }
			
 
				 
			
 
				+static char *ocfs2_lock_type_strings[] = {
			
 
				+	[OCFS2_LOCK_TYPE_META] = "Meta",
			
 
				+	[OCFS2_LOCK_TYPE_DATA] = "Data",
			
 
				+	[OCFS2_LOCK_TYPE_SUPER] = "Super",
			
 
				+	[OCFS2_LOCK_TYPE_RENAME] = "Rename",
			
 
				+	/* Need to differentiate from [R]ename.. serializing writes is the
			
 
				+	 * important job it does, anyway. */
			
 
				+	[OCFS2_LOCK_TYPE_RW] = "Write/Read",
			
 
				+	[OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
			
 
				+};
			
 
				+
			
 
				+static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
			
 
				+{
			
 
				+#ifdef __KERNEL__
			
 
				+	mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type);
			
 
				+#endif
			
 
				+	return ocfs2_lock_type_strings[type];
			
 
				+}
			
 
				+
			
 
				 #endif  /* OCFS2_LOCKID_H */
			
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -202,7 +202,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
 
				 
			
 
				 	mlog_entry_void();
			
 
				 
			
 
				-	new = ocfs2_iget(osb, osb->root_blkno);
			
 
				+	new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE);
			
 
				 	if (IS_ERR(new)) {
			
 
				 		status = PTR_ERR(new);
			
 
				 		mlog_errno(status);
			
@@ -210,7 +210,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
 
				 	}
			
 
				 	osb->root_inode = new;
			
 
				 
			
 
				-	new = ocfs2_iget(osb, osb->system_dir_blkno);
			
 
				+	new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE);
			
 
				 	if (IS_ERR(new)) {
			
 
				 		status = PTR_ERR(new);
			
 
				 		mlog_errno(status);
			
@@ -682,7 +682,7 @@ static struct file_system_type ocfs2_fs_type = {
 
				 	.kill_sb        = kill_block_super, /* set to the generic one
			
 
				 					     * right now, but do we
			
 
				 					     * need to change that? */
			
 
				-	.fs_flags       = FS_REQUIRES_DEV,
			
 
				+	.fs_flags       = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
			
 
				 	.next           = NULL
			
 
				 };
			
 
				 
			
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -28,11 +28,11 @@
 
				 #include <linux/slab.h>
			
 
				 #include <linux/highmem.h>
			
 
				 
			
 
				-#include "ocfs2.h"
			
 
				-
			
 
				 #define MLOG_MASK_PREFIX ML_INODE
			
 
				 #include <cluster/masklog.h>
			
 
				 
			
 
				+#include "ocfs2.h"
			
 
				+
			
 
				 #include "alloc.h"
			
 
				 #include "dir.h"
			
 
				 #include "inode.h"
			
@@ -115,7 +115,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				-	inode = ocfs2_iget(osb, blkno);
			
 
				+	inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE);
			
 
				 	if (IS_ERR(inode)) {
			
 
				 		mlog_errno(PTR_ERR(inode));
			
 
				 		inode = NULL;
			
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -74,9 +74,6 @@ struct ocfs2_vote_msg
 
				 		__be32 v_orphaned_slot;	/* Used during delete votes */
			
 
				 		__be32 v_nlink;		/* Used during unlink votes */
			
 
				 	} md1;				/* Message type dependant 1 */
			
 
				-	__be32 v_unlink_namelen;
			
 
				-	__be64 v_unlink_parent;
			
 
				-	u8  v_unlink_dirent[OCFS2_VOTE_FILENAME_LEN];
			
 
				 };
			
 
				 
			
 
				 /* Responses are given these values to maintain backwards
			
@@ -100,8 +97,6 @@ struct ocfs2_vote_work {
 
				 enum ocfs2_vote_request {
			
 
				 	OCFS2_VOTE_REQ_INVALID = 0,
			
 
				 	OCFS2_VOTE_REQ_DELETE,
			
 
				-	OCFS2_VOTE_REQ_UNLINK,
			
 
				-	OCFS2_VOTE_REQ_RENAME,
			
 
				 	OCFS2_VOTE_REQ_MOUNT,
			
 
				 	OCFS2_VOTE_REQ_UMOUNT,
			
 
				 	OCFS2_VOTE_REQ_LAST
			
@@ -261,103 +256,13 @@ done:
 
				 	return response;
			
 
				 }
			
 
				 
			
 
				-static int ocfs2_match_dentry(struct dentry *dentry,
			
 
				-			      u64 parent_blkno,
			
 
				-			      unsigned int namelen,
			
 
				-			      const char *name)
			
 
				-{
			
 
				-	struct inode *parent;
			
 
				-
			
 
				-	if (!dentry->d_parent) {
			
 
				-		mlog(0, "Detached from parent.\n");
			
 
				-		return 0;
			
 
				-	}
			
 
				-
			
 
				-	parent = dentry->d_parent->d_inode;
			
 
				-	/* Negative parent dentry? */
			
 
				-	if (!parent)
			
 
				-		return 0;
			
 
				-
			
 
				-	/* Name is in a different directory. */
			
 
				-	if (OCFS2_I(parent)->ip_blkno != parent_blkno)
			
 
				-		return 0;
			
 
				-
			
 
				-	if (dentry->d_name.len != namelen)
			
 
				-		return 0;
			
 
				-
			
 
				-	/* comparison above guarantees this is safe. */
			
 
				-	if (memcmp(dentry->d_name.name, name, namelen))
			
 
				-		return 0;
			
 
				-
			
 
				-	return 1;
			
 
				-}
			
 
				-
			
 
				-static void ocfs2_process_dentry_request(struct inode *inode,
			
 
				-					 int rename,
			
 
				-					 unsigned int new_nlink,
			
 
				-					 u64 parent_blkno,
			
 
				-					 unsigned int namelen,
			
 
				-					 const char *name)
			
 
				-{
			
 
				-	struct dentry *dentry = NULL;
			
 
				-	struct list_head *p;
			
 
				-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
			
 
				-
			
 
				-	mlog(0, "parent %llu, namelen = %u, name = %.*s\n",
			
 
				-	     (unsigned long long)parent_blkno, namelen, namelen, name);
			
 
				-
			
 
				-	spin_lock(&dcache_lock);
			
 
				-
			
 
				-	/* Another node is removing this name from the system. It is
			
 
				-	 * up to us to find the corresponding dentry and if it exists,
			
 
				-	 * unhash it from the dcache. */
			
 
				-	list_for_each(p, &inode->i_dentry) {
			
 
				-		dentry = list_entry(p, struct dentry, d_alias);
			
 
				-
			
 
				-		if (ocfs2_match_dentry(dentry, parent_blkno, namelen, name)) {
			
 
				-			mlog(0, "dentry found: %.*s\n",
			
 
				-			     dentry->d_name.len, dentry->d_name.name);
			
 
				-
			
 
				-			dget_locked(dentry);
			
 
				-			break;
			
 
				-		}
			
 
				-
			
 
				-		dentry = NULL;
			
 
				-	}
			
 
				-
			
 
				-	spin_unlock(&dcache_lock);
			
 
				-
			
 
				-	if (dentry) {
			
 
				-		d_delete(dentry);
			
 
				-		dput(dentry);
			
 
				-	}
			
 
				-
			
 
				-	/* rename votes don't send link counts */
			
 
				-	if (!rename) {
			
 
				-		mlog(0, "new_nlink = %u\n", new_nlink);
			
 
				-
			
 
				-		/* We don't have the proper locks here to directly
			
 
				-		 * change i_nlink and besides, the vote is sent
			
 
				-		 * *before* the operation so it may have failed on the
			
 
				-		 * other node. This passes a hint to ocfs2_drop_inode
			
 
				-		 * to force ocfs2_delete_inode, who will take the
			
 
				-		 * proper cluster locks to sort things out. */
			
 
				-		if (new_nlink == 0) {
			
 
				-			spin_lock(&oi->ip_lock);
			
 
				-			oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
			
 
				-			spin_unlock(&OCFS2_I(inode)->ip_lock);
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 static void ocfs2_process_vote(struct ocfs2_super *osb,
			
 
				 			       struct ocfs2_vote_msg *msg)
			
 
				 {
			
 
				 	int net_status, vote_response;
			
 
				 	int orphaned_slot = 0;
			
 
				-	int rename = 0;
			
 
				-	unsigned int node_num, generation, new_nlink, namelen;
			
 
				-	u64 blkno, parent_blkno;
			
 
				+	unsigned int node_num, generation;
			
 
				+	u64 blkno;
			
 
				 	enum ocfs2_vote_request request;
			
 
				 	struct inode *inode = NULL;
			
 
				 	struct ocfs2_msg_hdr *hdr = &msg->v_hdr;
			
@@ -437,18 +342,6 @@ static void ocfs2_process_vote(struct ocfs2_super *osb,
 
				 		vote_response = ocfs2_process_delete_request(inode,
			
 
				 							     &orphaned_slot);
			
 
				 		break;
			
 
				-	case OCFS2_VOTE_REQ_RENAME:
			
 
				-		rename = 1;
			
 
				-		/* fall through */
			
 
				-	case OCFS2_VOTE_REQ_UNLINK:
			
 
				-		parent_blkno = be64_to_cpu(msg->v_unlink_parent);
			
 
				-		namelen = be32_to_cpu(msg->v_unlink_namelen);
			
 
				-		/* new_nlink will be ignored in case of a rename vote */
			
 
				-		new_nlink = be32_to_cpu(msg->md1.v_nlink);
			
 
				-		ocfs2_process_dentry_request(inode, rename, new_nlink,
			
 
				-					     parent_blkno, namelen,
			
 
				-					     msg->v_unlink_dirent);
			
 
				-		break;
			
 
				 	default:
			
 
				 		mlog(ML_ERROR, "node %u, invalid request: %u\n",
			
 
				 		     node_num, request);
			
@@ -889,75 +782,6 @@ int ocfs2_request_delete_vote(struct inode *inode)
 
				 	return status;
			
 
				 }
			
 
				 
			
 
				-static void ocfs2_setup_unlink_vote(struct ocfs2_vote_msg *request,
			
 
				-				    struct dentry *dentry)
			
 
				-{
			
 
				-	struct inode *parent = dentry->d_parent->d_inode;
			
 
				-
			
 
				-	/* We need some values which will uniquely identify a dentry
			
 
				-	 * on the other nodes so that they can find it and run
			
 
				-	 * d_delete against it. Parent directory block and full name
			
 
				-	 * should suffice. */
			
 
				-
			
 
				-	mlog(0, "unlink/rename request: parent: %llu name: %.*s\n",
			
 
				-	     (unsigned long long)OCFS2_I(parent)->ip_blkno, dentry->d_name.len,
			
 
				-	     dentry->d_name.name);
			
 
				-
			
 
				-	request->v_unlink_parent = cpu_to_be64(OCFS2_I(parent)->ip_blkno);
			
 
				-	request->v_unlink_namelen = cpu_to_be32(dentry->d_name.len);
			
 
				-	memcpy(request->v_unlink_dirent, dentry->d_name.name,
			
 
				-	       dentry->d_name.len);
			
 
				-}
			
 
				-
			
 
				-int ocfs2_request_unlink_vote(struct inode *inode,
			
 
				-			      struct dentry *dentry,
			
 
				-			      unsigned int nlink)
			
 
				-{
			
 
				-	int status;
			
 
				-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				-	struct ocfs2_vote_msg *request;
			
 
				-
			
 
				-	if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
			
 
				-		return -ENAMETOOLONG;
			
 
				-
			
 
				-	status = -ENOMEM;
			
 
				-	request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
			
 
				-					 inode->i_generation,
			
 
				-					 OCFS2_VOTE_REQ_UNLINK, nlink);
			
 
				-	if (request) {
			
 
				-		ocfs2_setup_unlink_vote(request, dentry);
			
 
				-
			
 
				-		status = ocfs2_request_vote(inode, request, NULL);
			
 
				-
			
 
				-		kfree(request);
			
 
				-	}
			
 
				-	return status;
			
 
				-}
			
 
				-
			
 
				-int ocfs2_request_rename_vote(struct inode *inode,
			
 
				-			      struct dentry *dentry)
			
 
				-{
			
 
				-	int status;
			
 
				-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				-	struct ocfs2_vote_msg *request;
			
 
				-
			
 
				-	if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
			
 
				-		return -ENAMETOOLONG;
			
 
				-
			
 
				-	status = -ENOMEM;
			
 
				-	request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
			
 
				-					 inode->i_generation,
			
 
				-					 OCFS2_VOTE_REQ_RENAME, 0);
			
 
				-	if (request) {
			
 
				-		ocfs2_setup_unlink_vote(request, dentry);
			
 
				-
			
 
				-		status = ocfs2_request_vote(inode, request, NULL);
			
 
				-
			
 
				-		kfree(request);
			
 
				-	}
			
 
				-	return status;
			
 
				-}
			
 
				-
			
 
				 int ocfs2_request_mount_vote(struct ocfs2_super *osb)
			
 
				 {
			
 
				 	int status;
			
--- a/fs/ocfs2/vote.h
+++ b/fs/ocfs2/vote.h
@@ -39,11 +39,6 @@ static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb)
 
				 }
			
 
				 
			
 
				 int ocfs2_request_delete_vote(struct inode *inode);
			
 
				-int ocfs2_request_unlink_vote(struct inode *inode,
			
 
				-			      struct dentry *dentry,
			
 
				-			      unsigned int nlink);
			
 
				-int ocfs2_request_rename_vote(struct inode *inode,
			
 
				-			      struct dentry *dentry);
			
 
				 int ocfs2_request_mount_vote(struct ocfs2_super *osb);
			
 
				 int ocfs2_request_umount_vote(struct ocfs2_super *osb);
			
 
				 int ocfs2_register_net_handlers(struct ocfs2_super *osb);
			
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -92,9 +92,10 @@ extern int dir_notify_enable;
 
				 #define FS_REQUIRES_DEV 1 
			
 
				 #define FS_BINARY_MOUNTDATA 2
			
 
				 #define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
			
 
				-#define FS_ODD_RENAME	32768	/* Temporary stuff; will go away as soon
			
 
				-				  * as nfs_rename() will be cleaned up
			
 
				-				  */
			
 
				+#define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move()
			
 
				+					 * during rename() internally.
			
 
				+					 */
			
 
				+
			
 
				 /*
			
 
				  * These are the fs-independent mount-flags: up to 32 flags are supported
			
 
				  */