16 年之前 · 12fcfd22fe
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -86,12 +86,6 @@ struct btrfs_inode {
 
															 	 */
														
 
															 	u64 logged_trans;
														
 
															-	/*
														
 
															-	 * trans that last made a change that should be fully fsync'd.  This
														
 
															-	 * gets reset to zero each time the inode is logged
														
 
															-	 */
														
 
															-	u64 log_dirty_trans;
														
 
															-
														
 
															 	/* total number of bytes pending delalloc, used by stat to calc the
														
 
															 	 * real block usage of the file
														
 
															 	 */
														
@@ -121,6 +115,13 @@ struct btrfs_inode {
 
															 	/* the start of block group preferred for allocations. */
														
 
															 	u64 block_group;
														
 
															+	/* the fsync log has some corner cases that mean we have to check
														
 
															+	 * directories to see if any unlinks have been done before
														
 
															+	 * the directory was logged.  See tree-log.c for all the
														
 
															+	 * details
														
 
															+	 */
														
 
															+	u64 last_unlink_trans;
														
 
															+
														
 
															 	struct inode vfs_inode;
														
 
															 };
														
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -695,7 +695,12 @@ struct btrfs_fs_info {
 
															 	u64 generation;
														
 
															 	u64 last_trans_committed;
														
 
															-	u64 last_trans_new_blockgroup;
														
 
															+
														
 
															+	/*
														
 
															+	 * this is updated to the current trans every time a full commit
														
 
															+	 * is required instead of the faster short fsync log commits
														
 
															+	 */
														
 
															+	u64 last_trans_log_full_commit;
														
 
															 	u64 open_ioctl_trans;
														
 
															 	unsigned long mount_opt;
														
 
															 	u64 max_extent;
														
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5897,7 +5897,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
															 	extent_root = root->fs_info->extent_root;
														
 
															-	root->fs_info->last_trans_new_blockgroup = trans->transid;
														
 
															+	root->fs_info->last_trans_log_full_commit = trans->transid;
														
 
															 	cache = kzalloc(sizeof(*cache), GFP_NOFS);
														
 
															 	if (!cache)
														
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1173,8 +1173,11 @@ out_nolock:
 
															 			ret = btrfs_log_dentry_safe(trans, root,
														
 
															 						    file->f_dentry);
														
 
															 			if (ret == 0) {
														
 
															-				btrfs_sync_log(trans, root);
														
 
															-				btrfs_end_transaction(trans, root);
														
 
															+				ret = btrfs_sync_log(trans, root);
														
 
															+				if (ret == 0)
														
 
															+					btrfs_end_transaction(trans, root);
														
 
															+				else
														
 
															+					btrfs_commit_transaction(trans, root);
														
 
															 			} else {
														
 
															 				btrfs_commit_transaction(trans, root);
														
 
															 			}
														
@@ -1266,8 +1269,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 
															 	if (ret > 0) {
														
 
															 		ret = btrfs_commit_transaction(trans, root);
														
 
															 	} else {
														
 
															-		btrfs_sync_log(trans, root);
														
 
															-		ret = btrfs_end_transaction(trans, root);
														
 
															+		ret = btrfs_sync_log(trans, root);
														
 
															+		if (ret == 0)
														
 
															+			ret = btrfs_end_transaction(trans, root);
														
 
															+		else
														
 
															+			ret = btrfs_commit_transaction(trans, root);
														
 
															 	}
														
 
															 	mutex_lock(&dentry->d_inode->i_mutex);
														
 
															 out:
														
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2246,8 +2246,6 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 
															 	ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
														
 
															 					 inode, dir->i_ino);
														
 
															 	BUG_ON(ret != 0 && ret != -ENOENT);
														
 
															-	if (ret != -ENOENT)
														
 
															-		BTRFS_I(dir)->log_dirty_trans = trans->transid;
														
 
															 	ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
														
 
															 					   dir, index);
														
@@ -2280,6 +2278,9 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
 
															 	trans = btrfs_start_transaction(root, 1);
														
 
															 	btrfs_set_trans_block_group(trans, dir);
														
 
															+
														
 
															+	btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
														
 
															+
														
 
															 	ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
														
 
															 				 dentry->d_name.name, dentry->d_name.len);
														
@@ -3042,7 +3043,7 @@ static noinline void init_btrfs_i(struct inode *inode)
 
															 	bi->disk_i_size = 0;
														
 
															 	bi->flags = 0;
														
 
															 	bi->index_cnt = (u64)-1;
														
 
															-	bi->log_dirty_trans = 0;
														
 
															+	bi->last_unlink_trans = 0;
														
 
															 	extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
														
 
															 	extent_io_tree_init(&BTRFS_I(inode)->io_tree,
														
 
															 			     inode->i_mapping, GFP_NOFS);
														
@@ -3786,6 +3787,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 
															 		drop_inode = 1;
														
 
															 	nr = trans->blocks_used;
														
 
															+
														
 
															+	btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
														
 
															 	btrfs_end_transaction_throttle(trans, root);
														
 
															 fail:
														
 
															 	if (drop_inode) {
														
@@ -4666,6 +4669,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
															 	trans = btrfs_start_transaction(root, 1);
														
 
															+	/*
														
 
															+	 * this is an ugly little race, but the rename is required to make
														
 
															+	 * sure that if we crash, the inode is either at the old name
														
 
															+	 * or the new one.  pinning the log transaction lets us make sure
														
 
															+	 * we don't allow a log commit to come in after we unlink the
														
 
															+	 * name but before we add the new name back in.
														
 
															+	 */
														
 
															+	btrfs_pin_log_trans(root);
														
 
															+
														
 
															 	btrfs_set_trans_block_group(trans, new_dir);
														
 
															 	btrfs_inc_nlink(old_dentry->d_inode);
														
@@ -4673,6 +4685,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
															 	new_dir->i_ctime = new_dir->i_mtime = ctime;
														
 
															 	old_inode->i_ctime = ctime;
														
 
															+	if (old_dentry->d_parent != new_dentry->d_parent)
														
 
															+		btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
														
 
															+
														
 
															 	ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
														
 
															 				 old_dentry->d_name.name,
														
 
															 				 old_dentry->d_name.len);
														
@@ -4704,7 +4719,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
															 	if (ret)
														
 
															 		goto out_fail;
														
 
															+	btrfs_log_new_name(trans, old_inode, old_dir,
														
 
															+				       new_dentry->d_parent);
														
 
															 out_fail:
														
 
															+
														
 
															+	/* this btrfs_end_log_trans just allows the current
														
 
															+	 * log-sub transaction to complete
														
 
															+	 */
														
 
															+	btrfs_end_log_trans(root);
														
 
															 	btrfs_end_transaction_throttle(trans, root);
														
 
															 out_unlock:
														
 
															 	return ret;
														
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -34,6 +34,49 @@
 
															 #define LOG_INODE_ALL 0
														
 
															 #define LOG_INODE_EXISTS 1
														
 
															+/*
														
 
															+ * directory trouble cases
														
 
															+ *
														
 
															+ * 1) on rename or unlink, if the inode being unlinked isn't in the fsync
														
 
															+ * log, we must force a full commit before doing an fsync of the directory
														
 
															+ * where the unlink was done.
														
 
															+ * ---> record transid of last unlink/rename per directory
														
 
															+ *
														
 
															+ * mkdir foo/some_dir
														
 
															+ * normal commit
														
 
															+ * rename foo/some_dir foo2/some_dir
														
 
															+ * mkdir foo/some_dir
														
 
															+ * fsync foo/some_dir/some_file
														
 
															+ *
														
 
															+ * The fsync above will unlink the original some_dir without recording
														
 
															+ * it in its new location (foo2).  After a crash, some_dir will be gone
														
 
															+ * unless the fsync of some_file forces a full commit
														
 
															+ *
														
 
															+ * 2) we must log any new names for any file or dir that is in the fsync
														
 
															+ * log. ---> check inode while renaming/linking.
														
 
															+ *
														
 
															+ * 2a) we must log any new names for any file or dir during rename
														
 
															+ * when the directory they are being removed from was logged.
														
 
															+ * ---> check inode and old parent dir during rename
														
 
															+ *
														
 
															+ *  2a is actually the more important variant.  Without the extra logging
														
 
															+ *  a crash might unlink the old name without recreating the new one
														
 
															+ *
														
 
															+ * 3) after a crash, we must go through any directories with a link count
														
 
															+ * of zero and redo the rm -rf
														
 
															+ *
														
 
															+ * mkdir f1/foo
														
 
															+ * normal commit
														
 
															+ * rm -rf f1/foo
														
 
															+ * fsync(f1)
														
 
															+ *
														
 
															+ * The directory f1/foo was fully removed from the FS, but fsync was never
														
 
															+ * called on f1/foo, only its parent dir.  After a crash the rm -rf must
														
 
															+ * be replayed.  This must be able to recurse down the entire
														
 
															+ * directory tree.  The inode link count fixup code takes care of the
														
 
															+ * ugly details.
														
 
															+ */
														
 
															+
														
 
															 /*
														
 
															  * stages for the tree walking.  The first
														
 
															  * stage (0) is to only pin down the blocks we find
														
@@ -47,12 +90,17 @@
 
															 #define LOG_WALK_REPLAY_INODES 1
														
 
															 #define LOG_WALK_REPLAY_ALL 2
														
 
															-static int __btrfs_log_inode(struct btrfs_trans_handle *trans,
														
 
															+static int btrfs_log_inode(struct btrfs_trans_handle *trans,
														
 
															 			     struct btrfs_root *root, struct inode *inode,
														
 
															 			     int inode_only);
														
 
															 static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
														
 
															 			     struct btrfs_root *root,
														
 
															 			     struct btrfs_path *path, u64 objectid);
														
 
															+static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
														
 
															+				       struct btrfs_root *root,
														
 
															+				       struct btrfs_root *log,
														
 
															+				       struct btrfs_path *path,
														
 
															+				       u64 dirid, int del_all);
														
 
															 /*
														
 
															  * tree logging is a special write ahead log used to make sure that
														
@@ -132,11 +180,26 @@ static int join_running_log_trans(struct btrfs_root *root)
 
															 	return ret;
														
 
															 }
														
 
															+/*
														
 
															+ * This either makes the current running log transaction wait
														
 
															+ * until you call btrfs_end_log_trans() or it makes any future
														
 
															+ * log transactions wait until you call btrfs_end_log_trans()
														
 
															+ */
														
 
															+int btrfs_pin_log_trans(struct btrfs_root *root)
														
 
															+{
														
 
															+	int ret = -ENOENT;
														
 
															+
														
 
															+	mutex_lock(&root->log_mutex);
														
 
															+	atomic_inc(&root->log_writers);
														
 
															+	mutex_unlock(&root->log_mutex);
														
 
															+	return ret;
														
 
															+}
														
 
															+
														
 
															 /*
														
 
															  * indicate we're done making changes to the log tree
														
 
															  * and wake up anyone waiting to do a sync
														
 
															  */
														
 
															-static int end_log_trans(struct btrfs_root *root)
														
 
															+int btrfs_end_log_trans(struct btrfs_root *root)
														
 
															 {
														
 
															 	if (atomic_dec_and_test(&root->log_writers)) {
														
 
															 		smp_mb();
														
@@ -602,6 +665,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
 
															 	ret = link_to_fixup_dir(trans, root, path, location.objectid);
														
 
															 	BUG_ON(ret);
														
 
															+
														
 
															 	ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
														
 
															 	BUG_ON(ret);
														
 
															 	kfree(name);
														
@@ -803,6 +867,7 @@ conflict_again:
 
															 					    victim_name_len)) {
														
 
															 				btrfs_inc_nlink(inode);
														
 
															 				btrfs_release_path(root, path);
														
 
															+
														
 
															 				ret = btrfs_unlink_inode(trans, root, dir,
														
 
															 							 inode, victim_name,
														
 
															 							 victim_name_len);
														
@@ -921,13 +986,20 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
 
															 		key.offset--;
														
 
															 		btrfs_release_path(root, path);
														
 
															 	}
														
 
															-	btrfs_free_path(path);
														
 
															+	btrfs_release_path(root, path);
														
 
															 	if (nlink != inode->i_nlink) {
														
 
															 		inode->i_nlink = nlink;
														
 
															 		btrfs_update_inode(trans, root, inode);
														
 
															 	}
														
 
															 	BTRFS_I(inode)->index_cnt = (u64)-1;
														
 
															+	if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) {
														
 
															+		ret = replay_dir_deletes(trans, root, NULL, path,
														
 
															+					 inode->i_ino, 1);
														
 
															+		BUG_ON(ret);
														
 
															+	}
														
 
															+	btrfs_free_path(path);
														
 
															+
														
 
															 	return 0;
														
 
															 }
														
@@ -970,9 +1042,12 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
 
															 		iput(inode);
														
 
															-		if (key.offset == 0)
														
 
															-			break;
														
 
															-		key.offset--;
														
 
															+		/*
														
 
															+		 * fixup on a directory may create new entries,
														
 
															+		 * make sure we always look for the highest possible
														
 
															+		 * offset
														
 
															+		 */
														
 
															+		key.offset = (u64)-1;
														
 
															 	}
														
 
															 	btrfs_release_path(root, path);
														
 
															 	return 0;
														
@@ -1312,11 +1387,11 @@ again:
 
															 		read_extent_buffer(eb, name, (unsigned long)(di + 1),
														
 
															 				  name_len);
														
 
															 		log_di = NULL;
														
 
															-		if (dir_key->type == BTRFS_DIR_ITEM_KEY) {
														
 
															+		if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) {
														
 
															 			log_di = btrfs_lookup_dir_item(trans, log, log_path,
														
 
															 						       dir_key->objectid,
														
 
															 						       name, name_len, 0);
														
 
															-		} else if (dir_key->type == BTRFS_DIR_INDEX_KEY) {
														
 
															+		} else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) {
														
 
															 			log_di = btrfs_lookup_dir_index_item(trans, log,
														
 
															 						     log_path,
														
 
															 						     dir_key->objectid,
														
@@ -1377,7 +1452,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
 
															 				       struct btrfs_root *root,
														
 
															 				       struct btrfs_root *log,
														
 
															 				       struct btrfs_path *path,
														
 
															-				       u64 dirid)
														
 
															+				       u64 dirid, int del_all)
														
 
															 {
														
 
															 	u64 range_start;
														
 
															 	u64 range_end;
														
@@ -1407,10 +1482,14 @@ again:
 
															 	range_start = 0;
														
 
															 	range_end = 0;
														
 
															 	while (1) {
														
 
															-		ret = find_dir_range(log, path, dirid, key_type,
														
 
															-				     &range_start, &range_end);
														
 
															-		if (ret != 0)
														
 
															-			break;
														
 
															+		if (del_all)
														
 
															+			range_end = (u64)-1;
														
 
															+		else {
														
 
															+			ret = find_dir_range(log, path, dirid, key_type,
														
 
															+					     &range_start, &range_end);
														
 
															+			if (ret != 0)
														
 
															+				break;
														
 
															+		}
														
 
															 		dir_key.offset = range_start;
														
 
															 		while (1) {
														
@@ -1436,7 +1515,8 @@ again:
 
															 				break;
														
 
															 			ret = check_item_in_log(trans, root, log, path,
														
 
															-						log_path, dir, &found_key);
														
 
															+						log_path, dir,
														
 
															+						&found_key);
														
 
															 			BUG_ON(ret);
														
 
															 			if (found_key.offset == (u64)-1)
														
 
															 				break;
														
@@ -1513,7 +1593,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 
															 			mode = btrfs_inode_mode(eb, inode_item);
														
 
															 			if (S_ISDIR(mode)) {
														
 
															 				ret = replay_dir_deletes(wc->trans,
														
 
															-					 root, log, path, key.objectid);
														
 
															+					 root, log, path, key.objectid, 0);
														
 
															 				BUG_ON(ret);
														
 
															 			}
														
 
															 			ret = overwrite_item(wc->trans, root, path,
														
@@ -1850,7 +1930,8 @@ static int update_log_root(struct btrfs_trans_handle *trans,
 
															 	return ret;
														
 
															 }
														
 
															-static int wait_log_commit(struct btrfs_root *root, unsigned long transid)
														
 
															+static int wait_log_commit(struct btrfs_trans_handle *trans,
														
 
															+			   struct btrfs_root *root, unsigned long transid)
														
 
															 {
														
 
															 	DEFINE_WAIT(wait);
														
 
															 	int index = transid % 2;
														
@@ -1864,9 +1945,12 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid)
 
															 		prepare_to_wait(&root->log_commit_wait[index],
														
 
															 				&wait, TASK_UNINTERRUPTIBLE);
														
 
															 		mutex_unlock(&root->log_mutex);
														
 
															-		if (root->log_transid < transid + 2 &&
														
 
															+
														
 
															+		if (root->fs_info->last_trans_log_full_commit !=
														
 
															+		    trans->transid && root->log_transid < transid + 2 &&
														
 
															 		    atomic_read(&root->log_commit[index]))
														
 
															 			schedule();
														
 
															+
														
 
															 		finish_wait(&root->log_commit_wait[index], &wait);
														
 
															 		mutex_lock(&root->log_mutex);
														
 
															 	} while (root->log_transid < transid + 2 &&
														
@@ -1874,14 +1958,16 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid)
 
															 	return 0;
														
 
															 }
														
 
															-static int wait_for_writer(struct btrfs_root *root)
														
 
															+static int wait_for_writer(struct btrfs_trans_handle *trans,
														
 
															+			   struct btrfs_root *root)
														
 
															 {
														
 
															 	DEFINE_WAIT(wait);
														
 
															 	while (atomic_read(&root->log_writers)) {
														
 
															 		prepare_to_wait(&root->log_writer_wait,
														
 
															 				&wait, TASK_UNINTERRUPTIBLE);
														
 
															 		mutex_unlock(&root->log_mutex);
														
 
															-		if (atomic_read(&root->log_writers))
														
 
															+		if (root->fs_info->last_trans_log_full_commit !=
														
 
															+		    trans->transid && atomic_read(&root->log_writers))
														
 
															 			schedule();
														
 
															 		mutex_lock(&root->log_mutex);
														
 
															 		finish_wait(&root->log_writer_wait, &wait);
														
@@ -1892,7 +1978,14 @@ static int wait_for_writer(struct btrfs_root *root)
 
															 /*
														
 
															  * btrfs_sync_log does sends a given tree log down to the disk and
														
 
															  * updates the super blocks to record it.  When this call is done,
														
 
															- * you know that any inodes previously logged are safely on disk
														
 
															+ * you know that any inodes previously logged are safely on disk only
														
 
															+ * if it returns 0.
														
 
															+ *
														
 
															+ * Any other return value means you need to call btrfs_commit_transaction.
														
 
															+ * Some of the edge cases for fsyncing directories that have had unlinks
														
 
															+ * or renames done in the past mean that sometimes the only safe
														
 
															+ * fsync is to commit the whole FS.  When btrfs_sync_log returns -EAGAIN,
														
 
															+ * that has happened.
														
 
															  */
														
 
															 int btrfs_sync_log(struct btrfs_trans_handle *trans,
														
 
															 		   struct btrfs_root *root)
														
@@ -1906,7 +1999,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
															 	mutex_lock(&root->log_mutex);
														
 
															 	index1 = root->log_transid % 2;
														
 
															 	if (atomic_read(&root->log_commit[index1])) {
														
 
															-		wait_log_commit(root, root->log_transid);
														
 
															+		wait_log_commit(trans, root, root->log_transid);
														
 
															 		mutex_unlock(&root->log_mutex);
														
 
															 		return 0;
														
 
															 	}
														
@@ -1914,18 +2007,26 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
															 	/* wait for previous tree log sync to complete */
														
 
															 	if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
														
 
															-		wait_log_commit(root, root->log_transid - 1);
														
 
															+		wait_log_commit(trans, root, root->log_transid - 1);
														
 
															 	while (1) {
														
 
															 		unsigned long batch = root->log_batch;
														
 
															 		mutex_unlock(&root->log_mutex);
														
 
															 		schedule_timeout_uninterruptible(1);
														
 
															 		mutex_lock(&root->log_mutex);
														
 
															-		wait_for_writer(root);
														
 
															+
														
 
															+		wait_for_writer(trans, root);
														
 
															 		if (batch == root->log_batch)
														
 
															 			break;
														
 
															 	}
														
 
															+	/* bail out if we need to do a full commit */
														
 
															+	if (root->fs_info->last_trans_log_full_commit == trans->transid) {
														
 
															+		ret = -EAGAIN;
														
 
															+		mutex_unlock(&root->log_mutex);
														
 
															+		goto out;
														
 
															+	}
														
 
															+
														
 
															 	ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
														
 
															 	BUG_ON(ret);
														
@@ -1961,16 +2062,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
															 	index2 = log_root_tree->log_transid % 2;
														
 
															 	if (atomic_read(&log_root_tree->log_commit[index2])) {
														
 
															-		wait_log_commit(log_root_tree, log_root_tree->log_transid);
														
 
															+		wait_log_commit(trans, log_root_tree,
														
 
															+				log_root_tree->log_transid);
														
 
															 		mutex_unlock(&log_root_tree->log_mutex);
														
 
															 		goto out;
														
 
															 	}
														
 
															 	atomic_set(&log_root_tree->log_commit[index2], 1);
														
 
															-	if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2]))
														
 
															-		wait_log_commit(log_root_tree, log_root_tree->log_transid - 1);
														
 
															+	if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) {
														
 
															+		wait_log_commit(trans, log_root_tree,
														
 
															+				log_root_tree->log_transid - 1);
														
 
															+	}
														
 
															+
														
 
															+	wait_for_writer(trans, log_root_tree);
														
 
															-	wait_for_writer(log_root_tree);
														
 
															+	/*
														
 
															+	 * now that we've moved on to the tree of log tree roots,
														
 
															+	 * check the full commit flag again
														
 
															+	 */
														
 
															+	if (root->fs_info->last_trans_log_full_commit == trans->transid) {
														
 
															+		mutex_unlock(&log_root_tree->log_mutex);
														
 
															+		ret = -EAGAIN;
														
 
															+		goto out_wake_log_root;
														
 
															+	}
														
 
															 	ret = btrfs_write_and_wait_marked_extents(log_root_tree,
														
 
															 				&log_root_tree->dirty_log_pages);
														
@@ -1995,7 +2109,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
															 	 * in and cause problems either.
														
 
															 	 */
														
 
															 	write_ctree_super(trans, root->fs_info->tree_root, 2);
														
 
															+	ret = 0;
														
 
															+out_wake_log_root:
														
 
															 	atomic_set(&log_root_tree->log_commit[index2], 0);
														
 
															 	smp_mb();
														
 
															 	if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
														
@@ -2008,7 +2124,8 @@ out:
 
															 	return 0;
														
 
															 }
														
 
															-/* * free all the extents used by the tree log.  This should be called
														
 
															+/*
														
 
															+ * free all the extents used by the tree log.  This should be called
														
 
															  * at commit time of the full transaction
														
 
															  */
														
 
															 int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
														
@@ -2142,7 +2259,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
 
															 	btrfs_free_path(path);
														
 
															 	mutex_unlock(&BTRFS_I(dir)->log_mutex);
														
 
															-	end_log_trans(root);
														
 
															+	btrfs_end_log_trans(root);
														
 
															 	return 0;
														
 
															 }
														
@@ -2169,7 +2286,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
 
															 	ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino,
														
 
															 				  dirid, &index);
														
 
															 	mutex_unlock(&BTRFS_I(inode)->log_mutex);
														
 
															-	end_log_trans(root);
														
 
															+	btrfs_end_log_trans(root);
														
 
															 	return ret;
														
 
															 }
														
@@ -2569,7 +2686,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 
															  *
														
 
															  * This handles both files and directories.
														
 
															  */
														
 
															-static int __btrfs_log_inode(struct btrfs_trans_handle *trans,
														
 
															+static int btrfs_log_inode(struct btrfs_trans_handle *trans,
														
 
															 			     struct btrfs_root *root, struct inode *inode,
														
 
															 			     int inode_only)
														
 
															 {
														
@@ -2595,28 +2712,17 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans,
 
															 	min_key.offset = 0;
														
 
															 	max_key.objectid = inode->i_ino;
														
 
															+
														
 
															+	/* today the code can only do partial logging of directories */
														
 
															+	if (!S_ISDIR(inode->i_mode))
														
 
															+	    inode_only = LOG_INODE_ALL;
														
 
															+
														
 
															 	if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode))
														
 
															 		max_key.type = BTRFS_XATTR_ITEM_KEY;
														
 
															 	else
														
 
															 		max_key.type = (u8)-1;
														
 
															 	max_key.offset = (u64)-1;
														
 
															-	/*
														
 
															-	 * if this inode has already been logged and we're in inode_only
														
 
															-	 * mode, we don't want to delete the things that have already
														
 
															-	 * been written to the log.
														
 
															-	 *
														
 
															-	 * But, if the inode has been through an inode_only log,
														
 
															-	 * the logged_trans field is not set.  This allows us to catch
														
 
															-	 * any new names for this inode in the backrefs by logging it
														
 
															-	 * again
														
 
															-	 */
														
 
															-	if (inode_only == LOG_INODE_EXISTS &&
														
 
															-	    BTRFS_I(inode)->logged_trans == trans->transid) {
														
 
															-		btrfs_free_path(path);
														
 
															-		btrfs_free_path(dst_path);
														
 
															-		goto out;
														
 
															-	}
														
 
															 	mutex_lock(&BTRFS_I(inode)->log_mutex);
														
 
															 	/*
														
@@ -2703,7 +2809,6 @@ next_slot:
 
															 	if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
														
 
															 		btrfs_release_path(root, path);
														
 
															 		btrfs_release_path(log, dst_path);
														
 
															-		BTRFS_I(inode)->log_dirty_trans = 0;
														
 
															 		ret = log_directory_changes(trans, root, inode, path, dst_path);
														
 
															 		BUG_ON(ret);
														
 
															 	}
														
@@ -2712,19 +2817,58 @@ next_slot:
 
															 	btrfs_free_path(path);
														
 
															 	btrfs_free_path(dst_path);
														
 
															-out:
														
 
															 	return 0;
														
 
															 }
														
 
															-int btrfs_log_inode(struct btrfs_trans_handle *trans,
														
 
															-		    struct btrfs_root *root, struct inode *inode,
														
 
															-		    int inode_only)
														
 
															+/*
														
 
															+ * follow the dentry parent pointers up the chain and see if any
														
 
															+ * of the directories in it require a full commit before they can
														
 
															+ * be logged.  Returns zero if nothing special needs to be done or 1 if
														
 
															+ * a full commit is required.
														
 
															+ */
														
 
															+static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
														
 
															+					       struct inode *inode,
														
 
															+					       struct dentry *parent,
														
 
															+					       struct super_block *sb,
														
 
															+					       u64 last_committed)
														
 
															 {
														
 
															-	int ret;
														
 
															+	int ret = 0;
														
 
															+	struct btrfs_root *root;
														
 
															-	start_log_trans(trans, root);
														
 
															-	ret = __btrfs_log_inode(trans, root, inode, inode_only);
														
 
															-	end_log_trans(root);
														
 
															+	if (!S_ISDIR(inode->i_mode)) {
														
 
															+		if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
														
 
															+			goto out;
														
 
															+		inode = parent->d_inode;
														
 
															+	}
														
 
															+
														
 
															+	while (1) {
														
 
															+		BTRFS_I(inode)->logged_trans = trans->transid;
														
 
															+		smp_mb();
														
 
															+
														
 
															+		if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
														
 
															+			root = BTRFS_I(inode)->root;
														
 
															+
														
 
															+			/*
														
 
															+			 * make sure any commits to the log are forced
														
 
															+			 * to be full commits
														
 
															+			 */
														
 
															+			root->fs_info->last_trans_log_full_commit =
														
 
															+				trans->transid;
														
 
															+			ret = 1;
														
 
															+			break;
														
 
															+		}
														
 
															+
														
 
															+		if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
														
 
															+			break;
														
 
															+
														
 
															+		if (parent == sb->s_root)
														
 
															+			break;
														
 
															+
														
 
															+		parent = parent->d_parent;
														
 
															+		inode = parent->d_inode;
														
 
															+
														
 
															+	}
														
 
															+out:
														
 
															 	return ret;
														
 
															 }
														
@@ -2734,31 +2878,53 @@ int btrfs_log_inode(struct btrfs_trans_handle *trans,
 
															  * only logging is done of any parent directories that are older than
														
 
															  * the last committed transaction
														
 
															  */
														
 
															-int btrfs_log_dentry(struct btrfs_trans_handle *trans,
														
 
															-		    struct btrfs_root *root, struct dentry *dentry)
														
 
															+int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
														
 
															+		    struct btrfs_root *root, struct inode *inode,
														
 
															+		    struct dentry *parent, int exists_only)
														
 
															 {
														
 
															-	int inode_only = LOG_INODE_ALL;
														
 
															+	int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
														
 
															 	struct super_block *sb;
														
 
															-	int ret;
														
 
															+	int ret = 0;
														
 
															+	u64 last_committed = root->fs_info->last_trans_committed;
														
 
															+
														
 
															+	sb = inode->i_sb;
														
 
															+
														
 
															+	if (root->fs_info->last_trans_log_full_commit >
														
 
															+	    root->fs_info->last_trans_committed) {
														
 
															+		ret = 1;
														
 
															+		goto end_no_trans;
														
 
															+	}
														
 
															+
														
 
															+	ret = check_parent_dirs_for_sync(trans, inode, parent,
														
 
															+					 sb, last_committed);
														
 
															+	if (ret)
														
 
															+		goto end_no_trans;
														
 
															 	start_log_trans(trans, root);
														
 
															-	sb = dentry->d_inode->i_sb;
														
 
															-	while (1) {
														
 
															-		ret = __btrfs_log_inode(trans, root, dentry->d_inode,
														
 
															-					inode_only);
														
 
															-		BUG_ON(ret);
														
 
															-		inode_only = LOG_INODE_EXISTS;
														
 
															-		dentry = dentry->d_parent;
														
 
															-		if (!dentry || !dentry->d_inode || sb != dentry->d_inode->i_sb)
														
 
															+	ret = btrfs_log_inode(trans, root, inode, inode_only);
														
 
															+	BUG_ON(ret);
														
 
															+	inode_only = LOG_INODE_EXISTS;
														
 
															+
														
 
															+	while (1) {
														
 
															+		if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
														
 
															 			break;
														
 
															-		if (BTRFS_I(dentry->d_inode)->generation <=
														
 
															-		    root->fs_info->last_trans_committed)
														
 
															+		inode = parent->d_inode;
														
 
															+		if (BTRFS_I(inode)->generation >
														
 
															+		    root->fs_info->last_trans_committed) {
														
 
															+			ret = btrfs_log_inode(trans, root, inode, inode_only);
														
 
															+			BUG_ON(ret);
														
 
															+		}
														
 
															+		if (parent == sb->s_root)
														
 
															 			break;
														
 
															+
														
 
															+		parent = parent->d_parent;
														
 
															 	}
														
 
															-	end_log_trans(root);
														
 
															-	return 0;
														
 
															+	ret = 0;
														
 
															+	btrfs_end_log_trans(root);
														
 
															+end_no_trans:
														
 
															+	return ret;
														
 
															 }
														
 
															 /*
														
@@ -2770,12 +2936,8 @@ int btrfs_log_dentry(struct btrfs_trans_handle *trans,
 
															 int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
														
 
															 			  struct btrfs_root *root, struct dentry *dentry)
														
 
															 {
														
 
															-	u64 gen;
														
 
															-	gen = root->fs_info->last_trans_new_blockgroup;
														
 
															-	if (gen > root->fs_info->last_trans_committed)
														
 
															-		return 1;
														
 
															-	else
														
 
															-		return btrfs_log_dentry(trans, root, dentry);
														
 
															+	return btrfs_log_inode_parent(trans, root, dentry->d_inode,
														
 
															+				      dentry->d_parent, 0);
														
 
															 }
														
 
															 /*
														
@@ -2894,3 +3056,74 @@ again:
 
															 	kfree(log_root_tree);
														
 
															 	return 0;
														
 
															 }
														
 
															+
														
 
															+/*
														
 
															+ * there are some corner cases where we want to force a full
														
 
															+ * commit instead of allowing a directory to be logged.
														
 
															+ *
														
 
															+ * They revolve around files that were unlinked from the directory, and
														
 
															+ * this function updates the parent directory so that a full commit is
														
 
															+ * properly done if it is fsync'd later after the unlinks are done.
														
 
															+ */
														
 
															+void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
														
 
															+			     struct inode *dir, struct inode *inode,
														
 
															+			     int for_rename)
														
 
															+{
														
 
															+	/*
														
 
															+	 * if this directory was already logged any new
														
 
															+	 * names for this file/dir will get recorded
														
 
															+	 */
														
 
															+	smp_mb();
														
 
															+	if (BTRFS_I(dir)->logged_trans == trans->transid)
														
 
															+		return;
														
 
															+
														
 
															+	/*
														
 
															+	 * if the inode we're about to unlink was logged,
														
 
															+	 * the log will be properly updated for any new names
														
 
															+	 */
														
 
															+	if (BTRFS_I(inode)->logged_trans == trans->transid)
														
 
															+		return;
														
 
															+
														
 
															+	/*
														
 
															+	 * when renaming files across directories, if the directory
														
 
															+	 * that we're unlinking from gets fsync'd later on, there's
														
 
															+	 * no way to find the destination directory later and fsync it
														
 
															+	 * properly.  So, we have to be conservative and force commits
														
 
															+	 * so the new name gets discovered.
														
 
															+	 */
														
 
															+	if (for_rename)
														
 
															+		goto record;
														
 
															+
														
 
															+	/* we can safely do the unlink without any special recording */
														
 
															+	return;
														
 
															+
														
 
															+record:
														
 
															+	BTRFS_I(dir)->last_unlink_trans = trans->transid;
														
 
															+}
														
 
															+
														
 
															+/*
														
 
															+ * Call this after adding a new name for a file and it will properly
														
 
															+ * update the log to reflect the new name.
														
 
															+ *
														
 
															+ * It will return zero if all goes well, and it will return 1 if a
														
 
															+ * full transaction commit is required.
														
 
															+ */
														
 
															+int btrfs_log_new_name(struct btrfs_trans_handle *trans,
														
 
															+			struct inode *inode, struct inode *old_dir,
														
 
															+			struct dentry *parent)
														
 
															+{
														
 
															+	struct btrfs_root * root = BTRFS_I(inode)->root;
														
 
															+
														
 
															+	/*
														
 
															+	 * if this inode hasn't been logged and the directory we're renaming it
														
 
															+	 * from hasn't been logged, we don't need to log it
														
 
															+	 */
														
 
															+	if (BTRFS_I(inode)->logged_trans <=
														
 
															+	    root->fs_info->last_trans_committed &&
														
 
															+	    (!old_dir || BTRFS_I(old_dir)->logged_trans <=
														
 
															+		    root->fs_info->last_trans_committed))
														
 
															+		return 0;
														
 
															+
														
 
															+	return btrfs_log_inode_parent(trans, root, inode, parent, 1);
														
 
															+}
														
 
															+
														
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -22,14 +22,9 @@
 
															 int btrfs_sync_log(struct btrfs_trans_handle *trans,
														
 
															 		   struct btrfs_root *root);
														
 
															 int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
														
 
															-int btrfs_log_dentry(struct btrfs_trans_handle *trans,
														
 
															-		    struct btrfs_root *root, struct dentry *dentry);
														
 
															 int btrfs_recover_log_trees(struct btrfs_root *tree_root);
														
 
															 int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
														
 
															 			  struct btrfs_root *root, struct dentry *dentry);
														
 
															-int btrfs_log_inode(struct btrfs_trans_handle *trans,
														
 
															-		    struct btrfs_root *root, struct inode *inode,
														
 
															-		    int inode_only);
														
 
															 int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
														
 
															 				 struct btrfs_root *root,
														
 
															 				 const char *name, int name_len,
														
@@ -38,4 +33,16 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
 
															 			       struct btrfs_root *root,
														
 
															 			       const char *name, int name_len,
														
 
															 			       struct inode *inode, u64 dirid);
														
 
															+int btrfs_join_running_log_trans(struct btrfs_root *root);
														
 
															+int btrfs_end_log_trans(struct btrfs_root *root);
														
 
															+int btrfs_pin_log_trans(struct btrfs_root *root);
														
 
															+int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
														
 
															+		    struct btrfs_root *root, struct inode *inode,
														
 
															+		    struct dentry *parent, int exists_only);
														
 
															+void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
														
 
															+			     struct inode *dir, struct inode *inode,
														
 
															+			     int for_rename);
														
 
															+int btrfs_log_new_name(struct btrfs_trans_handle *trans,
														
 
															+			struct inode *inode, struct inode *old_dir,
														
 
															+			struct dentry *parent);
														
 
															 #endif