15 years ago · 925d169f5b
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -163,7 +163,6 @@ fail:
 
				  */
			
 
				 static void end_compressed_bio_read(struct bio *bio, int err)
			
 
				 {
			
 
				-	struct extent_io_tree *tree;
			
 
				 	struct compressed_bio *cb = bio->bi_private;
			
 
				 	struct inode *inode;
			
 
				 	struct page *page;
			
@@ -187,7 +186,6 @@ static void end_compressed_bio_read(struct bio *bio, int err)
 
				 	/* ok, we're the last bio for this extent, lets start
			
 
				 	 * the decompression.
			
 
				 	 */
			
 
				-	tree = &BTRFS_I(inode)->io_tree;
			
 
				 	ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
			
 
				 					cb->start,
			
 
				 					cb->orig_bio->bi_io_vec,
			
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -200,7 +200,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 
				 		      struct extent_buffer **cow_ret, u64 new_root_objectid)
			
 
				 {
			
 
				 	struct extent_buffer *cow;
			
 
				-	u32 nritems;
			
 
				 	int ret = 0;
			
 
				 	int level;
			
 
				 	struct btrfs_disk_key disk_key;
			
@@ -210,7 +209,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 
				 	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
			
 
				 
			
 
				 	level = btrfs_header_level(buf);
			
 
				-	nritems = btrfs_header_nritems(buf);
			
 
				 	if (level == 0)
			
 
				 		btrfs_item_key(buf, &disk_key, 0);
			
 
				 	else
			
@@ -1008,7 +1006,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
				 	int wret;
			
 
				 	int pslot;
			
 
				 	int orig_slot = path->slots[level];
			
 
				-	int err_on_enospc = 0;
			
 
				 	u64 orig_ptr;
			
 
				 
			
 
				 	if (level == 0)
			
@@ -1071,8 +1068,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
				 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
			
 
				 		return 0;
			
 
				 
			
 
				-	if (btrfs_header_nritems(mid) < 2)
			
 
				-		err_on_enospc = 1;
			
 
				+	btrfs_header_nritems(mid);
			
 
				 
			
 
				 	left = read_node_slot(root, parent, pslot - 1);
			
 
				 	if (left) {
			
@@ -1103,8 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
				 		wret = push_node_left(trans, root, left, mid, 1);
			
 
				 		if (wret < 0)
			
 
				 			ret = wret;
			
 
				-		if (btrfs_header_nritems(mid) < 2)
			
 
				-			err_on_enospc = 1;
			
 
				+		btrfs_header_nritems(mid);
			
 
				 	}
			
 
				 
			
 
				 	/*
			
@@ -1224,14 +1219,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 
				 	int wret;
			
 
				 	int pslot;
			
 
				 	int orig_slot = path->slots[level];
			
 
				-	u64 orig_ptr;
			
 
				 
			
 
				 	if (level == 0)
			
 
				 		return 1;
			
 
				 
			
 
				 	mid = path->nodes[level];
			
 
				 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
			
 
				-	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
			
 
				 
			
 
				 	if (level < BTRFS_MAX_LEVEL - 1)
			
 
				 		parent = path->nodes[level + 1];
			
@@ -1577,13 +1570,33 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 
				 	blocksize = btrfs_level_size(root, level - 1);
			
 
				 
			
 
				 	tmp = btrfs_find_tree_block(root, blocknr, blocksize);
			
 
				-	if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
			
 
				-		/*
			
 
				-		 * we found an up to date block without sleeping, return
			
 
				-		 * right away
			
 
				-		 */
			
 
				-		*eb_ret = tmp;
			
 
				-		return 0;
			
 
				+	if (tmp) {
			
 
				+		if (btrfs_buffer_uptodate(tmp, 0)) {
			
 
				+			if (btrfs_buffer_uptodate(tmp, gen)) {
			
 
				+				/*
			
 
				+				 * we found an up to date block without
			
 
				+				 * sleeping, return
			
 
				+				 * right away
			
 
				+				 */
			
 
				+				*eb_ret = tmp;
			
 
				+				return 0;
			
 
				+			}
			
 
				+			/* the pages were up to date, but we failed
			
 
				+			 * the generation number check.  Do a full
			
 
				+			 * read for the generation number that is correct.
			
 
				+			 * We must do this without dropping locks so
			
 
				+			 * we can trust our generation number
			
 
				+			 */
			
 
				+			free_extent_buffer(tmp);
			
 
				+			tmp = read_tree_block(root, blocknr, blocksize, gen);
			
 
				+			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
			
 
				+				*eb_ret = tmp;
			
 
				+				return 0;
			
 
				+			}
			
 
				+			free_extent_buffer(tmp);
			
 
				+			btrfs_release_path(NULL, p);
			
 
				+			return -EIO;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	/*
			
@@ -1596,8 +1609,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 
				 	btrfs_unlock_up_safe(p, level + 1);
			
 
				 	btrfs_set_path_blocking(p);
			
 
				 
			
 
				-	if (tmp)
			
 
				-		free_extent_buffer(tmp);
			
 
				+	free_extent_buffer(tmp);
			
 
				 	if (p->reada)
			
 
				 		reada_for_search(root, p, level, slot, key->objectid);
			
 
				 
			
@@ -2548,7 +2560,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 
				 {
			
 
				 	struct btrfs_disk_key disk_key;
			
 
				 	struct extent_buffer *right = path->nodes[0];
			
 
				-	int slot;
			
 
				 	int i;
			
 
				 	int push_space = 0;
			
 
				 	int push_items = 0;
			
@@ -2560,8 +2571,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 
				 	u32 this_item_size;
			
 
				 	u32 old_left_item_size;
			
 
				 
			
 
				-	slot = path->slots[1];
			
 
				-
			
 
				 	if (empty)
			
 
				 		nr = min(right_nritems, max_slot);
			
 
				 	else
			
@@ -3330,7 +3339,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 
				 {
			
 
				 	int ret = 0;
			
 
				 	int slot;
			
 
				-	int slot_orig;
			
 
				 	struct extent_buffer *leaf;
			
 
				 	struct btrfs_item *item;
			
 
				 	u32 nritems;
			
@@ -3340,7 +3348,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 
				 	unsigned int size_diff;
			
 
				 	int i;
			
 
				 
			
 
				-	slot_orig = path->slots[0];
			
 
				 	leaf = path->nodes[0];
			
 
				 	slot = path->slots[0];
			
 
				 
			
@@ -3445,7 +3452,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 
				 {
			
 
				 	int ret = 0;
			
 
				 	int slot;
			
 
				-	int slot_orig;
			
 
				 	struct extent_buffer *leaf;
			
 
				 	struct btrfs_item *item;
			
 
				 	u32 nritems;
			
@@ -3454,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 
				 	unsigned int old_size;
			
 
				 	int i;
			
 
				 
			
 
				-	slot_orig = path->slots[0];
			
 
				 	leaf = path->nodes[0];
			
 
				 
			
 
				 	nritems = btrfs_header_nritems(leaf);
			
@@ -3787,7 +3792,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 
				 			    struct btrfs_key *cpu_key, u32 *data_size,
			
 
				 			    int nr)
			
 
				 {
			
 
				-	struct extent_buffer *leaf;
			
 
				 	int ret = 0;
			
 
				 	int slot;
			
 
				 	int i;
			
@@ -3804,7 +3808,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 
				 	if (ret < 0)
			
 
				 		goto out;
			
 
				 
			
 
				-	leaf = path->nodes[0];
			
 
				 	slot = path->slots[0];
			
 
				 	BUG_ON(slot < 0);
			
 
				 
			
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -99,6 +99,9 @@ struct btrfs_ordered_sum;
 
				  */
			
 
				 #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
			
 
				 
			
 
				+/* For storing free space cache */
			
 
				+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
			
 
				+
			
 
				 /* dummy objectid represents multiple objectids */
			
 
				 #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
			
 
				 
			
@@ -265,6 +268,22 @@ struct btrfs_chunk {
 
				 	/* additional stripes go here */
			
 
				 } __attribute__ ((__packed__));
			
 
				 
			
 
				+#define BTRFS_FREE_SPACE_EXTENT	1
			
 
				+#define BTRFS_FREE_SPACE_BITMAP	2
			
 
				+
			
 
				+struct btrfs_free_space_entry {
			
 
				+	__le64 offset;
			
 
				+	__le64 bytes;
			
 
				+	u8 type;
			
 
				+} __attribute__ ((__packed__));
			
 
				+
			
 
				+struct btrfs_free_space_header {
			
 
				+	struct btrfs_disk_key location;
			
 
				+	__le64 generation;
			
 
				+	__le64 num_entries;
			
 
				+	__le64 num_bitmaps;
			
 
				+} __attribute__ ((__packed__));
			
 
				+
			
 
				 static inline unsigned long btrfs_chunk_item_size(int num_stripes)
			
 
				 {
			
 
				 	BUG_ON(num_stripes == 0);
			
@@ -365,8 +384,10 @@ struct btrfs_super_block {
 
				 
			
 
				 	char label[BTRFS_LABEL_SIZE];
			
 
				 
			
 
				+	__le64 cache_generation;
			
 
				+
			
 
				 	/* future expansion */
			
 
				-	__le64 reserved[32];
			
 
				+	__le64 reserved[31];
			
 
				 	u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
			
 
				 } __attribute__ ((__packed__));
			
 
				 
			
@@ -375,13 +396,15 @@ struct btrfs_super_block {
 
				  * ones specified below then we will fail to mount
			
 
				  */
			
 
				 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF	(1ULL << 0)
			
 
				-#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(2ULL << 0)
			
 
				+#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(1ULL << 1)
			
 
				+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS	(1ULL << 2)
			
 
				 
			
 
				 #define BTRFS_FEATURE_COMPAT_SUPP		0ULL
			
 
				 #define BTRFS_FEATURE_COMPAT_RO_SUPP		0ULL
			
 
				-#define BTRFS_FEATURE_INCOMPAT_SUPP		\
			
 
				-	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |	\
			
 
				-	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)
			
 
				+#define BTRFS_FEATURE_INCOMPAT_SUPP			\
			
 
				+	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
			
 
				+	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
			
 
				+	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
			
 
				 
			
 
				 /*
			
 
				  * A leaf is full of items. offset and size tell us where to find
			
@@ -675,7 +698,8 @@ struct btrfs_block_group_item {
 
				 struct btrfs_space_info {
			
 
				 	u64 flags;
			
 
				 
			
 
				-	u64 total_bytes;	/* total bytes in the space */
			
 
				+	u64 total_bytes;	/* total bytes in the space,
			
 
				+				   this doesn't take mirrors into account */
			
 
				 	u64 bytes_used;		/* total bytes used,
			
 
				 				   this does't take mirrors into account */
			
 
				 	u64 bytes_pinned;	/* total bytes pinned, will be freed when the
			
@@ -687,6 +711,8 @@ struct btrfs_space_info {
 
				 	u64 bytes_may_use;	/* number of bytes that may be used for
			
 
				 				   delalloc/allocations */
			
 
				 	u64 disk_used;		/* total bytes used on disk */
			
 
				+	u64 disk_total;		/* total bytes on disk, takes mirrors into
			
 
				+				   account */
			
 
				 
			
 
				 	int full;		/* indicates that we cannot allocate any more
			
 
				 				   chunks for this space */
			
@@ -750,6 +776,14 @@ enum btrfs_caching_type {
 
				 	BTRFS_CACHE_FINISHED	= 2,
			
 
				 };
			
 
				 
			
 
				+enum btrfs_disk_cache_state {
			
 
				+	BTRFS_DC_WRITTEN	= 0,
			
 
				+	BTRFS_DC_ERROR		= 1,
			
 
				+	BTRFS_DC_CLEAR		= 2,
			
 
				+	BTRFS_DC_SETUP		= 3,
			
 
				+	BTRFS_DC_NEED_WRITE	= 4,
			
 
				+};
			
 
				+
			
 
				 struct btrfs_caching_control {
			
 
				 	struct list_head list;
			
 
				 	struct mutex mutex;
			
@@ -763,6 +797,7 @@ struct btrfs_block_group_cache {
 
				 	struct btrfs_key key;
			
 
				 	struct btrfs_block_group_item item;
			
 
				 	struct btrfs_fs_info *fs_info;
			
 
				+	struct inode *inode;
			
 
				 	spinlock_t lock;
			
 
				 	u64 pinned;
			
 
				 	u64 reserved;
			
@@ -773,8 +808,11 @@ struct btrfs_block_group_cache {
 
				 	int extents_thresh;
			
 
				 	int free_extents;
			
 
				 	int total_bitmaps;
			
 
				-	int ro;
			
 
				-	int dirty;
			
 
				+	int ro:1;
			
 
				+	int dirty:1;
			
 
				+	int iref:1;
			
 
				+
			
 
				+	int disk_cache_state;
			
 
				 
			
 
				 	/* cache tracking stuff */
			
 
				 	int cached;
			
@@ -863,6 +901,7 @@ struct btrfs_fs_info {
 
				 	struct btrfs_transaction *running_transaction;
			
 
				 	wait_queue_head_t transaction_throttle;
			
 
				 	wait_queue_head_t transaction_wait;
			
 
				+	wait_queue_head_t transaction_blocked_wait;
			
 
				 	wait_queue_head_t async_submit_wait;
			
 
				 
			
 
				 	struct btrfs_super_block super_copy;
			
@@ -949,6 +988,7 @@ struct btrfs_fs_info {
 
				 	struct btrfs_workers endio_meta_workers;
			
 
				 	struct btrfs_workers endio_meta_write_workers;
			
 
				 	struct btrfs_workers endio_write_workers;
			
 
				+	struct btrfs_workers endio_freespace_worker;
			
 
				 	struct btrfs_workers submit_workers;
			
 
				 	/*
			
 
				 	 * fixup workers take dirty pages that didn't properly go through
			
@@ -1192,6 +1232,9 @@ struct btrfs_root {
 
				 #define BTRFS_MOUNT_NOSSD		(1 << 9)
			
 
				 #define BTRFS_MOUNT_DISCARD		(1 << 10)
			
 
				 #define BTRFS_MOUNT_FORCE_COMPRESS      (1 << 11)
			
 
				+#define BTRFS_MOUNT_SPACE_CACHE		(1 << 12)
			
 
				+#define BTRFS_MOUNT_CLEAR_CACHE		(1 << 13)
			
 
				+#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
			
 
				 
			
 
				 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
			
 
				 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
			
@@ -1665,6 +1708,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
 
				 	write_eb_member(eb, item, struct btrfs_dir_item, location, key);
			
 
				 }
			
 
				 
			
 
				+BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
			
 
				+		   num_entries, 64);
			
 
				+BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
			
 
				+		   num_bitmaps, 64);
			
 
				+BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
			
 
				+		   generation, 64);
			
 
				+
			
 
				+static inline void btrfs_free_space_key(struct extent_buffer *eb,
			
 
				+					struct btrfs_free_space_header *h,
			
 
				+					struct btrfs_disk_key *key)
			
 
				+{
			
 
				+	read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
			
 
				+}
			
 
				+
			
 
				+static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
			
 
				+					    struct btrfs_free_space_header *h,
			
 
				+					    struct btrfs_disk_key *key)
			
 
				+{
			
 
				+	write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
			
 
				+}
			
 
				+
			
 
				 /* struct btrfs_disk_key */
			
 
				 BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
			
 
				 			 objectid, 64);
			
@@ -1876,6 +1940,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
 
				 			 incompat_flags, 64);
			
 
				 BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
			
 
				 			 csum_type, 16);
			
 
				+BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
			
 
				+			 cache_generation, 64);
			
 
				 
			
 
				 static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
			
 
				 {
			
@@ -1988,6 +2054,12 @@ static inline struct dentry *fdentry(struct file *file)
 
				 	return file->f_path.dentry;
			
 
				 }
			
 
				 
			
 
				+static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
			
 
				+{
			
 
				+	return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
			
 
				+		(space_info->flags & BTRFS_BLOCK_GROUP_DATA));
			
 
				+}
			
 
				+
			
 
				 /* extent-tree.c */
			
 
				 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
			
 
				 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
			
@@ -2079,7 +2151,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 
				 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
			
 
				 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
			
 
				 				struct btrfs_root *root,
			
 
				-				int num_items, int *retries);
			
 
				+				int num_items);
			
 
				 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
			
 
				 				struct btrfs_root *root);
			
 
				 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
			
@@ -2100,7 +2172,7 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
 
				 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
			
 
				 			struct btrfs_root *root,
			
 
				 			struct btrfs_block_rsv *block_rsv,
			
 
				-			u64 num_bytes, int *retries);
			
 
				+			u64 num_bytes);
			
 
				 int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
			
 
				 			  struct btrfs_root *root,
			
 
				 			  struct btrfs_block_rsv *block_rsv,
			
@@ -2115,6 +2187,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 
				 			     struct btrfs_block_group_cache *cache);
			
 
				 int btrfs_set_block_group_rw(struct btrfs_root *root,
			
 
				 			     struct btrfs_block_group_cache *cache);
			
 
				+void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
			
 
				 /* ctree.c */
			
 
				 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
			
 
				 		     int level, int *slot);
			
@@ -2373,7 +2446,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 
				 			       u32 min_type);
			
 
				 
			
 
				 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
			
 
				-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
			
 
				+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
			
 
				+				   int sync);
			
 
				 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
			
 
				 			      struct extent_state **cached_state);
			
 
				 int btrfs_writepages(struct address_space *mapping,
			
@@ -2426,6 +2500,10 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root);
 
				 int btrfs_prealloc_file_range(struct inode *inode, int mode,
			
 
				 			      u64 start, u64 num_bytes, u64 min_size,
			
 
				 			      loff_t actual_len, u64 *alloc_hint);
			
 
				+int btrfs_prealloc_file_range_trans(struct inode *inode,
			
 
				+				    struct btrfs_trans_handle *trans, int mode,
			
 
				+				    u64 start, u64 num_bytes, u64 min_size,
			
 
				+				    loff_t actual_len, u64 *alloc_hint);
			
 
				 extern const struct dentry_operations btrfs_dentry_operations;
			
 
				 
			
 
				 /* ioctl.c */
			
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -427,5 +427,5 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
 
				 		ret = btrfs_truncate_item(trans, root, path,
			
 
				 					  item_len - sub_item_len, 1);
			
 
				 	}
			
 
				-	return 0;
			
 
				+	return ret;
			
 
				 }
			
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -338,7 +338,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 
				 	struct extent_io_tree *tree;
			
 
				 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
			
 
				 	u64 found_start;
			
 
				-	int found_level;
			
 
				 	unsigned long len;
			
 
				 	struct extent_buffer *eb;
			
 
				 	int ret;
			
@@ -369,8 +368,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 
				 		WARN_ON(1);
			
 
				 		goto err;
			
 
				 	}
			
 
				-	found_level = btrfs_header_level(eb);
			
 
				-
			
 
				 	csum_tree_block(root, eb, 0);
			
 
				 err:
			
 
				 	free_extent_buffer(eb);
			
@@ -481,9 +478,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
 
				 	end_io_wq->work.flags = 0;
			
 
				 
			
 
				 	if (bio->bi_rw & REQ_WRITE) {
			
 
				-		if (end_io_wq->metadata)
			
 
				+		if (end_io_wq->metadata == 1)
			
 
				 			btrfs_queue_worker(&fs_info->endio_meta_write_workers,
			
 
				 					   &end_io_wq->work);
			
 
				+		else if (end_io_wq->metadata == 2)
			
 
				+			btrfs_queue_worker(&fs_info->endio_freespace_worker,
			
 
				+					   &end_io_wq->work);
			
 
				 		else
			
 
				 			btrfs_queue_worker(&fs_info->endio_write_workers,
			
 
				 					   &end_io_wq->work);
			
@@ -497,6 +497,13 @@ static void end_workqueue_bio(struct bio *bio, int err)
 
				 	}
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * For the metadata arg you want
			
 
				+ *
			
 
				+ * 0 - if data
			
 
				+ * 1 - if normal metadata
			
 
				+ * 2 - if writing to the free space cache area
			
 
				+ */
			
 
				 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
			
 
				 			int metadata)
			
 
				 {
			
@@ -533,11 +540,9 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
 
				 
			
 
				 static void run_one_async_start(struct btrfs_work *work)
			
 
				 {
			
 
				-	struct btrfs_fs_info *fs_info;
			
 
				 	struct async_submit_bio *async;
			
 
				 
			
 
				 	async = container_of(work, struct  async_submit_bio, work);
			
 
				-	fs_info = BTRFS_I(async->inode)->root->fs_info;
			
 
				 	async->submit_bio_start(async->inode, async->rw, async->bio,
			
 
				 			       async->mirror_num, async->bio_flags,
			
 
				 			       async->bio_offset);
			
@@ -850,12 +855,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 
				 				      u32 blocksize, u64 parent_transid)
			
 
				 {
			
 
				 	struct extent_buffer *buf = NULL;
			
 
				-	struct inode *btree_inode = root->fs_info->btree_inode;
			
 
				-	struct extent_io_tree *io_tree;
			
 
				 	int ret;
			
 
				 
			
 
				-	io_tree = &BTRFS_I(btree_inode)->io_tree;
			
 
				-
			
 
				 	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
			
 
				 	if (!buf)
			
 
				 		return NULL;
			
@@ -1377,7 +1378,6 @@ static int bio_ready_for_csum(struct bio *bio)
 
				 	u64 start = 0;
			
 
				 	struct page *page;
			
 
				 	struct extent_io_tree *io_tree = NULL;
			
 
				-	struct btrfs_fs_info *info = NULL;
			
 
				 	struct bio_vec *bvec;
			
 
				 	int i;
			
 
				 	int ret;
			
@@ -1396,7 +1396,6 @@ static int bio_ready_for_csum(struct bio *bio)
 
				 		buf_len = page->private >> 2;
			
 
				 		start = page_offset(page) + bvec->bv_offset;
			
 
				 		io_tree = &BTRFS_I(page->mapping->host)->io_tree;
			
 
				-		info = BTRFS_I(page->mapping->host)->root->fs_info;
			
 
				 	}
			
 
				 	/* are we fully contained in this bio? */
			
 
				 	if (buf_len <= length)
			
@@ -1680,12 +1679,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
				 
			
 
				 	init_waitqueue_head(&fs_info->transaction_throttle);
			
 
				 	init_waitqueue_head(&fs_info->transaction_wait);
			
 
				+	init_waitqueue_head(&fs_info->transaction_blocked_wait);
			
 
				 	init_waitqueue_head(&fs_info->async_submit_wait);
			
 
				 
			
 
				 	__setup_root(4096, 4096, 4096, 4096, tree_root,
			
 
				 		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
			
 
				 
			
 
				-
			
 
				 	bh = btrfs_read_dev_super(fs_devices->latest_bdev);
			
 
				 	if (!bh)
			
 
				 		goto fail_iput;
			
@@ -1775,6 +1774,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
				 	btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
			
 
				 			   fs_info->thread_pool_size,
			
 
				 			   &fs_info->generic_worker);
			
 
				+	btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
			
 
				+			   1, &fs_info->generic_worker);
			
 
				 
			
 
				 	/*
			
 
				 	 * endios are largely parallel and should have a very
			
@@ -1795,6 +1796,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
				 	btrfs_start_workers(&fs_info->endio_meta_workers, 1);
			
 
				 	btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
			
 
				 	btrfs_start_workers(&fs_info->endio_write_workers, 1);
			
 
				+	btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
			
 
				 
			
 
				 	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
			
 
				 	fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
			
@@ -1993,6 +1995,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
				 	if (!(sb->s_flags & MS_RDONLY)) {
			
 
				 		down_read(&fs_info->cleanup_work_sem);
			
 
				 		btrfs_orphan_cleanup(fs_info->fs_root);
			
 
				+		btrfs_orphan_cleanup(fs_info->tree_root);
			
 
				 		up_read(&fs_info->cleanup_work_sem);
			
 
				 	}
			
 
				 
			
@@ -2035,6 +2038,7 @@ fail_sb_buffer:
 
				 	btrfs_stop_workers(&fs_info->endio_meta_workers);
			
 
				 	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
			
 
				 	btrfs_stop_workers(&fs_info->endio_write_workers);
			
 
				+	btrfs_stop_workers(&fs_info->endio_freespace_worker);
			
 
				 	btrfs_stop_workers(&fs_info->submit_workers);
			
 
				 fail_iput:
			
 
				 	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
			
@@ -2410,6 +2414,7 @@ int close_ctree(struct btrfs_root *root)
 
				 	fs_info->closing = 1;
			
 
				 	smp_mb();
			
 
				 
			
 
				+	btrfs_put_block_group_cache(fs_info);
			
 
				 	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
			
 
				 		ret =  btrfs_commit_super(root);
			
 
				 		if (ret)
			
@@ -2456,6 +2461,7 @@ int close_ctree(struct btrfs_root *root)
 
				 	btrfs_stop_workers(&fs_info->endio_meta_workers);
			
 
				 	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
			
 
				 	btrfs_stop_workers(&fs_info->endio_write_workers);
			
 
				+	btrfs_stop_workers(&fs_info->endio_freespace_worker);
			
 
				 	btrfs_stop_workers(&fs_info->submit_workers);
			
 
				 
			
 
				 	btrfs_close_devices(fs_info->fs_devices);
			
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -104,7 +104,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
 
				 			  struct address_space *mapping, gfp_t mask)
			
 
				 {
			
 
				 	tree->state = RB_ROOT;
			
 
				-	tree->buffer = RB_ROOT;
			
 
				+	INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
			
 
				 	tree->ops = NULL;
			
 
				 	tree->dirty_bytes = 0;
			
 
				 	spin_lock_init(&tree->lock);
			
@@ -235,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
			
 
				-					  u64 offset, struct rb_node *node)
			
 
				-{
			
 
				-	struct rb_root *root = &tree->buffer;
			
 
				-	struct rb_node **p = &root->rb_node;
			
 
				-	struct rb_node *parent = NULL;
			
 
				-	struct extent_buffer *eb;
			
 
				-
			
 
				-	while (*p) {
			
 
				-		parent = *p;
			
 
				-		eb = rb_entry(parent, struct extent_buffer, rb_node);
			
 
				-
			
 
				-		if (offset < eb->start)
			
 
				-			p = &(*p)->rb_left;
			
 
				-		else if (offset > eb->start)
			
 
				-			p = &(*p)->rb_right;
			
 
				-		else
			
 
				-			return eb;
			
 
				-	}
			
 
				-
			
 
				-	rb_link_node(node, parent, p);
			
 
				-	rb_insert_color(node, root);
			
 
				-	return NULL;
			
 
				-}
			
 
				-
			
 
				-static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
			
 
				-					   u64 offset)
			
 
				-{
			
 
				-	struct rb_root *root = &tree->buffer;
			
 
				-	struct rb_node *n = root->rb_node;
			
 
				-	struct extent_buffer *eb;
			
 
				-
			
 
				-	while (n) {
			
 
				-		eb = rb_entry(n, struct extent_buffer, rb_node);
			
 
				-		if (offset < eb->start)
			
 
				-			n = n->rb_left;
			
 
				-		else if (offset > eb->start)
			
 
				-			n = n->rb_right;
			
 
				-		else
			
 
				-			return eb;
			
 
				-	}
			
 
				-	return NULL;
			
 
				-}
			
 
				-
			
 
				 static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
			
 
				 		     struct extent_state *other)
			
 
				 {
			
@@ -1901,10 +1857,8 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
 
				 	struct page *page = bvec->bv_page;
			
 
				 	struct extent_io_tree *tree = bio->bi_private;
			
 
				 	u64 start;
			
 
				-	u64 end;
			
 
				 
			
 
				 	start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
			
 
				-	end = start + bvec->bv_len - 1;
			
 
				 
			
 
				 	bio->bi_private = NULL;
			
 
				 
			
@@ -2204,7 +2158,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
				 	u64 last_byte = i_size_read(inode);
			
 
				 	u64 block_start;
			
 
				 	u64 iosize;
			
 
				-	u64 unlock_start;
			
 
				 	sector_t sector;
			
 
				 	struct extent_state *cached_state = NULL;
			
 
				 	struct extent_map *em;
			
@@ -2329,7 +2282,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
				 		if (tree->ops && tree->ops->writepage_end_io_hook)
			
 
				 			tree->ops->writepage_end_io_hook(page, start,
			
 
				 							 page_end, NULL, 1);
			
 
				-		unlock_start = page_end + 1;
			
 
				 		goto done;
			
 
				 	}
			
 
				 
			
@@ -2340,7 +2292,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
				 			if (tree->ops && tree->ops->writepage_end_io_hook)
			
 
				 				tree->ops->writepage_end_io_hook(page, cur,
			
 
				 							 page_end, NULL, 1);
			
 
				-			unlock_start = page_end + 1;
			
 
				 			break;
			
 
				 		}
			
 
				 		em = epd->get_extent(inode, page, pg_offset, cur,
			
@@ -2387,7 +2338,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
				 
			
 
				 			cur += iosize;
			
 
				 			pg_offset += iosize;
			
 
				-			unlock_start = cur;
			
 
				 			continue;
			
 
				 		}
			
 
				 		/* leave this out until we have a page_mkwrite call */
			
@@ -2473,7 +2423,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 
				 	pgoff_t index;
			
 
				 	pgoff_t end;		/* Inclusive */
			
 
				 	int scanned = 0;
			
 
				-	int range_whole = 0;
			
 
				 
			
 
				 	pagevec_init(&pvec, 0);
			
 
				 	if (wbc->range_cyclic) {
			
@@ -2482,8 +2431,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 
				 	} else {
			
 
				 		index = wbc->range_start >> PAGE_CACHE_SHIFT;
			
 
				 		end = wbc->range_end >> PAGE_CACHE_SHIFT;
			
 
				-		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			
 
				-			range_whole = 1;
			
 
				 		scanned = 1;
			
 
				 	}
			
 
				 retry:
			
@@ -2823,6 +2770,8 @@ int extent_prepare_write(struct extent_io_tree *tree,
 
				 					 NULL, 1,
			
 
				 					 end_bio_extent_preparewrite, 0,
			
 
				 					 0, 0);
			
 
				+			if (ret && !err)
			
 
				+				err = ret;
			
 
				 			iocount++;
			
 
				 			block_start = block_start + iosize;
			
 
				 		} else {
			
@@ -3104,6 +3053,39 @@ static void __free_extent_buffer(struct extent_buffer *eb)
 
				 	kmem_cache_free(extent_buffer_cache, eb);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Helper for releasing extent buffer page.
			
 
				+ */
			
 
				+static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
			
 
				+						unsigned long start_idx)
			
 
				+{
			
 
				+	unsigned long index;
			
 
				+	struct page *page;
			
 
				+
			
 
				+	if (!eb->first_page)
			
 
				+		return;
			
 
				+
			
 
				+	index = num_extent_pages(eb->start, eb->len);
			
 
				+	if (start_idx >= index)
			
 
				+		return;
			
 
				+
			
 
				+	do {
			
 
				+		index--;
			
 
				+		page = extent_buffer_page(eb, index);
			
 
				+		if (page)
			
 
				+			page_cache_release(page);
			
 
				+	} while (index != start_idx);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Helper for releasing the extent buffer.
			
 
				+ */
			
 
				+static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
			
 
				+{
			
 
				+	btrfs_release_extent_buffer_page(eb, 0);
			
 
				+	__free_extent_buffer(eb);
			
 
				+}
			
 
				+
			
 
				 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
			
 
				 					  u64 start, unsigned long len,
			
 
				 					  struct page *page0,
			
@@ -3117,16 +3099,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 
				 	struct page *p;
			
 
				 	struct address_space *mapping = tree->mapping;
			
 
				 	int uptodate = 1;
			
 
				+	int ret;
			
 
				 
			
 
				-	spin_lock(&tree->buffer_lock);
			
 
				-	eb = buffer_search(tree, start);
			
 
				-	if (eb) {
			
 
				-		atomic_inc(&eb->refs);
			
 
				-		spin_unlock(&tree->buffer_lock);
			
 
				+	rcu_read_lock();
			
 
				+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
			
 
				+	if (eb && atomic_inc_not_zero(&eb->refs)) {
			
 
				+		rcu_read_unlock();
			
 
				 		mark_page_accessed(eb->first_page);
			
 
				 		return eb;
			
 
				 	}
			
 
				-	spin_unlock(&tree->buffer_lock);
			
 
				+	rcu_read_unlock();
			
 
				 
			
 
				 	eb = __alloc_extent_buffer(tree, start, len, mask);
			
 
				 	if (!eb)
			
@@ -3165,26 +3147,31 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 
				 	if (uptodate)
			
 
				 		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
			
 
				 
			
 
				+	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
			
 
				+	if (ret)
			
 
				+		goto free_eb;
			
 
				+
			
 
				 	spin_lock(&tree->buffer_lock);
			
 
				-	exists = buffer_tree_insert(tree, start, &eb->rb_node);
			
 
				-	if (exists) {
			
 
				+	ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
			
 
				+	if (ret == -EEXIST) {
			
 
				+		exists = radix_tree_lookup(&tree->buffer,
			
 
				+						start >> PAGE_CACHE_SHIFT);
			
 
				 		/* add one reference for the caller */
			
 
				 		atomic_inc(&exists->refs);
			
 
				 		spin_unlock(&tree->buffer_lock);
			
 
				+		radix_tree_preload_end();
			
 
				 		goto free_eb;
			
 
				 	}
			
 
				 	/* add one reference for the tree */
			
 
				 	atomic_inc(&eb->refs);
			
 
				 	spin_unlock(&tree->buffer_lock);
			
 
				+	radix_tree_preload_end();
			
 
				 	return eb;
			
 
				 
			
 
				 free_eb:
			
 
				 	if (!atomic_dec_and_test(&eb->refs))
			
 
				 		return exists;
			
 
				-	for (index = 1; index < i; index++)
			
 
				-		page_cache_release(extent_buffer_page(eb, index));
			
 
				-	page_cache_release(extent_buffer_page(eb, 0));
			
 
				-	__free_extent_buffer(eb);
			
 
				+	btrfs_release_extent_buffer(eb);
			
 
				 	return exists;
			
 
				 }
			
 
				 
			
@@ -3194,16 +3181,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
 
				 {
			
 
				 	struct extent_buffer *eb;
			
 
				 
			
 
				-	spin_lock(&tree->buffer_lock);
			
 
				-	eb = buffer_search(tree, start);
			
 
				-	if (eb)
			
 
				-		atomic_inc(&eb->refs);
			
 
				-	spin_unlock(&tree->buffer_lock);
			
 
				-
			
 
				-	if (eb)
			
 
				+	rcu_read_lock();
			
 
				+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
			
 
				+	if (eb && atomic_inc_not_zero(&eb->refs)) {
			
 
				+		rcu_read_unlock();
			
 
				 		mark_page_accessed(eb->first_page);
			
 
				+		return eb;
			
 
				+	}
			
 
				+	rcu_read_unlock();
			
 
				 
			
 
				-	return eb;
			
 
				+	return NULL;
			
 
				 }
			
 
				 
			
 
				 void free_extent_buffer(struct extent_buffer *eb)
			
@@ -3833,34 +3820,45 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
			
 
				+{
			
 
				+	struct extent_buffer *eb =
			
 
				+			container_of(head, struct extent_buffer, rcu_head);
			
 
				+
			
 
				+	btrfs_release_extent_buffer(eb);
			
 
				+}
			
 
				+
			
 
				 int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
			
 
				 {
			
 
				 	u64 start = page_offset(page);
			
 
				 	struct extent_buffer *eb;
			
 
				 	int ret = 1;
			
 
				-	unsigned long i;
			
 
				-	unsigned long num_pages;
			
 
				 
			
 
				 	spin_lock(&tree->buffer_lock);
			
 
				-	eb = buffer_search(tree, start);
			
 
				+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
			
 
				 	if (!eb)
			
 
				 		goto out;
			
 
				 
			
 
				-	if (atomic_read(&eb->refs) > 1) {
			
 
				+	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
			
 
				 		ret = 0;
			
 
				 		goto out;
			
 
				 	}
			
 
				-	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
			
 
				+
			
 
				+	/*
			
 
				+	 * set @eb->refs to 0 if it is already 1, and then release the @eb.
			
 
				+	 * Or go back.
			
 
				+	 */
			
 
				+	if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
			
 
				 		ret = 0;
			
 
				 		goto out;
			
 
				 	}
			
 
				-	/* at this point we can safely release the extent buffer */
			
 
				-	num_pages = num_extent_pages(eb->start, eb->len);
			
 
				-	for (i = 0; i < num_pages; i++)
			
 
				-		page_cache_release(extent_buffer_page(eb, i));
			
 
				-	rb_erase(&eb->rb_node, &tree->buffer);
			
 
				-	__free_extent_buffer(eb);
			
 
				+
			
 
				+	radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
			
 
				 out:
			
 
				 	spin_unlock(&tree->buffer_lock);
			
 
				+
			
 
				+	/* at this point we can safely release the extent buffer */
			
 
				+	if (atomic_read(&eb->refs) == 0)
			
 
				+		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
			
 
				 	return ret;
			
 
				 }
			
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -85,7 +85,7 @@ struct extent_io_ops {
 
				 
			
 
				 struct extent_io_tree {
			
 
				 	struct rb_root state;
			
 
				-	struct rb_root buffer;
			
 
				+	struct radix_tree_root buffer;
			
 
				 	struct address_space *mapping;
			
 
				 	u64 dirty_bytes;
			
 
				 	spinlock_t lock;
			
@@ -123,7 +123,7 @@ struct extent_buffer {
 
				 	unsigned long bflags;
			
 
				 	atomic_t refs;
			
 
				 	struct list_head leak_list;
			
 
				-	struct rb_node rb_node;
			
 
				+	struct rcu_head rcu_head;
			
 
				 
			
 
				 	/* the spinlock is used to protect most operations */
			
 
				 	spinlock_t lock;
			
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -335,7 +335,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
 
				 		goto out;
			
 
				 	}
			
 
				 	if (IS_ERR(rb_node)) {
			
 
				-		em = ERR_PTR(PTR_ERR(rb_node));
			
 
				+		em = ERR_CAST(rb_node);
			
 
				 		goto out;
			
 
				 	}
			
 
				 	em = rb_entry(rb_node, struct extent_map, rb_node);
			
@@ -384,7 +384,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
 
				 		goto out;
			
 
				 	}
			
 
				 	if (IS_ERR(rb_node)) {
			
 
				-		em = ERR_PTR(PTR_ERR(rb_node));
			
 
				+		em = ERR_CAST(rb_node);
			
 
				 		goto out;
			
 
				 	}
			
 
				 	em = rb_entry(rb_node, struct extent_map, rb_node);
			
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -23,10 +23,761 @@
 
				 #include "ctree.h"
			
 
				 #include "free-space-cache.h"
			
 
				 #include "transaction.h"
			
 
				+#include "disk-io.h"
			
 
				 
			
 
				 #define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
			
 
				 #define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
			
 
				 
			
 
				+static void recalculate_thresholds(struct btrfs_block_group_cache
			
 
				+				   *block_group);
			
 
				+static int link_free_space(struct btrfs_block_group_cache *block_group,
			
 
				+			   struct btrfs_free_space *info);
			
 
				+
			
 
				+struct inode *lookup_free_space_inode(struct btrfs_root *root,
			
 
				+				      struct btrfs_block_group_cache
			
 
				+				      *block_group, struct btrfs_path *path)
			
 
				+{
			
 
				+	struct btrfs_key key;
			
 
				+	struct btrfs_key location;
			
 
				+	struct btrfs_disk_key disk_key;
			
 
				+	struct btrfs_free_space_header *header;
			
 
				+	struct extent_buffer *leaf;
			
 
				+	struct inode *inode = NULL;
			
 
				+	int ret;
			
 
				+
			
 
				+	spin_lock(&block_group->lock);
			
 
				+	if (block_group->inode)
			
 
				+		inode = igrab(block_group->inode);
			
 
				+	spin_unlock(&block_group->lock);
			
 
				+	if (inode)
			
 
				+		return inode;
			
 
				+
			
 
				+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
			
 
				+	key.offset = block_group->key.objectid;
			
 
				+	key.type = 0;
			
 
				+
			
 
				+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
			
 
				+	if (ret < 0)
			
 
				+		return ERR_PTR(ret);
			
 
				+	if (ret > 0) {
			
 
				+		btrfs_release_path(root, path);
			
 
				+		return ERR_PTR(-ENOENT);
			
 
				+	}
			
 
				+
			
 
				+	leaf = path->nodes[0];
			
 
				+	header = btrfs_item_ptr(leaf, path->slots[0],
			
 
				+				struct btrfs_free_space_header);
			
 
				+	btrfs_free_space_key(leaf, header, &disk_key);
			
 
				+	btrfs_disk_key_to_cpu(&location, &disk_key);
			
 
				+	btrfs_release_path(root, path);
			
 
				+
			
 
				+	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
			
 
				+	if (!inode)
			
 
				+		return ERR_PTR(-ENOENT);
			
 
				+	if (IS_ERR(inode))
			
 
				+		return inode;
			
 
				+	if (is_bad_inode(inode)) {
			
 
				+		iput(inode);
			
 
				+		return ERR_PTR(-ENOENT);
			
 
				+	}
			
 
				+
			
 
				+	spin_lock(&block_group->lock);
			
 
				+	if (!root->fs_info->closing) {
			
 
				+		block_group->inode = igrab(inode);
			
 
				+		block_group->iref = 1;
			
 
				+	}
			
 
				+	spin_unlock(&block_group->lock);
			
 
				+
			
 
				+	return inode;
			
 
				+}
			
 
				+
			
 
				+int create_free_space_inode(struct btrfs_root *root,
			
 
				+			    struct btrfs_trans_handle *trans,
			
 
				+			    struct btrfs_block_group_cache *block_group,
			
 
				+			    struct btrfs_path *path)
			
 
				+{
			
 
				+	struct btrfs_key key;
			
 
				+	struct btrfs_disk_key disk_key;
			
 
				+	struct btrfs_free_space_header *header;
			
 
				+	struct btrfs_inode_item *inode_item;
			
 
				+	struct extent_buffer *leaf;
			
 
				+	u64 objectid;
			
 
				+	int ret;
			
 
				+
			
 
				+	ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
			
 
				+	if (ret < 0)
			
 
				+		return ret;
			
 
				+
			
 
				+	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
			
 
				+	if (ret)
			
 
				+		return ret;
			
 
				+
			
 
				+	leaf = path->nodes[0];
			
 
				+	inode_item = btrfs_item_ptr(leaf, path->slots[0],
			
 
				+				    struct btrfs_inode_item);
			
 
				+	btrfs_item_key(leaf, &disk_key, path->slots[0]);
			
 
				+	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			
 
				+			     sizeof(*inode_item));
			
 
				+	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
			
 
				+	btrfs_set_inode_size(leaf, inode_item, 0);
			
 
				+	btrfs_set_inode_nbytes(leaf, inode_item, 0);
			
 
				+	btrfs_set_inode_uid(leaf, inode_item, 0);
			
 
				+	btrfs_set_inode_gid(leaf, inode_item, 0);
			
 
				+	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
			
 
				+	btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
			
 
				+			      BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
			
 
				+	btrfs_set_inode_nlink(leaf, inode_item, 1);
			
 
				+	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
			
 
				+	btrfs_set_inode_block_group(leaf, inode_item,
			
 
				+				    block_group->key.objectid);
			
 
				+	btrfs_mark_buffer_dirty(leaf);
			
 
				+	btrfs_release_path(root, path);
			
 
				+
			
 
				+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
			
 
				+	key.offset = block_group->key.objectid;
			
 
				+	key.type = 0;
			
 
				+
			
 
				+	ret = btrfs_insert_empty_item(trans, root, path, &key,
			
 
				+				      sizeof(struct btrfs_free_space_header));
			
 
				+	if (ret < 0) {
			
 
				+		btrfs_release_path(root, path);
			
 
				+		return ret;
			
 
				+	}
			
 
				+	leaf = path->nodes[0];
			
 
				+	header = btrfs_item_ptr(leaf, path->slots[0],
			
 
				+				struct btrfs_free_space_header);
			
 
				+	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
			
 
				+	btrfs_set_free_space_key(leaf, header, &disk_key);
			
 
				+	btrfs_mark_buffer_dirty(leaf);
			
 
				+	btrfs_release_path(root, path);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
			
 
				+				    struct btrfs_trans_handle *trans,
			
 
				+				    struct btrfs_path *path,
			
 
				+				    struct inode *inode)
			
 
				+{
			
 
				+	loff_t oldsize;
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	trans->block_rsv = root->orphan_block_rsv;
			
 
				+	ret = btrfs_block_rsv_check(trans, root,
			
 
				+				    root->orphan_block_rsv,
			
 
				+				    0, 5);
			
 
				+	if (ret)
			
 
				+		return ret;
			
 
				+
			
 
				+	oldsize = i_size_read(inode);
			
 
				+	btrfs_i_size_write(inode, 0);
			
 
				+	truncate_pagecache(inode, oldsize, 0);
			
 
				+
			
 
				+	/*
			
 
				+	 * We don't need an orphan item because truncating the free space cache
			
 
				+	 * will never be split across transactions.
			
 
				+	 */
			
 
				+	ret = btrfs_truncate_inode_items(trans, root, inode,
			
 
				+					 0, BTRFS_EXTENT_DATA_KEY);
			
 
				+	if (ret) {
			
 
				+		WARN_ON(1);
			
 
				+		return ret;
			
 
				+	}
			
 
				+
			
 
				+	return btrfs_update_inode(trans, root, inode);
			
 
				+}
			
 
				+
			
 
				+static int readahead_cache(struct inode *inode)
			
 
				+{
			
 
				+	struct file_ra_state *ra;
			
 
				+	unsigned long last_index;
			
 
				+
			
 
				+	ra = kzalloc(sizeof(*ra), GFP_NOFS);
			
 
				+	if (!ra)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	file_ra_state_init(ra, inode->i_mapping);
			
 
				+	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
			
 
				+
			
 
				+	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
			
 
				+
			
 
				+	kfree(ra);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+int load_free_space_cache(struct btrfs_fs_info *fs_info,
			
 
				+			  struct btrfs_block_group_cache *block_group)
			
 
				+{
			
 
				+	struct btrfs_root *root = fs_info->tree_root;
			
 
				+	struct inode *inode;
			
 
				+	struct btrfs_free_space_header *header;
			
 
				+	struct extent_buffer *leaf;
			
 
				+	struct page *page;
			
 
				+	struct btrfs_path *path;
			
 
				+	u32 *checksums = NULL, *crc;
			
 
				+	char *disk_crcs = NULL;
			
 
				+	struct btrfs_key key;
			
 
				+	struct list_head bitmaps;
			
 
				+	u64 num_entries;
			
 
				+	u64 num_bitmaps;
			
 
				+	u64 generation;
			
 
				+	u32 cur_crc = ~(u32)0;
			
 
				+	pgoff_t index = 0;
			
 
				+	unsigned long first_page_offset;
			
 
				+	int num_checksums;
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * If we're unmounting then just return, since this does a search on the
			
 
				+	 * normal root and not the commit root and we could deadlock.
			
 
				+	 */
			
 
				+	smp_mb();
			
 
				+	if (fs_info->closing)
			
 
				+		return 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * If this block group has been marked to be cleared for one reason or
			
 
				+	 * another then we can't trust the on disk cache, so just return.
			
 
				+	 */
			
 
				+	spin_lock(&block_group->lock);
			
 
				+	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
			
 
				+		spin_unlock(&block_group->lock);
			
 
				+		return 0;
			
 
				+	}
			
 
				+	spin_unlock(&block_group->lock);
			
 
				+
			
 
				+	INIT_LIST_HEAD(&bitmaps);
			
 
				+
			
 
				+	path = btrfs_alloc_path();
			
 
				+	if (!path)
			
 
				+		return 0;
			
 
				+
			
 
				+	inode = lookup_free_space_inode(root, block_group, path);
			
 
				+	if (IS_ERR(inode)) {
			
 
				+		btrfs_free_path(path);
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	/* Nothing in the space cache, goodbye */
			
 
				+	if (!i_size_read(inode)) {
			
 
				+		btrfs_free_path(path);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
			
 
				+	key.offset = block_group->key.objectid;
			
 
				+	key.type = 0;
			
 
				+
			
 
				+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
			
 
				+	if (ret) {
			
 
				+		btrfs_free_path(path);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	leaf = path->nodes[0];
			
 
				+	header = btrfs_item_ptr(leaf, path->slots[0],
			
 
				+				struct btrfs_free_space_header);
			
 
				+	num_entries = btrfs_free_space_entries(leaf, header);
			
 
				+	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
			
 
				+	generation = btrfs_free_space_generation(leaf, header);
			
 
				+	btrfs_free_path(path);
			
 
				+
			
 
				+	if (BTRFS_I(inode)->generation != generation) {
			
 
				+		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
			
 
				+		       " not match free space cache generation (%llu) for "
			
 
				+		       "block group %llu\n",
			
 
				+		       (unsigned long long)BTRFS_I(inode)->generation,
			
 
				+		       (unsigned long long)generation,
			
 
				+		       (unsigned long long)block_group->key.objectid);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	if (!num_entries)
			
 
				+		goto out;
			
 
				+
			
 
				+	/* Setup everything for doing checksumming */
			
 
				+	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
			
 
				+	checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
			
 
				+	if (!checksums)
			
 
				+		goto out;
			
 
				+	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
			
 
				+	disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
			
 
				+	if (!disk_crcs)
			
 
				+		goto out;
			
 
				+
			
 
				+	ret = readahead_cache(inode);
			
 
				+	if (ret) {
			
 
				+		ret = 0;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	while (1) {
			
 
				+		struct btrfs_free_space_entry *entry;
			
 
				+		struct btrfs_free_space *e;
			
 
				+		void *addr;
			
 
				+		unsigned long offset = 0;
			
 
				+		unsigned long start_offset = 0;
			
 
				+		int need_loop = 0;
			
 
				+
			
 
				+		if (!num_entries && !num_bitmaps)
			
 
				+			break;
			
 
				+
			
 
				+		if (index == 0) {
			
 
				+			start_offset = first_page_offset;
			
 
				+			offset = start_offset;
			
 
				+		}
			
 
				+
			
 
				+		page = grab_cache_page(inode->i_mapping, index);
			
 
				+		if (!page) {
			
 
				+			ret = 0;
			
 
				+			goto free_cache;
			
 
				+		}
			
 
				+
			
 
				+		if (!PageUptodate(page)) {
			
 
				+			btrfs_readpage(NULL, page);
			
 
				+			lock_page(page);
			
 
				+			if (!PageUptodate(page)) {
			
 
				+				unlock_page(page);
			
 
				+				page_cache_release(page);
			
 
				+				printk(KERN_ERR "btrfs: error reading free "
			
 
				+				       "space cache: %llu\n",
			
 
				+				       (unsigned long long)
			
 
				+				       block_group->key.objectid);
			
 
				+				goto free_cache;
			
 
				+			}
			
 
				+		}
			
 
				+		addr = kmap(page);
			
 
				+
			
 
				+		if (index == 0) {
			
 
				+			u64 *gen;
			
 
				+
			
 
				+			memcpy(disk_crcs, addr, first_page_offset);
			
 
				+			gen = addr + (sizeof(u32) * num_checksums);
			
 
				+			if (*gen != BTRFS_I(inode)->generation) {
			
 
				+				printk(KERN_ERR "btrfs: space cache generation"
			
 
				+				       " (%llu) does not match inode (%llu) "
			
 
				+				       "for block group %llu\n",
			
 
				+				       (unsigned long long)*gen,
			
 
				+				       (unsigned long long)
			
 
				+				       BTRFS_I(inode)->generation,
			
 
				+				       (unsigned long long)
			
 
				+				       block_group->key.objectid);
			
 
				+				kunmap(page);
			
 
				+				unlock_page(page);
			
 
				+				page_cache_release(page);
			
 
				+				goto free_cache;
			
 
				+			}
			
 
				+			crc = (u32 *)disk_crcs;
			
 
				+		}
			
 
				+		entry = addr + start_offset;
			
 
				+
			
 
				+		/* First lets check our crc before we do anything fun */
			
 
				+		cur_crc = ~(u32)0;
			
 
				+		cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
			
 
				+					  PAGE_CACHE_SIZE - start_offset);
			
 
				+		btrfs_csum_final(cur_crc, (char *)&cur_crc);
			
 
				+		if (cur_crc != *crc) {
			
 
				+			printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
			
 
				+			       "block group %llu\n", index,
			
 
				+			       (unsigned long long)block_group->key.objectid);
			
 
				+			kunmap(page);
			
 
				+			unlock_page(page);
			
 
				+			page_cache_release(page);
			
 
				+			goto free_cache;
			
 
				+		}
			
 
				+		crc++;
			
 
				+
			
 
				+		while (1) {
			
 
				+			if (!num_entries)
			
 
				+				break;
			
 
				+
			
 
				+			need_loop = 1;
			
 
				+			e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
			
 
				+			if (!e) {
			
 
				+				kunmap(page);
			
 
				+				unlock_page(page);
			
 
				+				page_cache_release(page);
			
 
				+				goto free_cache;
			
 
				+			}
			
 
				+
			
 
				+			e->offset = le64_to_cpu(entry->offset);
			
 
				+			e->bytes = le64_to_cpu(entry->bytes);
			
 
				+			if (!e->bytes) {
			
 
				+				kunmap(page);
			
 
				+				kfree(e);
			
 
				+				unlock_page(page);
			
 
				+				page_cache_release(page);
			
 
				+				goto free_cache;
			
 
				+			}
			
 
				+
			
 
				+			if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
			
 
				+				spin_lock(&block_group->tree_lock);
			
 
				+				ret = link_free_space(block_group, e);
			
 
				+				spin_unlock(&block_group->tree_lock);
			
 
				+				BUG_ON(ret);
			
 
				+			} else {
			
 
				+				e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
			
 
				+				if (!e->bitmap) {
			
 
				+					kunmap(page);
			
 
				+					kfree(e);
			
 
				+					unlock_page(page);
			
 
				+					page_cache_release(page);
			
 
				+					goto free_cache;
			
 
				+				}
			
 
				+				spin_lock(&block_group->tree_lock);
			
 
				+				ret = link_free_space(block_group, e);
			
 
				+				block_group->total_bitmaps++;
			
 
				+				recalculate_thresholds(block_group);
			
 
				+				spin_unlock(&block_group->tree_lock);
			
 
				+				list_add_tail(&e->list, &bitmaps);
			
 
				+			}
			
 
				+
			
 
				+			num_entries--;
			
 
				+			offset += sizeof(struct btrfs_free_space_entry);
			
 
				+			if (offset + sizeof(struct btrfs_free_space_entry) >=
			
 
				+			    PAGE_CACHE_SIZE)
			
 
				+				break;
			
 
				+			entry++;
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * We read an entry out of this page, we need to move on to the
			
 
				+		 * next page.
			
 
				+		 */
			
 
				+		if (need_loop) {
			
 
				+			kunmap(page);
			
 
				+			goto next;
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * We add the bitmaps at the end of the entries in order that
			
 
				+		 * the bitmap entries are added to the cache.
			
 
				+		 */
			
 
				+		e = list_entry(bitmaps.next, struct btrfs_free_space, list);
			
 
				+		list_del_init(&e->list);
			
 
				+		memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
			
 
				+		kunmap(page);
			
 
				+		num_bitmaps--;
			
 
				+next:
			
 
				+		unlock_page(page);
			
 
				+		page_cache_release(page);
			
 
				+		index++;
			
 
				+	}
			
 
				+
			
 
				+	ret = 1;
			
 
				+out:
			
 
				+	kfree(checksums);
			
 
				+	kfree(disk_crcs);
			
 
				+	iput(inode);
			
 
				+	return ret;
			
 
				+
			
 
				+free_cache:
			
 
				+	/* This cache is bogus, make sure it gets cleared */
			
 
				+	spin_lock(&block_group->lock);
			
 
				+	block_group->disk_cache_state = BTRFS_DC_CLEAR;
			
 
				+	spin_unlock(&block_group->lock);
			
 
				+	btrfs_remove_free_space_cache(block_group);
			
 
				+	goto out;
			
 
				+}
			
 
				+
			
 
				+int btrfs_write_out_cache(struct btrfs_root *root,
			
 
				+			  struct btrfs_trans_handle *trans,
			
 
				+			  struct btrfs_block_group_cache *block_group,
			
 
				+			  struct btrfs_path *path)
			
 
				+{
			
 
				+	struct btrfs_free_space_header *header;
			
 
				+	struct extent_buffer *leaf;
			
 
				+	struct inode *inode;
			
 
				+	struct rb_node *node;
			
 
				+	struct list_head *pos, *n;
			
 
				+	struct page *page;
			
 
				+	struct extent_state *cached_state = NULL;
			
 
				+	struct list_head bitmap_list;
			
 
				+	struct btrfs_key key;
			
 
				+	u64 bytes = 0;
			
 
				+	u32 *crc, *checksums;
			
 
				+	pgoff_t index = 0, last_index = 0;
			
 
				+	unsigned long first_page_offset;
			
 
				+	int num_checksums;
			
 
				+	int entries = 0;
			
 
				+	int bitmaps = 0;
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	root = root->fs_info->tree_root;
			
 
				+
			
 
				+	INIT_LIST_HEAD(&bitmap_list);
			
 
				+
			
 
				+	spin_lock(&block_group->lock);
			
 
				+	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
			
 
				+		spin_unlock(&block_group->lock);
			
 
				+		return 0;
			
 
				+	}
			
 
				+	spin_unlock(&block_group->lock);
			
 
				+
			
 
				+	inode = lookup_free_space_inode(root, block_group, path);
			
 
				+	if (IS_ERR(inode))
			
 
				+		return 0;
			
 
				+
			
 
				+	if (!i_size_read(inode)) {
			
 
				+		iput(inode);
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
			
 
				+	filemap_write_and_wait(inode->i_mapping);
			
 
				+	btrfs_wait_ordered_range(inode, inode->i_size &
			
 
				+				 ~(root->sectorsize - 1), (u64)-1);
			
 
				+
			
 
				+	/* We need a checksum per page. */
			
 
				+	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
			
 
				+	crc = checksums  = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
			
 
				+	if (!crc) {
			
 
				+		iput(inode);
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	/* Since the first page has all of our checksums and our generation we
			
 
				+	 * need to calculate the offset into the page that we can start writing
			
 
				+	 * our entries.
			
 
				+	 */
			
 
				+	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
			
 
				+
			
 
				+	node = rb_first(&block_group->free_space_offset);
			
 
				+	if (!node)
			
 
				+		goto out_free;
			
 
				+
			
 
				+	/*
			
 
				+	 * Lock all pages first so we can lock the extent safely.
			
 
				+	 *
			
 
				+	 * NOTE: Because we hold the ref the entire time we're going to write to
			
 
				+	 * the page find_get_page should never fail, so we don't do a check
			
 
				+	 * after find_get_page at this point.  Just putting this here so people
			
 
				+	 * know and don't freak out.
			
 
				+	 */
			
 
				+	while (index <= last_index) {
			
 
				+		page = grab_cache_page(inode->i_mapping, index);
			
 
				+		if (!page) {
			
 
				+			pgoff_t i = 0;
			
 
				+
			
 
				+			while (i < index) {
			
 
				+				page = find_get_page(inode->i_mapping, i);
			
 
				+				unlock_page(page);
			
 
				+				page_cache_release(page);
			
 
				+				page_cache_release(page);
			
 
				+				i++;
			
 
				+			}
			
 
				+			goto out_free;
			
 
				+		}
			
 
				+		index++;
			
 
				+	}
			
 
				+
			
 
				+	index = 0;
			
 
				+	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			
 
				+			 0, &cached_state, GFP_NOFS);
			
 
				+
			
 
				+	/* Write out the extent entries */
			
 
				+	do {
			
 
				+		struct btrfs_free_space_entry *entry;
			
 
				+		void *addr;
			
 
				+		unsigned long offset = 0;
			
 
				+		unsigned long start_offset = 0;
			
 
				+
			
 
				+		if (index == 0) {
			
 
				+			start_offset = first_page_offset;
			
 
				+			offset = start_offset;
			
 
				+		}
			
 
				+
			
 
				+		page = find_get_page(inode->i_mapping, index);
			
 
				+
			
 
				+		addr = kmap(page);
			
 
				+		entry = addr + start_offset;
			
 
				+
			
 
				+		memset(addr, 0, PAGE_CACHE_SIZE);
			
 
				+		while (1) {
			
 
				+			struct btrfs_free_space *e;
			
 
				+
			
 
				+			e = rb_entry(node, struct btrfs_free_space, offset_index);
			
 
				+			entries++;
			
 
				+
			
 
				+			entry->offset = cpu_to_le64(e->offset);
			
 
				+			entry->bytes = cpu_to_le64(e->bytes);
			
 
				+			if (e->bitmap) {
			
 
				+				entry->type = BTRFS_FREE_SPACE_BITMAP;
			
 
				+				list_add_tail(&e->list, &bitmap_list);
			
 
				+				bitmaps++;
			
 
				+			} else {
			
 
				+				entry->type = BTRFS_FREE_SPACE_EXTENT;
			
 
				+			}
			
 
				+			node = rb_next(node);
			
 
				+			if (!node)
			
 
				+				break;
			
 
				+			offset += sizeof(struct btrfs_free_space_entry);
			
 
				+			if (offset + sizeof(struct btrfs_free_space_entry) >=
			
 
				+			    PAGE_CACHE_SIZE)
			
 
				+				break;
			
 
				+			entry++;
			
 
				+		}
			
 
				+		*crc = ~(u32)0;
			
 
				+		*crc = btrfs_csum_data(root, addr + start_offset, *crc,
			
 
				+				       PAGE_CACHE_SIZE - start_offset);
			
 
				+		kunmap(page);
			
 
				+
			
 
				+		btrfs_csum_final(*crc, (char *)crc);
			
 
				+		crc++;
			
 
				+
			
 
				+		bytes += PAGE_CACHE_SIZE;
			
 
				+
			
 
				+		ClearPageChecked(page);
			
 
				+		set_page_extent_mapped(page);
			
 
				+		SetPageUptodate(page);
			
 
				+		set_page_dirty(page);
			
 
				+
			
 
				+		/*
			
 
				+		 * We need to release our reference we got for grab_cache_page,
			
 
				+		 * except for the first page which will hold our checksums, we
			
 
				+		 * do that below.
			
 
				+		 */
			
 
				+		if (index != 0) {
			
 
				+			unlock_page(page);
			
 
				+			page_cache_release(page);
			
 
				+		}
			
 
				+
			
 
				+		page_cache_release(page);
			
 
				+
			
 
				+		index++;
			
 
				+	} while (node);
			
 
				+
			
 
				+	/* Write out the bitmaps */
			
 
				+	list_for_each_safe(pos, n, &bitmap_list) {
			
 
				+		void *addr;
			
 
				+		struct btrfs_free_space *entry =
			
 
				+			list_entry(pos, struct btrfs_free_space, list);
			
 
				+
			
 
				+		page = find_get_page(inode->i_mapping, index);
			
 
				+
			
 
				+		addr = kmap(page);
			
 
				+		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
			
 
				+		*crc = ~(u32)0;
			
 
				+		*crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
			
 
				+		kunmap(page);
			
 
				+		btrfs_csum_final(*crc, (char *)crc);
			
 
				+		crc++;
			
 
				+		bytes += PAGE_CACHE_SIZE;
			
 
				+
			
 
				+		ClearPageChecked(page);
			
 
				+		set_page_extent_mapped(page);
			
 
				+		SetPageUptodate(page);
			
 
				+		set_page_dirty(page);
			
 
				+		unlock_page(page);
			
 
				+		page_cache_release(page);
			
 
				+		page_cache_release(page);
			
 
				+		list_del_init(&entry->list);
			
 
				+		index++;
			
 
				+	}
			
 
				+
			
 
				+	/* Zero out the rest of the pages just to make sure */
			
 
				+	while (index <= last_index) {
			
 
				+		void *addr;
			
 
				+
			
 
				+		page = find_get_page(inode->i_mapping, index);
			
 
				+
			
 
				+		addr = kmap(page);
			
 
				+		memset(addr, 0, PAGE_CACHE_SIZE);
			
 
				+		kunmap(page);
			
 
				+		ClearPageChecked(page);
			
 
				+		set_page_extent_mapped(page);
			
 
				+		SetPageUptodate(page);
			
 
				+		set_page_dirty(page);
			
 
				+		unlock_page(page);
			
 
				+		page_cache_release(page);
			
 
				+		page_cache_release(page);
			
 
				+		bytes += PAGE_CACHE_SIZE;
			
 
				+		index++;
			
 
				+	}
			
 
				+
			
 
				+	btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
			
 
				+
			
 
				+	/* Write the checksums and trans id to the first page */
			
 
				+	{
			
 
				+		void *addr;
			
 
				+		u64 *gen;
			
 
				+
			
 
				+		page = find_get_page(inode->i_mapping, 0);
			
 
				+
			
 
				+		addr = kmap(page);
			
 
				+		memcpy(addr, checksums, sizeof(u32) * num_checksums);
			
 
				+		gen = addr + (sizeof(u32) * num_checksums);
			
 
				+		*gen = trans->transid;
			
 
				+		kunmap(page);
			
 
				+		ClearPageChecked(page);
			
 
				+		set_page_extent_mapped(page);
			
 
				+		SetPageUptodate(page);
			
 
				+		set_page_dirty(page);
			
 
				+		unlock_page(page);
			
 
				+		page_cache_release(page);
			
 
				+		page_cache_release(page);
			
 
				+	}
			
 
				+	BTRFS_I(inode)->generation = trans->transid;
			
 
				+
			
 
				+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			
 
				+			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
			
 
				+
			
 
				+	filemap_write_and_wait(inode->i_mapping);
			
 
				+
			
 
				+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
			
 
				+	key.offset = block_group->key.objectid;
			
 
				+	key.type = 0;
			
 
				+
			
 
				+	ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
			
 
				+	if (ret < 0) {
			
 
				+		ret = 0;
			
 
				+		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
			
 
				+				 EXTENT_DIRTY | EXTENT_DELALLOC |
			
 
				+				 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
			
 
				+		goto out_free;
			
 
				+	}
			
 
				+	leaf = path->nodes[0];
			
 
				+	if (ret > 0) {
			
 
				+		struct btrfs_key found_key;
			
 
				+		BUG_ON(!path->slots[0]);
			
 
				+		path->slots[0]--;
			
 
				+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
			
 
				+		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
			
 
				+		    found_key.offset != block_group->key.objectid) {
			
 
				+			ret = 0;
			
 
				+			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
			
 
				+					 EXTENT_DIRTY | EXTENT_DELALLOC |
			
 
				+					 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
			
 
				+					 GFP_NOFS);
			
 
				+			btrfs_release_path(root, path);
			
 
				+			goto out_free;
			
 
				+		}
			
 
				+	}
			
 
				+	header = btrfs_item_ptr(leaf, path->slots[0],
			
 
				+				struct btrfs_free_space_header);
			
 
				+	btrfs_set_free_space_entries(leaf, header, entries);
			
 
				+	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
			
 
				+	btrfs_set_free_space_generation(leaf, header, trans->transid);
			
 
				+	btrfs_mark_buffer_dirty(leaf);
			
 
				+	btrfs_release_path(root, path);
			
 
				+
			
 
				+	ret = 1;
			
 
				+
			
 
				+out_free:
			
 
				+	if (ret == 0) {
			
 
				+		invalidate_inode_pages2_range(inode->i_mapping, 0, index);
			
 
				+		spin_lock(&block_group->lock);
			
 
				+		block_group->disk_cache_state = BTRFS_DC_ERROR;
			
 
				+		spin_unlock(&block_group->lock);
			
 
				+		BTRFS_I(inode)->generation = 0;
			
 
				+	}
			
 
				+	kfree(checksums);
			
 
				+	btrfs_update_inode(trans, root, inode);
			
 
				+	iput(inode);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
			
 
				 					  u64 offset)
			
 
				 {
			
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,24 @@ struct btrfs_free_space {
 
				 	struct list_head list;
			
 
				 };
			
 
				 
			
 
				+struct inode *lookup_free_space_inode(struct btrfs_root *root,
			
 
				+				      struct btrfs_block_group_cache
			
 
				+				      *block_group, struct btrfs_path *path);
			
 
				+int create_free_space_inode(struct btrfs_root *root,
			
 
				+			    struct btrfs_trans_handle *trans,
			
 
				+			    struct btrfs_block_group_cache *block_group,
			
 
				+			    struct btrfs_path *path);
			
 
				+
			
 
				+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
			
 
				+				    struct btrfs_trans_handle *trans,
			
 
				+				    struct btrfs_path *path,
			
 
				+				    struct inode *inode);
			
 
				+int load_free_space_cache(struct btrfs_fs_info *fs_info,
			
 
				+			  struct btrfs_block_group_cache *block_group);
			
 
				+int btrfs_write_out_cache(struct btrfs_root *root,
			
 
				+			  struct btrfs_trans_handle *trans,
			
 
				+			  struct btrfs_block_group_cache *block_group,
			
 
				+			  struct btrfs_path *path);
			
 
				 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			
 
				 			 u64 bytenr, u64 size);
			
 
				 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -319,8 +319,6 @@ static noinline int compress_file_range(struct inode *inode,
 
				 	struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				 	struct btrfs_trans_handle *trans;
			
 
				 	u64 num_bytes;
			
 
				-	u64 orig_start;
			
 
				-	u64 disk_num_bytes;
			
 
				 	u64 blocksize = root->sectorsize;
			
 
				 	u64 actual_end;
			
 
				 	u64 isize = i_size_read(inode);
			
@@ -335,8 +333,6 @@ static noinline int compress_file_range(struct inode *inode,
 
				 	int i;
			
 
				 	int will_compress;
			
 
				 
			
 
				-	orig_start = start;
			
 
				-
			
 
				 	actual_end = min_t(u64, isize, end + 1);
			
 
				 again:
			
 
				 	will_compress = 0;
			
@@ -371,7 +367,6 @@ again:
 
				 	total_compressed = min(total_compressed, max_uncompressed);
			
 
				 	num_bytes = (end - start + blocksize) & ~(blocksize - 1);
			
 
				 	num_bytes = max(blocksize,  num_bytes);
			
 
				-	disk_num_bytes = num_bytes;
			
 
				 	total_in = 0;
			
 
				 	ret = 0;
			
 
				 
			
@@ -467,7 +462,6 @@ again:
 
				 		if (total_compressed >= total_in) {
			
 
				 			will_compress = 0;
			
 
				 		} else {
			
 
				-			disk_num_bytes = total_compressed;
			
 
				 			num_bytes = total_in;
			
 
				 		}
			
 
				 	}
			
@@ -757,20 +751,17 @@ static noinline int cow_file_range(struct inode *inode,
 
				 	u64 disk_num_bytes;
			
 
				 	u64 cur_alloc_size;
			
 
				 	u64 blocksize = root->sectorsize;
			
 
				-	u64 actual_end;
			
 
				-	u64 isize = i_size_read(inode);
			
 
				 	struct btrfs_key ins;
			
 
				 	struct extent_map *em;
			
 
				 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
			
 
				 	int ret = 0;
			
 
				 
			
 
				+	BUG_ON(root == root->fs_info->tree_root);
			
 
				 	trans = btrfs_join_transaction(root, 1);
			
 
				 	BUG_ON(!trans);
			
 
				 	btrfs_set_trans_block_group(trans, inode);
			
 
				 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
			
 
				 
			
 
				-	actual_end = min_t(u64, isize, end + 1);
			
 
				-
			
 
				 	num_bytes = (end - start + blocksize) & ~(blocksize - 1);
			
 
				 	num_bytes = max(blocksize,  num_bytes);
			
 
				 	disk_num_bytes = num_bytes;
			
@@ -1035,10 +1026,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 
				 	int type;
			
 
				 	int nocow;
			
 
				 	int check_prev = 1;
			
 
				+	bool nolock = false;
			
 
				 
			
 
				 	path = btrfs_alloc_path();
			
 
				 	BUG_ON(!path);
			
 
				-	trans = btrfs_join_transaction(root, 1);
			
 
				+	if (root == root->fs_info->tree_root) {
			
 
				+		nolock = true;
			
 
				+		trans = btrfs_join_transaction_nolock(root, 1);
			
 
				+	} else {
			
 
				+		trans = btrfs_join_transaction(root, 1);
			
 
				+	}
			
 
				 	BUG_ON(!trans);
			
 
				 
			
 
				 	cow_start = (u64)-1;
			
@@ -1211,8 +1208,13 @@ out_check:
 
				 		BUG_ON(ret);
			
 
				 	}
			
 
				 
			
 
				-	ret = btrfs_end_transaction(trans, root);
			
 
				-	BUG_ON(ret);
			
 
				+	if (nolock) {
			
 
				+		ret = btrfs_end_transaction_nolock(trans, root);
			
 
				+		BUG_ON(ret);
			
 
				+	} else {
			
 
				+		ret = btrfs_end_transaction(trans, root);
			
 
				+		BUG_ON(ret);
			
 
				+	}
			
 
				 	btrfs_free_path(path);
			
 
				 	return 0;
			
 
				 }
			
@@ -1289,6 +1291,8 @@ static int btrfs_set_bit_hook(struct inode *inode,
 
				 	if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
			
 
				 		struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				 		u64 len = state->end + 1 - state->start;
			
 
				+		int do_list = (root->root_key.objectid !=
			
 
				+			       BTRFS_ROOT_TREE_OBJECTID);
			
 
				 
			
 
				 		if (*bits & EXTENT_FIRST_DELALLOC)
			
 
				 			*bits &= ~EXTENT_FIRST_DELALLOC;
			
@@ -1298,7 +1302,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
 
				 		spin_lock(&root->fs_info->delalloc_lock);
			
 
				 		BTRFS_I(inode)->delalloc_bytes += len;
			
 
				 		root->fs_info->delalloc_bytes += len;
			
 
				-		if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
			
 
				+		if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
			
 
				 			list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
			
 
				 				      &root->fs_info->delalloc_inodes);
			
 
				 		}
			
@@ -1321,6 +1325,8 @@ static int btrfs_clear_bit_hook(struct inode *inode,
 
				 	if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
			
 
				 		struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				 		u64 len = state->end + 1 - state->start;
			
 
				+		int do_list = (root->root_key.objectid !=
			
 
				+			       BTRFS_ROOT_TREE_OBJECTID);
			
 
				 
			
 
				 		if (*bits & EXTENT_FIRST_DELALLOC)
			
 
				 			*bits &= ~EXTENT_FIRST_DELALLOC;
			
@@ -1330,14 +1336,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
 
				 		if (*bits & EXTENT_DO_ACCOUNTING)
			
 
				 			btrfs_delalloc_release_metadata(inode, len);
			
 
				 
			
 
				-		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
			
 
				+		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
			
 
				+		    && do_list)
			
 
				 			btrfs_free_reserved_data_space(inode, len);
			
 
				 
			
 
				 		spin_lock(&root->fs_info->delalloc_lock);
			
 
				 		root->fs_info->delalloc_bytes -= len;
			
 
				 		BTRFS_I(inode)->delalloc_bytes -= len;
			
 
				 
			
 
				-		if (BTRFS_I(inode)->delalloc_bytes == 0 &&
			
 
				+		if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
			
 
				 		    !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
			
 
				 			list_del_init(&BTRFS_I(inode)->delalloc_inodes);
			
 
				 		}
			
@@ -1372,7 +1379,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
 
				 
			
 
				 	if (map_length < length + size)
			
 
				 		return 1;
			
 
				-	return 0;
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1426,7 +1433,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
				 
			
 
				 	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
			
 
				 
			
 
				-	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
			
 
				+	if (root == root->fs_info->tree_root)
			
 
				+		ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
			
 
				+	else
			
 
				+		ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
			
 
				 	BUG_ON(ret);
			
 
				 
			
 
				 	if (!(rw & REQ_WRITE)) {
			
@@ -1662,6 +1672,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 
				 	struct extent_state *cached_state = NULL;
			
 
				 	int compressed = 0;
			
 
				 	int ret;
			
 
				+	bool nolock = false;
			
 
				 
			
 
				 	ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
			
 
				 					     end - start + 1);
			
@@ -1669,11 +1680,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 
				 		return 0;
			
 
				 	BUG_ON(!ordered_extent);
			
 
				 
			
 
				+	nolock = (root == root->fs_info->tree_root);
			
 
				+
			
 
				 	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
			
 
				 		BUG_ON(!list_empty(&ordered_extent->list));
			
 
				 		ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
			
 
				 		if (!ret) {
			
 
				-			trans = btrfs_join_transaction(root, 1);
			
 
				+			if (nolock)
			
 
				+				trans = btrfs_join_transaction_nolock(root, 1);
			
 
				+			else
			
 
				+				trans = btrfs_join_transaction(root, 1);
			
 
				+			BUG_ON(!trans);
			
 
				 			btrfs_set_trans_block_group(trans, inode);
			
 
				 			trans->block_rsv = &root->fs_info->delalloc_block_rsv;
			
 
				 			ret = btrfs_update_inode(trans, root, inode);
			
@@ -1686,7 +1703,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 
				 			 ordered_extent->file_offset + ordered_extent->len - 1,
			
 
				 			 0, &cached_state, GFP_NOFS);
			
 
				 
			
 
				-	trans = btrfs_join_transaction(root, 1);
			
 
				+	if (nolock)
			
 
				+		trans = btrfs_join_transaction_nolock(root, 1);
			
 
				+	else
			
 
				+		trans = btrfs_join_transaction(root, 1);
			
 
				 	btrfs_set_trans_block_group(trans, inode);
			
 
				 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
			
 
				 
			
@@ -1700,6 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 
				 						ordered_extent->len);
			
 
				 		BUG_ON(ret);
			
 
				 	} else {
			
 
				+		BUG_ON(root == root->fs_info->tree_root);
			
 
				 		ret = insert_reserved_file_extent(trans, inode,
			
 
				 						ordered_extent->file_offset,
			
 
				 						ordered_extent->start,
			
@@ -1724,9 +1745,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 
				 	ret = btrfs_update_inode(trans, root, inode);
			
 
				 	BUG_ON(ret);
			
 
				 out:
			
 
				-	btrfs_delalloc_release_metadata(inode, ordered_extent->len);
			
 
				-	if (trans)
			
 
				-		btrfs_end_transaction(trans, root);
			
 
				+	if (nolock) {
			
 
				+		if (trans)
			
 
				+			btrfs_end_transaction_nolock(trans, root);
			
 
				+	} else {
			
 
				+		btrfs_delalloc_release_metadata(inode, ordered_extent->len);
			
 
				+		if (trans)
			
 
				+			btrfs_end_transaction(trans, root);
			
 
				+	}
			
 
				+
			
 
				 	/* once for us */
			
 
				 	btrfs_put_ordered_extent(ordered_extent);
			
 
				 	/* once for the tree */
			
@@ -2237,7 +2264,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 
				 {
			
 
				 	struct btrfs_path *path;
			
 
				 	struct extent_buffer *leaf;
			
 
				-	struct btrfs_item *item;
			
 
				 	struct btrfs_key key, found_key;
			
 
				 	struct btrfs_trans_handle *trans;
			
 
				 	struct inode *inode;
			
@@ -2275,7 +2301,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 
				 
			
 
				 		/* pull out the item */
			
 
				 		leaf = path->nodes[0];
			
 
				-		item = btrfs_item_nr(leaf, path->slots[0]);
			
 
				 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
			
 
				 
			
 
				 		/* make sure the item matches what we want */
			
@@ -2651,7 +2676,8 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 
				 
			
 
				 	ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
			
 
				 					   dir, index);
			
 
				-	BUG_ON(ret);
			
 
				+	if (ret == -ENOENT)
			
 
				+		ret = 0;
			
 
				 err:
			
 
				 	btrfs_free_path(path);
			
 
				 	if (ret)
			
@@ -2672,8 +2698,8 @@ static int check_path_shared(struct btrfs_root *root,
 
				 {
			
 
				 	struct extent_buffer *eb;
			
 
				 	int level;
			
 
				-	int ret;
			
 
				 	u64 refs = 1;
			
 
				+	int uninitialized_var(ret);
			
 
				 
			
 
				 	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
			
 
				 		if (!path->nodes[level])
			
@@ -2686,7 +2712,7 @@ static int check_path_shared(struct btrfs_root *root,
 
				 		if (refs > 1)
			
 
				 			return 1;
			
 
				 	}
			
 
				-	return 0;
			
 
				+	return ret; /* XXX callers? */
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -3196,7 +3222,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 
				 
			
 
				 	BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
			
 
				 
			
 
				-	if (root->ref_cows)
			
 
				+	if (root->ref_cows || root == root->fs_info->tree_root)
			
 
				 		btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
			
 
				 
			
 
				 	path = btrfs_alloc_path();
			
@@ -3344,7 +3370,8 @@ delete:
 
				 		} else {
			
 
				 			break;
			
 
				 		}
			
 
				-		if (found_extent && root->ref_cows) {
			
 
				+		if (found_extent && (root->ref_cows ||
			
 
				+				     root == root->fs_info->tree_root)) {
			
 
				 			btrfs_set_path_blocking(path);
			
 
				 			ret = btrfs_free_extent(trans, root, extent_start,
			
 
				 						extent_num_bytes, 0,
			
@@ -3675,7 +3702,8 @@ void btrfs_evict_inode(struct inode *inode)
 
				 	int ret;
			
 
				 
			
 
				 	truncate_inode_pages(&inode->i_data, 0);
			
 
				-	if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
			
 
				+	if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
			
 
				+			       root == root->fs_info->tree_root))
			
 
				 		goto no_delete;
			
 
				 
			
 
				 	if (is_bad_inode(inode)) {
			
@@ -3888,7 +3916,14 @@ static void inode_tree_del(struct inode *inode)
 
				 	}
			
 
				 	spin_unlock(&root->inode_lock);
			
 
				 
			
 
				-	if (empty && btrfs_root_refs(&root->root_item) == 0) {
			
 
				+	/*
			
 
				+	 * Free space cache has inodes in the tree root, but the tree root has a
			
 
				+	 * root_refs of 0, so this could end up dropping the tree root as a
			
 
				+	 * snapshot, so we need the extra root != root->fs_info->tree_root check to
			
 
				+	 * make sure we don't drop it.
			
 
				+	 */
			
 
				+	if (empty && btrfs_root_refs(&root->root_item) == 0 &&
			
 
				+	    root != root->fs_info->tree_root) {
			
 
				 		synchronize_srcu(&root->fs_info->subvol_srcu);
			
 
				 		spin_lock(&root->inode_lock);
			
 
				 		empty = RB_EMPTY_ROOT(&root->inode_tree);
			
@@ -4282,14 +4317,24 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 
				 	struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				 	struct btrfs_trans_handle *trans;
			
 
				 	int ret = 0;
			
 
				+	bool nolock = false;
			
 
				 
			
 
				 	if (BTRFS_I(inode)->dummy_inode)
			
 
				 		return 0;
			
 
				 
			
 
				+	smp_mb();
			
 
				+	nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
			
 
				+
			
 
				 	if (wbc->sync_mode == WB_SYNC_ALL) {
			
 
				-		trans = btrfs_join_transaction(root, 1);
			
 
				+		if (nolock)
			
 
				+			trans = btrfs_join_transaction_nolock(root, 1);
			
 
				+		else
			
 
				+			trans = btrfs_join_transaction(root, 1);
			
 
				 		btrfs_set_trans_block_group(trans, inode);
			
 
				-		ret = btrfs_commit_transaction(trans, root);
			
 
				+		if (nolock)
			
 
				+			ret = btrfs_end_transaction_nolock(trans, root);
			
 
				+		else
			
 
				+			ret = btrfs_commit_transaction(trans, root);
			
 
				 	}
			
 
				 	return ret;
			
 
				 }
			
@@ -5645,7 +5690,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
 
				 	struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				 	struct btrfs_dio_private *dip;
			
 
				 	struct bio_vec *bvec = bio->bi_io_vec;
			
 
				-	u64 start;
			
 
				 	int skip_sum;
			
 
				 	int write = rw & REQ_WRITE;
			
 
				 	int ret = 0;
			
@@ -5671,7 +5715,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
 
				 	dip->inode = inode;
			
 
				 	dip->logical_offset = file_offset;
			
 
				 
			
 
				-	start = dip->logical_offset;
			
 
				 	dip->bytes = 0;
			
 
				 	do {
			
 
				 		dip->bytes += bvec->bv_len;
			
@@ -6308,6 +6351,21 @@ void btrfs_destroy_inode(struct inode *inode)
 
				 		spin_unlock(&root->fs_info->ordered_extent_lock);
			
 
				 	}
			
 
				 
			
 
				+	if (root == root->fs_info->tree_root) {
			
 
				+		struct btrfs_block_group_cache *block_group;
			
 
				+
			
 
				+		block_group = btrfs_lookup_block_group(root->fs_info,
			
 
				+						BTRFS_I(inode)->block_group);
			
 
				+		if (block_group && block_group->inode == inode) {
			
 
				+			spin_lock(&block_group->lock);
			
 
				+			block_group->inode = NULL;
			
 
				+			spin_unlock(&block_group->lock);
			
 
				+			btrfs_put_block_group(block_group);
			
 
				+		} else if (block_group) {
			
 
				+			btrfs_put_block_group(block_group);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	spin_lock(&root->orphan_lock);
			
 
				 	if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
			
 
				 		printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
			
@@ -6340,7 +6398,8 @@ int btrfs_drop_inode(struct inode *inode)
 
				 {
			
 
				 	struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				 
			
 
				-	if (btrfs_root_refs(&root->root_item) == 0)
			
 
				+	if (btrfs_root_refs(&root->root_item) == 0 &&
			
 
				+	    root != root->fs_info->tree_root)
			
 
				 		return 1;
			
 
				 	else
			
 
				 		return generic_drop_inode(inode);
			
@@ -6609,7 +6668,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
			
 
				+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
			
 
				+				   int sync)
			
 
				 {
			
 
				 	struct btrfs_inode *binode;
			
 
				 	struct inode *inode = NULL;
			
@@ -6631,7 +6691,26 @@ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
 
				 	spin_unlock(&root->fs_info->delalloc_lock);
			
 
				 
			
 
				 	if (inode) {
			
 
				-		write_inode_now(inode, 0);
			
 
				+		if (sync) {
			
 
				+			filemap_write_and_wait(inode->i_mapping);
			
 
				+			/*
			
 
				+			 * We have to do this because compression doesn't
			
 
				+			 * actually set PG_writeback until it submits the pages
			
 
				+			 * for IO, which happens in an async thread, so we could
			
 
				+			 * race and not actually wait for any writeback pages
			
 
				+			 * because they've not been submitted yet.  Technically
			
 
				+			 * this could still be the case for the ordered stuff
			
 
				+			 * since the async thread may not have started to do its
			
 
				+			 * work yet.  If this becomes the case then we need to
			
 
				+			 * figure out a way to make sure that in writepage we
			
 
				+			 * wait for any async pages to be submitted before
			
 
				+			 * returning so that fdatawait does what it's supposed to
			
 
				+			 * do.
			
 
				+			 */
			
 
				+			btrfs_wait_ordered_range(inode, 0, (u64)-1);
			
 
				+		} else {
			
 
				+			filemap_flush(inode->i_mapping);
			
 
				+		}
			
 
				 		if (delay_iput)
			
 
				 			btrfs_add_delayed_iput(inode);
			
 
				 		else
			
@@ -6757,27 +6836,33 @@ out_unlock:
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				-int btrfs_prealloc_file_range(struct inode *inode, int mode,
			
 
				-			      u64 start, u64 num_bytes, u64 min_size,
			
 
				-			      loff_t actual_len, u64 *alloc_hint)
			
 
				+static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
			
 
				+				       u64 start, u64 num_bytes, u64 min_size,
			
 
				+				       loff_t actual_len, u64 *alloc_hint,
			
 
				+				       struct btrfs_trans_handle *trans)
			
 
				 {
			
 
				-	struct btrfs_trans_handle *trans;
			
 
				 	struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				 	struct btrfs_key ins;
			
 
				 	u64 cur_offset = start;
			
 
				 	int ret = 0;
			
 
				+	bool own_trans = true;
			
 
				 
			
 
				+	if (trans)
			
 
				+		own_trans = false;
			
 
				 	while (num_bytes > 0) {
			
 
				-		trans = btrfs_start_transaction(root, 3);
			
 
				-		if (IS_ERR(trans)) {
			
 
				-			ret = PTR_ERR(trans);
			
 
				-			break;
			
 
				+		if (own_trans) {
			
 
				+			trans = btrfs_start_transaction(root, 3);
			
 
				+			if (IS_ERR(trans)) {
			
 
				+				ret = PTR_ERR(trans);
			
 
				+				break;
			
 
				+			}
			
 
				 		}
			
 
				 
			
 
				 		ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
			
 
				 					   0, *alloc_hint, (u64)-1, &ins, 1);
			
 
				 		if (ret) {
			
 
				-			btrfs_end_transaction(trans, root);
			
 
				+			if (own_trans)
			
 
				+				btrfs_end_transaction(trans, root);
			
 
				 			break;
			
 
				 		}
			
 
				 
			
@@ -6810,11 +6895,30 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode,
 
				 		ret = btrfs_update_inode(trans, root, inode);
			
 
				 		BUG_ON(ret);
			
 
				 
			
 
				-		btrfs_end_transaction(trans, root);
			
 
				+		if (own_trans)
			
 
				+			btrfs_end_transaction(trans, root);
			
 
				 	}
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+/*
				+ * Preallocate extents for [start, start + num_bytes) without a caller-owned
				+ * transaction: a NULL handle is passed down, so
				+ * __btrfs_prealloc_file_range() starts and ends its own transaction on each
				+ * loop iteration.  Returns whatever the helper returns.
				+ */
				+int btrfs_prealloc_file_range(struct inode *inode, int mode,
			
 
				+			      u64 start, u64 num_bytes, u64 min_size,
			
 
				+			      loff_t actual_len, u64 *alloc_hint)
			
 
				+{
			
 
				+	return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
			
 
				+					   min_size, actual_len, alloc_hint,
			
 
				+					   NULL);
			
 
				+}
			
 
				+
			
 
				+/*
				+ * Same as btrfs_prealloc_file_range(), but the work is done inside the
				+ * transaction handle supplied by the caller.  With a non-NULL @trans,
				+ * __btrfs_prealloc_file_range() neither starts nor ends a transaction, so
				+ * ending it is left to the caller.
				+ */
				+int btrfs_prealloc_file_range_trans(struct inode *inode,
			
 
				+				    struct btrfs_trans_handle *trans, int mode,
			
 
				+				    u64 start, u64 num_bytes, u64 min_size,
			
 
				+				    loff_t actual_len, u64 *alloc_hint)
			
 
				+{
			
 
				+	return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
			
 
				+					   min_size, actual_len, alloc_hint, trans);
			
 
				+}
			
 
				+
			
 
				 static long btrfs_fallocate(struct inode *inode, int mode,
			
 
				 			    loff_t offset, loff_t len)
			
 
				 {
			
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -224,7 +224,8 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
 
				 
			
 
				 static noinline int create_subvol(struct btrfs_root *root,
			
 
				 				  struct dentry *dentry,
			
 
				-				  char *name, int namelen)
			
 
				+				  char *name, int namelen,
			
 
				+				  u64 *async_transid)
			
 
				 {
			
 
				 	struct btrfs_trans_handle *trans;
			
 
				 	struct btrfs_key key;
			
@@ -338,13 +339,19 @@ static noinline int create_subvol(struct btrfs_root *root,
 
				 
			
 
				 	d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
			
 
				 fail:
			
 
				-	err = btrfs_commit_transaction(trans, root);
			
 
				+	if (async_transid) {
			
 
				+		*async_transid = trans->transid;
			
 
				+		err = btrfs_commit_transaction_async(trans, root, 1);
			
 
				+	} else {
			
 
				+		err = btrfs_commit_transaction(trans, root);
			
 
				+	}
			
 
				 	if (err && !ret)
			
 
				 		ret = err;
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
			
 
				+static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
			
 
				+			   char *name, int namelen, u64 *async_transid)
			
 
				 {
			
 
				 	struct inode *inode;
			
 
				 	struct btrfs_pending_snapshot *pending_snapshot;
			
@@ -373,7 +380,14 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
 
				 
			
 
				 	list_add(&pending_snapshot->list,
			
 
				 		 &trans->transaction->pending_snapshots);
			
 
				-	ret = btrfs_commit_transaction(trans, root->fs_info->extent_root);
			
 
				+	if (async_transid) {
			
 
				+		*async_transid = trans->transid;
			
 
				+		ret = btrfs_commit_transaction_async(trans,
			
 
				+				     root->fs_info->extent_root, 1);
			
 
				+	} else {
			
 
				+		ret = btrfs_commit_transaction(trans,
			
 
				+					       root->fs_info->extent_root);
			
 
				+	}
			
 
				 	BUG_ON(ret);
			
 
				 
			
 
				 	ret = pending_snapshot->error;
			
@@ -395,6 +409,76 @@ fail:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+/*  copy of check_sticky in fs/namei.c()
			
 
				+* It's inline, so penalty for filesystems that don't use sticky bit is
			
 
				+* minimal.
			
 
				+*/
			
 
				+static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
				+{
				+	uid_t caller;
				+
				+	/* Without the sticky bit the directory imposes no extra rule. */
				+	if (!(dir->i_mode & S_ISVTX))
				+		return 0;
				+
				+	/* Owning either the victim or the directory is sufficient. */
				+	caller = current_fsuid();
				+	if (inode->i_uid == caller || dir->i_uid == caller)
				+		return 0;
				+
				+	/* Anyone else is refused (non-zero) unless they hold CAP_FOWNER. */
				+	return capable(CAP_FOWNER) ? 0 : 1;
				+}
			
 
				+
			
 
				+/*  copy of may_delete in fs/namei.c()
			
 
				+ *	Check whether we can remove a link victim from directory dir, check
			
 
				+ *  whether the type of victim is right.
			
 
				+ *  1. We can't do it if dir is read-only (done in permission())
			
 
				+ *  2. We should have write and exec permissions on dir
			
 
				+ *  3. We can't remove anything from append-only dir
			
 
				+ *  4. We can't do anything with immutable dir (done in permission())
			
 
				+ *  5. If the sticky bit on dir is set we should either
			
 
				+ *	a. be owner of dir, or
			
 
				+ *	b. be owner of victim, or
			
 
				+ *	c. have CAP_FOWNER capability
			
 
				+ *  6. If the victim is append-only or immutable we can't do anything with
			
 
				+ *     links pointing to it.
			
 
				+ *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
			
 
				+ *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
			
 
				+ *  9. We can't remove a root or mountpoint.
			
 
				+ * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
			
 
				+ *     nfs_async_unlink().
			
 
				+ */
			
 
				+
			
 
				+static int btrfs_may_delete(struct inode *dir, struct dentry *victim,
				+			    int isdir)
				+{
				+	struct inode *target = victim->d_inode;
				+	int err;
				+
				+	/* A negative dentry has nothing to delete. */
				+	if (!target)
				+		return -ENOENT;
				+
				+	BUG_ON(victim->d_parent->d_inode != dir);
				+	audit_inode_child(victim, dir);
				+
				+	/* Removing an entry needs write and search permission on dir. */
				+	err = inode_permission(dir, MAY_WRITE | MAY_EXEC);
				+	if (err)
				+		return err;
				+
				+	/*
				+	 * An append-only directory, a sticky-bit refusal, or an
				+	 * append-only/immutable/swapfile victim all yield -EPERM.
				+	 */
				+	if (IS_APPEND(dir) ||
				+	    btrfs_check_sticky(dir, target) ||
				+	    IS_APPEND(target) ||
				+	    IS_IMMUTABLE(target) ||
				+	    IS_SWAPFILE(target))
				+		return -EPERM;
				+
				+	/* The victim's type has to match what the caller asked to remove. */
				+	if (isdir && !S_ISDIR(target->i_mode))
				+		return -ENOTDIR;
				+	if (isdir && IS_ROOT(victim))
				+		return -EBUSY;
				+	if (!isdir && S_ISDIR(target->i_mode))
				+		return -EISDIR;
				+
				+	if (IS_DEADDIR(dir))
				+		return -ENOENT;
				+	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
				+		return -EBUSY;
				+
				+	return 0;
				+}
			
 
				+
			
 
				 /* copy of may_create in fs/namei.c() */
			
 
				 static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
			
 
				 {
			
@@ -412,7 +496,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
 
				  */
			
 
				 static noinline int btrfs_mksubvol(struct path *parent,
			
 
				 				   char *name, int namelen,
			
 
				-				   struct btrfs_root *snap_src)
			
 
				+				   struct btrfs_root *snap_src,
			
 
				+				   u64 *async_transid)
			
 
				 {
			
 
				 	struct inode *dir  = parent->dentry->d_inode;
			
 
				 	struct dentry *dentry;
			
@@ -443,10 +528,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
 
				 		goto out_up_read;
			
 
				 
			
 
				 	if (snap_src) {
			
 
				-		error = create_snapshot(snap_src, dentry);
			
 
				+		error = create_snapshot(snap_src, dentry,
			
 
				+					name, namelen, async_transid);
			
 
				 	} else {
			
 
				 		error = create_subvol(BTRFS_I(dir)->root, dentry,
			
 
				-				      name, namelen);
			
 
				+				      name, namelen, async_transid);
			
 
				 	}
			
 
				 	if (!error)
			
 
				 		fsnotify_mkdir(dir, dentry);
			
@@ -708,7 +794,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
 
				 	char *sizestr;
			
 
				 	char *devstr = NULL;
			
 
				 	int ret = 0;
			
 
				-	int namelen;
			
 
				 	int mod = 0;
			
 
				 
			
 
				 	if (root->fs_info->sb->s_flags & MS_RDONLY)
			
@@ -722,7 +807,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
 
				 		return PTR_ERR(vol_args);
			
 
				 
			
 
				 	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
			
 
				-	namelen = strlen(vol_args->name);
			
 
				 
			
 
				 	mutex_lock(&root->fs_info->volume_mutex);
			
 
				 	sizestr = vol_args->name;
			
@@ -801,11 +885,13 @@ out_unlock:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static noinline int btrfs_ioctl_snap_create(struct file *file,
			
 
				-					    void __user *arg, int subvol)
			
 
				+static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
			
 
				+						    char *name,
			
 
				+						    unsigned long fd,
			
 
				+						    int subvol,
			
 
				+						    u64 *transid)
			
 
				 {
			
 
				 	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
			
 
				-	struct btrfs_ioctl_vol_args *vol_args;
			
 
				 	struct file *src_file;
			
 
				 	int namelen;
			
 
				 	int ret = 0;
			
@@ -813,23 +899,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
 
				 	if (root->fs_info->sb->s_flags & MS_RDONLY)
			
 
				 		return -EROFS;
			
 
				 
			
 
				-	vol_args = memdup_user(arg, sizeof(*vol_args));
			
 
				-	if (IS_ERR(vol_args))
			
 
				-		return PTR_ERR(vol_args);
			
 
				-
			
 
				-	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
			
 
				-	namelen = strlen(vol_args->name);
			
 
				-	if (strchr(vol_args->name, '/')) {
			
 
				+	namelen = strlen(name);
			
 
				+	if (strchr(name, '/')) {
			
 
				 		ret = -EINVAL;
			
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				 	if (subvol) {
			
 
				-		ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
			
 
				-				     NULL);
			
 
				+		ret = btrfs_mksubvol(&file->f_path, name, namelen,
			
 
				+				     NULL, transid);
			
 
				 	} else {
			
 
				 		struct inode *src_inode;
			
 
				-		src_file = fget(vol_args->fd);
			
 
				+		src_file = fget(fd);
			
 
				 		if (!src_file) {
			
 
				 			ret = -EINVAL;
			
 
				 			goto out;
			
@@ -843,12 +924,56 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
 
				 			fput(src_file);
			
 
				 			goto out;
			
 
				 		}
			
 
				-		ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
			
 
				-				     BTRFS_I(src_inode)->root);
			
 
				+		ret = btrfs_mksubvol(&file->f_path, name, namelen,
			
 
				+				     BTRFS_I(src_inode)->root,
			
 
				+				     transid);
			
 
				 		fput(src_file);
			
 
				 	}
			
 
				 out:
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/*
				+ * Shared entry point for the subvolume/snapshot creation ioctls.
				+ *
				+ * file:   open file; its path is the parent of the new subvolume/snapshot
				+ * arg:    user pointer to a struct btrfs_ioctl_async_vol_args when async is
				+ *         set, otherwise to a struct btrfs_ioctl_vol_args
				+ * subvol: non-zero creates an empty subvolume, zero snapshots the root
				+ *         behind the fd carried in the argument struct
				+ * async:  non-zero starts the commit asynchronously and copies the
				+ *         transaction id back into the user's argument struct
				+ *
				+ * Returns 0 on success or a negative errno (-EFAULT if the transaction id
				+ * cannot be copied back to user space).
				+ */
				+static noinline int btrfs_ioctl_snap_create(struct file *file,
				+					    void __user *arg, int subvol,
				+					    int async)
				+{
				+	struct btrfs_ioctl_vol_args *vol_args = NULL;
				+	struct btrfs_ioctl_async_vol_args *async_vol_args = NULL;
				+	char *name;
				+	u64 fd;
				+	u64 transid = 0;
				+	u64 *transid_ptr = NULL;
				+	int ret;
				+
				+	if (async) {
				+		async_vol_args = memdup_user(arg, sizeof(*async_vol_args));
				+		if (IS_ERR(async_vol_args))
				+			return PTR_ERR(async_vol_args);
				+
				+		name = async_vol_args->name;
				+		fd = async_vol_args->fd;
				+		async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0';
				+		/*
				+		 * A non-NULL transid pointer makes create_subvol() and
				+		 * create_snapshot() take the async commit path, so it is
				+		 * only handed down for the async ioctl.
				+		 */
				+		transid_ptr = &transid;
				+	} else {
				+		vol_args = memdup_user(arg, sizeof(*vol_args));
				+		if (IS_ERR(vol_args))
				+			return PTR_ERR(vol_args);
				+		name = vol_args->name;
				+		fd = vol_args->fd;
				+		vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
				+	}
				+
				+	ret = btrfs_ioctl_snap_create_transid(file, name, fd,
				+					      subvol, transid_ptr);
				+
				+	if (!ret && async) {
				+		/* Record the error and fall through so the copies are freed. */
				+		if (copy_to_user(arg +
				+				offsetof(struct btrfs_ioctl_async_vol_args,
				+				transid), &transid, sizeof(transid)))
				+			ret = -EFAULT;
				+	}
				+
				 	kfree(vol_args);
				+	kfree(async_vol_args);
				+
				 	return ret;
				 }
			
 
				 
			
@@ -1073,14 +1198,10 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
 
				 	if (!capable(CAP_SYS_ADMIN))
			
 
				 		return -EPERM;
			
 
				 
			
 
				-	args = kmalloc(sizeof(*args), GFP_KERNEL);
			
 
				-	if (!args)
			
 
				-		return -ENOMEM;
			
 
				+	args = memdup_user(argp, sizeof(*args));
			
 
				+	if (IS_ERR(args))
			
 
				+		return PTR_ERR(args);
			
 
				 
			
 
				-	if (copy_from_user(args, argp, sizeof(*args))) {
			
 
				-		kfree(args);
			
 
				-		return -EFAULT;
			
 
				-	}
			
 
				 	inode = fdentry(file)->d_inode;
			
 
				 	ret = search_ioctl(inode, args);
			
 
				 	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
			
@@ -1188,14 +1309,10 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 
				 	if (!capable(CAP_SYS_ADMIN))
			
 
				 		return -EPERM;
			
 
				 
			
 
				-	args = kmalloc(sizeof(*args), GFP_KERNEL);
			
 
				-	if (!args)
			
 
				-		return -ENOMEM;
			
 
				+	args = memdup_user(argp, sizeof(*args));
			
 
				+	if (IS_ERR(args))
			
 
				+		return PTR_ERR(args);
			
 
				 
			
 
				-	if (copy_from_user(args, argp, sizeof(*args))) {
			
 
				-		kfree(args);
			
 
				-		return -EFAULT;
			
 
				-	}
			
 
				 	inode = fdentry(file)->d_inode;
			
 
				 
			
 
				 	if (args->treeid == 0)
			
@@ -1227,9 +1344,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 
				 	int ret;
			
 
				 	int err = 0;
			
 
				 
			
 
				-	if (!capable(CAP_SYS_ADMIN))
			
 
				-		return -EPERM;
			
 
				-
			
 
				 	vol_args = memdup_user(arg, sizeof(*vol_args));
			
 
				 	if (IS_ERR(vol_args))
			
 
				 		return PTR_ERR(vol_args);
			
@@ -1259,13 +1373,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 
				 	}
			
 
				 
			
 
				 	inode = dentry->d_inode;
			
 
				+	dest = BTRFS_I(inode)->root;
			
 
				+	if (!capable(CAP_SYS_ADMIN)){
			
 
				+		/*
			
 
				+		 * Regular user.  Only allow this with a special mount
			
 
				+		 * option, when the user has write+exec access to the
			
 
				+		 * subvol root, and when rmdir(2) would have been
			
 
				+		 * allowed.
			
 
				+		 *
			
 
				+		 * Note that this is _not_ check that the subvol is
			
 
				+		 * empty or doesn't contain data that we wouldn't
			
 
				+		 * otherwise be able to delete.
			
 
				+		 *
			
 
				+		 * Users who want to delete empty subvols should try
			
 
				+		 * rmdir(2).
			
 
				+		 */
			
 
				+		err = -EPERM;
			
 
				+		if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
			
 
				+			goto out_dput;
			
 
				+
			
 
				+		/*
			
 
				+		 * Do not allow deletion if the parent dir is the same
			
 
				+		 * as the dir to be deleted.  That means the ioctl
			
 
				+		 * must be called on the dentry referencing the root
			
 
				+		 * of the subvol, not a random directory contained
			
 
				+		 * within it.
			
 
				+		 */
			
 
				+		err = -EINVAL;
			
 
				+		if (root == dest)
			
 
				+			goto out_dput;
			
 
				+
			
 
				+		err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
			
 
				+		if (err)
			
 
				+			goto out_dput;
			
 
				+
			
 
				+		/* check if subvolume may be deleted by a non-root user */
			
 
				+		err = btrfs_may_delete(dir, dentry, 1);
			
 
				+		if (err)
			
 
				+			goto out_dput;
			
 
				+	}
			
 
				+
			
 
				 	if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
			
 
				 		err = -EINVAL;
			
 
				 		goto out_dput;
			
 
				 	}
			
 
				 
			
 
				-	dest = BTRFS_I(inode)->root;
			
 
				-
			
 
				 	mutex_lock(&inode->i_mutex);
			
 
				 	err = d_invalidate(dentry);
			
 
				 	if (err)
			
@@ -1304,7 +1456,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 
				 		BUG_ON(ret);
			
 
				 	}
			
 
				 
			
 
				-	ret = btrfs_commit_transaction(trans, root);
			
 
				+	ret = btrfs_end_transaction(trans, root);
			
 
				 	BUG_ON(ret);
			
 
				 	inode->i_flags |= S_DEAD;
			
 
				 out_up_write:
			
@@ -1502,11 +1654,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 
				 	path->reada = 2;
			
 
				 
			
 
				 	if (inode < src) {
			
 
				-		mutex_lock(&inode->i_mutex);
			
 
				-		mutex_lock(&src->i_mutex);
			
 
				+		mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
			
 
				+		mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
			
 
				 	} else {
			
 
				-		mutex_lock(&src->i_mutex);
			
 
				-		mutex_lock(&inode->i_mutex);
			
 
				+		mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
			
 
				+		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
			
 
				 	}
			
 
				 
			
 
				 	/* determine range to clone */
			
@@ -1530,13 +1682,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 
				 	while (1) {
			
 
				 		struct btrfs_ordered_extent *ordered;
			
 
				 		lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
			
 
				-		ordered = btrfs_lookup_first_ordered_extent(inode, off+len);
			
 
				-		if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
			
 
				+		ordered = btrfs_lookup_first_ordered_extent(src, off+len);
			
 
				+		if (!ordered &&
			
 
				+		    !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
			
 
				+				   EXTENT_DELALLOC, 0, NULL))
			
 
				 			break;
			
 
				 		unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
			
 
				 		if (ordered)
			
 
				 			btrfs_put_ordered_extent(ordered);
			
 
				-		btrfs_wait_ordered_range(src, off, off+len);
			
 
				+		btrfs_wait_ordered_range(src, off, len);
			
 
				 	}
			
 
				 
			
 
				 	/* clone data */
			
@@ -1605,7 +1759,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 
				 			}
			
 
				 			btrfs_release_path(root, path);
			
 
				 
			
 
				-			if (key.offset + datal < off ||
			
 
				+			if (key.offset + datal <= off ||
			
 
				 			    key.offset >= off+len)
			
 
				 				goto next;
			
 
				 
			
@@ -1879,6 +2033,22 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static void get_block_group_info(struct list_head *groups_list,
			
 
				+				 struct btrfs_ioctl_space_info *space)
			
 
				+{
			
 
				+	struct btrfs_block_group_cache *block_group;
			
 
				+
			
 
				+	space->total_bytes = 0;
			
 
				+	space->used_bytes = 0;
			
 
				+	space->flags = 0;
			
 
				+	list_for_each_entry(block_group, groups_list, list) {
			
 
				+		space->flags = block_group->flags;
			
 
				+		space->total_bytes += block_group->key.offset;
			
 
				+		space->used_bytes +=
			
 
				+			btrfs_block_group_used(&block_group->item);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
			
 
				 {
			
 
				 	struct btrfs_ioctl_space_args space_args;
			
@@ -1887,27 +2057,56 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
 
				 	struct btrfs_ioctl_space_info *dest_orig;
			
 
				 	struct btrfs_ioctl_space_info *user_dest;
			
 
				 	struct btrfs_space_info *info;
			
 
				+	u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
			
 
				+		       BTRFS_BLOCK_GROUP_SYSTEM,
			
 
				+		       BTRFS_BLOCK_GROUP_METADATA,
			
 
				+		       BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
			
 
				+	int num_types = 4;
			
 
				 	int alloc_size;
			
 
				 	int ret = 0;
			
 
				 	int slot_count = 0;
			
 
				+	int i, c;
			
 
				 
			
 
				 	if (copy_from_user(&space_args,
			
 
				 			   (struct btrfs_ioctl_space_args __user *)arg,
			
 
				 			   sizeof(space_args)))
			
 
				 		return -EFAULT;
			
 
				 
			
 
				-	/* first we count slots */
			
 
				-	rcu_read_lock();
			
 
				-	list_for_each_entry_rcu(info, &root->fs_info->space_info, list)
			
 
				-		slot_count++;
			
 
				-	rcu_read_unlock();
			
 
				+	for (i = 0; i < num_types; i++) {
			
 
				+		struct btrfs_space_info *tmp;
			
 
				+
			
 
				+		info = NULL;
			
 
				+		rcu_read_lock();
			
 
				+		list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
			
 
				+					list) {
			
 
				+			if (tmp->flags == types[i]) {
			
 
				+				info = tmp;
			
 
				+				break;
			
 
				+			}
			
 
				+		}
			
 
				+		rcu_read_unlock();
			
 
				+
			
 
				+		if (!info)
			
 
				+			continue;
			
 
				+
			
 
				+		down_read(&info->groups_sem);
			
 
				+		for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
			
 
				+			if (!list_empty(&info->block_groups[c]))
			
 
				+				slot_count++;
			
 
				+		}
			
 
				+		up_read(&info->groups_sem);
			
 
				+	}
			
 
				 
			
 
				 	/* space_slots == 0 means they are asking for a count */
			
 
				 	if (space_args.space_slots == 0) {
			
 
				 		space_args.total_spaces = slot_count;
			
 
				 		goto out;
			
 
				 	}
			
 
				+
			
 
				+	slot_count = min_t(int, space_args.space_slots, slot_count);
			
 
				+
			
 
				 	alloc_size = sizeof(*dest) * slot_count;
			
 
				+
			
 
				 	/* we generally have at most 6 or so space infos, one for each raid
			
 
				 	 * level.  So, a whole page should be more than enough for everyone
			
 
				 	 */
			
@@ -1921,27 +2120,34 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
 
				 	dest_orig = dest;
			
 
				 
			
 
				 	/* now we have a buffer to copy into */
			
 
				-	rcu_read_lock();
			
 
				-	list_for_each_entry_rcu(info, &root->fs_info->space_info, list) {
			
 
				-		/* make sure we don't copy more than we allocated
			
 
				-		 * in our buffer
			
 
				-		 */
			
 
				-		if (slot_count == 0)
			
 
				-			break;
			
 
				-		slot_count--;
			
 
				-
			
 
				-		/* make sure userland has enough room in their buffer */
			
 
				-		if (space_args.total_spaces >= space_args.space_slots)
			
 
				-			break;
			
 
				+	for (i = 0; i < num_types; i++) {
			
 
				+		struct btrfs_space_info *tmp;
			
 
				+
			
 
				+		info = NULL;
			
 
				+		rcu_read_lock();
			
 
				+		list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
			
 
				+					list) {
			
 
				+			if (tmp->flags == types[i]) {
			
 
				+				info = tmp;
			
 
				+				break;
			
 
				+			}
			
 
				+		}
			
 
				+		rcu_read_unlock();
			
 
				 
			
 
				-		space.flags = info->flags;
			
 
				-		space.total_bytes = info->total_bytes;
			
 
				-		space.used_bytes = info->bytes_used;
			
 
				-		memcpy(dest, &space, sizeof(space));
			
 
				-		dest++;
			
 
				-		space_args.total_spaces++;
			
 
				+		if (!info)
			
 
				+			continue;
			
 
				+		down_read(&info->groups_sem);
			
 
				+		for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
			
 
				+			if (!list_empty(&info->block_groups[c])) {
			
 
				+				get_block_group_info(&info->block_groups[c],
			
 
				+						     &space);
			
 
				+				memcpy(dest, &space, sizeof(space));
			
 
				+				dest++;
			
 
				+				space_args.total_spaces++;
			
 
				+			}
			
 
				+		}
			
 
				+		up_read(&info->groups_sem);
			
 
				 	}
			
 
				-	rcu_read_unlock();
			
 
				 
			
 
				 	user_dest = (struct btrfs_ioctl_space_info *)
			
 
				 		(arg + sizeof(struct btrfs_ioctl_space_args));
			
@@ -1984,6 +2190,36 @@ long btrfs_ioctl_trans_end(struct file *file)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
			
 
				+{
			
 
				+	struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
			
 
				+	struct btrfs_trans_handle *trans;
			
 
				+	u64 transid;
			
 
				+
			
 
				+	trans = btrfs_start_transaction(root, 0);
			
 
				+	transid = trans->transid;
			
 
				+	btrfs_commit_transaction_async(trans, root, 0);
			
 
				+
			
 
				+	if (argp)
			
 
				+		if (copy_to_user(argp, &transid, sizeof(transid)))
			
 
				+			return -EFAULT;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
			
 
				+{
			
 
				+	struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
			
 
				+	u64 transid;
			
 
				+
			
 
				+	if (argp) {
			
 
				+		if (copy_from_user(&transid, argp, sizeof(transid)))
			
 
				+			return -EFAULT;
			
 
				+	} else {
			
 
				+		transid = 0;  /* current trans */
			
 
				+	}
			
 
				+	return btrfs_wait_for_commit(root, transid);
			
 
				+}
			
 
				+
			
 
				 long btrfs_ioctl(struct file *file, unsigned int
			
 
				 		cmd, unsigned long arg)
			
 
				 {
			
@@ -1998,9 +2234,11 @@ long btrfs_ioctl(struct file *file, unsigned int
 
				 	case FS_IOC_GETVERSION:
			
 
				 		return btrfs_ioctl_getversion(file, argp);
			
 
				 	case BTRFS_IOC_SNAP_CREATE:
			
 
				-		return btrfs_ioctl_snap_create(file, argp, 0);
			
 
				+		return btrfs_ioctl_snap_create(file, argp, 0, 0);
			
 
				+	case BTRFS_IOC_SNAP_CREATE_ASYNC:
			
 
				+		return btrfs_ioctl_snap_create(file, argp, 0, 1);
			
 
				 	case BTRFS_IOC_SUBVOL_CREATE:
			
 
				-		return btrfs_ioctl_snap_create(file, argp, 1);
			
 
				+		return btrfs_ioctl_snap_create(file, argp, 1, 0);
			
 
				 	case BTRFS_IOC_SNAP_DESTROY:
			
 
				 		return btrfs_ioctl_snap_destroy(file, argp);
			
 
				 	case BTRFS_IOC_DEFAULT_SUBVOL:
			
@@ -2034,6 +2272,10 @@ long btrfs_ioctl(struct file *file, unsigned int
 
				 	case BTRFS_IOC_SYNC:
			
 
				 		btrfs_sync_fs(file->f_dentry->d_sb, 1);
			
 
				 		return 0;
			
 
				+	case BTRFS_IOC_START_SYNC:
			
 
				+		return btrfs_ioctl_start_sync(file, argp);
			
 
				+	case BTRFS_IOC_WAIT_SYNC:
			
 
				+		return btrfs_ioctl_wait_sync(file, argp);
			
 
				 	}
			
 
				 
			
 
				 	return -ENOTTY;
			
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -22,14 +22,21 @@
 
				 
			
 
				 #define BTRFS_IOCTL_MAGIC 0x94
			
 
				 #define BTRFS_VOL_NAME_MAX 255
			
 
				-#define BTRFS_PATH_NAME_MAX 4087
			
 
				 
			
 
				 /* this should be 4k */
			
 
				+#define BTRFS_PATH_NAME_MAX 4087
			
 
				 struct btrfs_ioctl_vol_args {
			
 
				 	__s64 fd;
			
 
				 	char name[BTRFS_PATH_NAME_MAX + 1];
			
 
				 };
			
 
				 
			
 
				+#define BTRFS_SNAPSHOT_NAME_MAX 4079
			
 
				+struct btrfs_ioctl_async_vol_args {
			
 
				+	__s64 fd;
			
 
				+	__u64 transid;
			
 
				+	char name[BTRFS_SNAPSHOT_NAME_MAX + 1];
			
 
				+};
			
 
				+
			
 
				 #define BTRFS_INO_LOOKUP_PATH_MAX 4080
			
 
				 struct btrfs_ioctl_ino_lookup_args {
			
 
				 	__u64 treeid;
			
@@ -178,4 +185,8 @@ struct btrfs_ioctl_space_args {
 
				 #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
			
 
				 #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
			
 
				 				    struct btrfs_ioctl_space_args)
			
 
				+#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
			
 
				+#define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
			
 
				+#define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \
			
 
				+				   struct btrfs_ioctl_async_vol_args)
			
 
				 #endif
			
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -526,7 +526,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 
				 {
			
 
				 	u64 end;
			
 
				 	u64 orig_end;
			
 
				-	u64 wait_end;
			
 
				 	struct btrfs_ordered_extent *ordered;
			
 
				 	int found;
			
 
				 
			
@@ -537,7 +536,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 
				 		if (orig_end > INT_LIMIT(loff_t))
			
 
				 			orig_end = INT_LIMIT(loff_t);
			
 
				 	}
			
 
				-	wait_end = orig_end;
			
 
				 again:
			
 
				 	/* start IO across the range first to instantiate any delalloc
			
 
				 	 * extents
			
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -29,6 +29,7 @@
 
				 #include "locking.h"
			
 
				 #include "btrfs_inode.h"
			
 
				 #include "async-thread.h"
			
 
				+#include "free-space-cache.h"
			
 
				 
			
 
				 /*
			
 
				  * backref_node, mapping_node and tree_block start with this
			
@@ -178,8 +179,6 @@ struct reloc_control {
 
				 	u64 search_start;
			
 
				 	u64 extents_found;
			
 
				 
			
 
				-	int block_rsv_retries;
			
 
				-
			
 
				 	unsigned int stage:8;
			
 
				 	unsigned int create_reloc_tree:1;
			
 
				 	unsigned int merge_reloc_tree:1;
			
@@ -2133,7 +2132,6 @@ int prepare_to_merge(struct reloc_control *rc, int err)
 
				 	LIST_HEAD(reloc_roots);
			
 
				 	u64 num_bytes = 0;
			
 
				 	int ret;
			
 
				-	int retries = 0;
			
 
				 
			
 
				 	mutex_lock(&root->fs_info->trans_mutex);
			
 
				 	rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
			
@@ -2143,7 +2141,7 @@ again:
 
				 	if (!err) {
			
 
				 		num_bytes = rc->merging_rsv_size;
			
 
				 		ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
			
 
				-					  num_bytes, &retries);
			
 
				+					  num_bytes);
			
 
				 		if (ret)
			
 
				 			err = ret;
			
 
				 	}
			
@@ -2155,7 +2153,6 @@ again:
 
				 			btrfs_end_transaction(trans, rc->extent_root);
			
 
				 			btrfs_block_rsv_release(rc->extent_root,
			
 
				 						rc->block_rsv, num_bytes);
			
 
				-			retries = 0;
			
 
				 			goto again;
			
 
				 		}
			
 
				 	}
			
@@ -2405,15 +2402,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
 
				 	num_bytes = calcu_metadata_size(rc, node, 1) * 2;
			
 
				 
			
 
				 	trans->block_rsv = rc->block_rsv;
			
 
				-	ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes,
			
 
				-				  &rc->block_rsv_retries);
			
 
				+	ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
			
 
				 	if (ret) {
			
 
				 		if (ret == -EAGAIN)
			
 
				 			rc->commit_transaction = 1;
			
 
				 		return ret;
			
 
				 	}
			
 
				 
			
 
				-	rc->block_rsv_retries = 0;
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -3099,6 +3094,8 @@ static int add_tree_block(struct reloc_control *rc,
 
				 		BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
			
 
				 		ret = get_ref_objectid_v0(rc, path, extent_key,
			
 
				 					  &ref_owner, NULL);
			
 
				+		if (ret < 0)
			
 
				+			return ret;
			
 
				 		BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
			
 
				 		level = (int)ref_owner;
			
 
				 		/* FIXME: get real generation */
			
@@ -3191,6 +3188,54 @@ static int block_use_full_backref(struct reloc_control *rc,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
			
 
				+				    struct inode *inode, u64 ino)
			
 
				+{
			
 
				+	struct btrfs_key key;
			
 
				+	struct btrfs_path *path;
			
 
				+	struct btrfs_root *root = fs_info->tree_root;
			
 
				+	struct btrfs_trans_handle *trans;
			
 
				+	unsigned long nr;
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	if (inode)
			
 
				+		goto truncate;
			
 
				+
			
 
				+	key.objectid = ino;
			
 
				+	key.type = BTRFS_INODE_ITEM_KEY;
			
 
				+	key.offset = 0;
			
 
				+
			
 
				+	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
			
 
				+	if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
			
 
				+		if (inode && !IS_ERR(inode))
			
 
				+			iput(inode);
			
 
				+		return -ENOENT;
			
 
				+	}
			
 
				+
			
 
				+truncate:
			
 
				+	path = btrfs_alloc_path();
			
 
				+	if (!path) {
			
 
				+		ret = -ENOMEM;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	trans = btrfs_join_transaction(root, 0);
			
 
				+	if (IS_ERR(trans)) {
			
 
				+		btrfs_free_path(path);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
			
 
				+
			
 
				+	btrfs_free_path(path);
			
 
				+	nr = trans->blocks_used;
			
 
				+	btrfs_end_transaction(trans, root);
			
 
				+	btrfs_btree_balance_dirty(root, nr);
			
 
				+out:
			
 
				+	iput(inode);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
			
 
				  * this function scans fs tree to find blocks reference the data extent
			
@@ -3217,15 +3262,27 @@ static int find_data_references(struct reloc_control *rc,
 
				 	int counted;
			
 
				 	int ret;
			
 
				 
			
 
				-	path = btrfs_alloc_path();
			
 
				-	if (!path)
			
 
				-		return -ENOMEM;
			
 
				-
			
 
				 	ref_root = btrfs_extent_data_ref_root(leaf, ref);
			
 
				 	ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
			
 
				 	ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
			
 
				 	ref_count = btrfs_extent_data_ref_count(leaf, ref);
			
 
				 
			
 
				+	/*
			
 
				+	 * This is an extent belonging to the free space cache, lets just delete
			
 
				+	 * it and redo the search.
			
 
				+	 */
			
 
				+	if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
			
 
				+		ret = delete_block_group_cache(rc->extent_root->fs_info,
			
 
				+					       NULL, ref_objectid);
			
 
				+		if (ret != -ENOENT)
			
 
				+			return ret;
			
 
				+		ret = 0;
			
 
				+	}
			
 
				+
			
 
				+	path = btrfs_alloc_path();
			
 
				+	if (!path)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				 	root = read_fs_root(rc->extent_root->fs_info, ref_root);
			
 
				 	if (IS_ERR(root)) {
			
 
				 		err = PTR_ERR(root);
			
@@ -3554,8 +3611,7 @@ int prepare_to_relocate(struct reloc_control *rc)
 
				 	 * is no reservation in transaction handle.
			
 
				 	 */
			
 
				 	ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
			
 
				-				  rc->extent_root->nodesize * 256,
			
 
				-				  &rc->block_rsv_retries);
			
 
				+				  rc->extent_root->nodesize * 256);
			
 
				 	if (ret)
			
 
				 		return ret;
			
 
				 
			
@@ -3567,7 +3623,6 @@ int prepare_to_relocate(struct reloc_control *rc)
 
				 	rc->extents_found = 0;
			
 
				 	rc->nodes_relocated = 0;
			
 
				 	rc->merging_rsv_size = 0;
			
 
				-	rc->block_rsv_retries = 0;
			
 
				 
			
 
				 	rc->create_reloc_tree = 1;
			
 
				 	set_reloc_control(rc);
			
@@ -3860,6 +3915,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 
				 {
			
 
				 	struct btrfs_fs_info *fs_info = extent_root->fs_info;
			
 
				 	struct reloc_control *rc;
			
 
				+	struct inode *inode;
			
 
				+	struct btrfs_path *path;
			
 
				 	int ret;
			
 
				 	int rw = 0;
			
 
				 	int err = 0;
			
@@ -3882,6 +3939,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 
				 		rw = 1;
			
 
				 	}
			
 
				 
			
 
				+	path = btrfs_alloc_path();
			
 
				+	if (!path) {
			
 
				+		err = -ENOMEM;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
			
 
				+					path);
			
 
				+	btrfs_free_path(path);
			
 
				+
			
 
				+	if (!IS_ERR(inode))
			
 
				+		ret = delete_block_group_cache(fs_info, inode, 0);
			
 
				+	else
			
 
				+		ret = PTR_ERR(inode);
			
 
				+
			
 
				+	if (ret && ret != -ENOENT) {
			
 
				+		err = ret;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				 	rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
			
 
				 	if (IS_ERR(rc->data_inode)) {
			
 
				 		err = PTR_ERR(rc->data_inode);
			
@@ -4143,7 +4220,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
 
				 		btrfs_add_ordered_sum(inode, ordered, sums);
			
 
				 	}
			
 
				 	btrfs_put_ordered_extent(ordered);
			
 
				-	return 0;
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
			
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -181,7 +181,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
 
				 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
			
 
				 {
			
 
				 	struct btrfs_root *dead_root;
			
 
				-	struct btrfs_item *item;
			
 
				 	struct btrfs_root_item *ri;
			
 
				 	struct btrfs_key key;
			
 
				 	struct btrfs_key found_key;
			
@@ -214,7 +213,6 @@ again:
 
				 			nritems = btrfs_header_nritems(leaf);
			
 
				 			slot = path->slots[0];
			
 
				 		}
			
 
				-		item = btrfs_item_nr(leaf, slot);
			
 
				 		btrfs_item_key_to_cpu(leaf, &key, slot);
			
 
				 		if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
			
 
				 			goto next;
			
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -61,6 +61,8 @@ static void btrfs_put_super(struct super_block *sb)
 
				 
			
 
				 	ret = close_ctree(root);
			
 
				 	sb->s_fs_info = NULL;
			
 
				+
			
 
				+	(void)ret; /* FIXME: need to fix VFS to return error? */
			
 
				 }
			
 
				 
			
 
				 enum {
			
@@ -68,7 +70,8 @@ enum {
 
				 	Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
			
 
				 	Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
			
 
				 	Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
			
 
				-	Opt_discard, Opt_err,
			
 
				+	Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
			
 
				+	Opt_user_subvol_rm_allowed,
			
 
				 };
			
 
				 
			
 
				 static match_table_t tokens = {
			
@@ -92,6 +95,9 @@ static match_table_t tokens = {
 
				 	{Opt_flushoncommit, "flushoncommit"},
			
 
				 	{Opt_ratio, "metadata_ratio=%d"},
			
 
				 	{Opt_discard, "discard"},
			
 
				+	{Opt_space_cache, "space_cache"},
			
 
				+	{Opt_clear_cache, "clear_cache"},
			
 
				+	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
			
 
				 	{Opt_err, NULL},
			
 
				 };
			
 
				 
			
@@ -235,6 +241,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 
				 		case Opt_discard:
			
 
				 			btrfs_set_opt(info->mount_opt, DISCARD);
			
 
				 			break;
			
 
				+		case Opt_space_cache:
			
 
				+			printk(KERN_INFO "btrfs: enabling disk space caching\n");
			
 
				+			btrfs_set_opt(info->mount_opt, SPACE_CACHE);
			
 
				+		case Opt_clear_cache:
			
 
				+			printk(KERN_INFO "btrfs: force clearing of disk cache\n");
			
 
				+			btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
			
 
				+			break;
			
 
				+		case Opt_user_subvol_rm_allowed:
			
 
				+			btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
			
 
				+			break;
			
 
				 		case Opt_err:
			
 
				 			printk(KERN_INFO "btrfs: unrecognized mount option "
			
 
				 			       "'%s'\n", p);
			
@@ -380,7 +396,7 @@ static struct dentry *get_default_root(struct super_block *sb,
 
				 find_root:
			
 
				 	new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
			
 
				 	if (IS_ERR(new_root))
			
 
				-		return ERR_PTR(PTR_ERR(new_root));
			
 
				+		return ERR_CAST(new_root);
			
 
				 
			
 
				 	if (btrfs_root_refs(&new_root->root_item) == 0)
			
 
				 		return ERR_PTR(-ENOENT);
			
@@ -436,7 +452,6 @@ static int btrfs_fill_super(struct super_block *sb,
 
				 {
			
 
				 	struct inode *inode;
			
 
				 	struct dentry *root_dentry;
			
 
				-	struct btrfs_super_block *disk_super;
			
 
				 	struct btrfs_root *tree_root;
			
 
				 	struct btrfs_key key;
			
 
				 	int err;
			
@@ -458,7 +473,6 @@ static int btrfs_fill_super(struct super_block *sb,
 
				 		return PTR_ERR(tree_root);
			
 
				 	}
			
 
				 	sb->s_fs_info = tree_root;
			
 
				-	disk_super = &tree_root->fs_info->super_copy;
			
 
				 
			
 
				 	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
			
 
				 	key.type = BTRFS_INODE_ITEM_KEY;
			
@@ -571,7 +585,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 
				 	char *subvol_name = NULL;
			
 
				 	u64 subvol_objectid = 0;
			
 
				 	int error = 0;
			
 
				-	int found = 0;
			
 
				 
			
 
				 	if (!(flags & MS_RDONLY))
			
 
				 		mode |= FMODE_WRITE;
			
@@ -607,7 +620,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 
				 			goto error_close_devices;
			
 
				 		}
			
 
				 
			
 
				-		found = 1;
			
 
				 		btrfs_close_devices(fs_devices);
			
 
				 	} else {
			
 
				 		char b[BDEVNAME_SIZE];
			
@@ -629,7 +641,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 
				 	if (IS_ERR(root)) {
			
 
				 		error = PTR_ERR(root);
			
 
				 		deactivate_locked_super(s);
			
 
				-		goto error;
			
 
				+		goto error_free_subvol_name;
			
 
				 	}
			
 
				 	/* if they gave us a subvolume name bind mount into that */
			
 
				 	if (strcmp(subvol_name, ".")) {
			
@@ -643,14 +655,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 
				 			deactivate_locked_super(s);
			
 
				 			error = PTR_ERR(new_root);
			
 
				 			dput(root);
			
 
				-			goto error_close_devices;
			
 
				+			goto error_free_subvol_name;
			
 
				 		}
			
 
				 		if (!new_root->d_inode) {
			
 
				 			dput(root);
			
 
				 			dput(new_root);
			
 
				 			deactivate_locked_super(s);
			
 
				 			error = -ENXIO;
			
 
				-			goto error_close_devices;
			
 
				+			goto error_free_subvol_name;
			
 
				 		}
			
 
				 		dput(root);
			
 
				 		root = new_root;
			
@@ -665,7 +677,6 @@ error_close_devices:
 
				 	btrfs_close_devices(fs_devices);
			
 
				 error_free_subvol_name:
			
 
				 	kfree(subvol_name);
			
 
				-error:
			
 
				 	return ERR_PTR(error);
			
 
				 }
			
 
				 
			
@@ -713,18 +724,25 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
				 	struct list_head *head = &root->fs_info->space_info;
			
 
				 	struct btrfs_space_info *found;
			
 
				 	u64 total_used = 0;
			
 
				+	u64 total_used_data = 0;
			
 
				 	int bits = dentry->d_sb->s_blocksize_bits;
			
 
				 	__be32 *fsid = (__be32 *)root->fs_info->fsid;
			
 
				 
			
 
				 	rcu_read_lock();
			
 
				-	list_for_each_entry_rcu(found, head, list)
			
 
				+	list_for_each_entry_rcu(found, head, list) {
			
 
				+		if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
			
 
				+				    BTRFS_BLOCK_GROUP_SYSTEM))
			
 
				+			total_used_data += found->disk_total;
			
 
				+		else
			
 
				+			total_used_data += found->disk_used;
			
 
				 		total_used += found->disk_used;
			
 
				+	}
			
 
				 	rcu_read_unlock();
			
 
				 
			
 
				 	buf->f_namelen = BTRFS_NAME_LEN;
			
 
				 	buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
			
 
				 	buf->f_bfree = buf->f_blocks - (total_used >> bits);
			
 
				-	buf->f_bavail = buf->f_bfree;
			
 
				+	buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
			
 
				 	buf->f_bsize = dentry->d_sb->s_blocksize;
			
 
				 	buf->f_type = BTRFS_SUPER_MAGIC;
			
 
				 
			
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -163,6 +163,7 @@ enum btrfs_trans_type {
 
				 	TRANS_START,
			
 
				 	TRANS_JOIN,
			
 
				 	TRANS_USERSPACE,
			
 
				+	TRANS_JOIN_NOLOCK,
			
 
				 };
			
 
				 
			
 
				 static int may_wait_transaction(struct btrfs_root *root, int type)
			
@@ -179,14 +180,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 
				 {
			
 
				 	struct btrfs_trans_handle *h;
			
 
				 	struct btrfs_transaction *cur_trans;
			
 
				-	int retries = 0;
			
 
				 	int ret;
			
 
				 again:
			
 
				 	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
			
 
				 	if (!h)
			
 
				 		return ERR_PTR(-ENOMEM);
			
 
				 
			
 
				-	mutex_lock(&root->fs_info->trans_mutex);
			
 
				+	if (type != TRANS_JOIN_NOLOCK)
			
 
				+		mutex_lock(&root->fs_info->trans_mutex);
			
 
				 	if (may_wait_transaction(root, type))
			
 
				 		wait_current_trans(root);
			
 
				 
			
@@ -195,7 +196,8 @@ again:
 
				 
			
 
				 	cur_trans = root->fs_info->running_transaction;
			
 
				 	cur_trans->use_count++;
			
 
				-	mutex_unlock(&root->fs_info->trans_mutex);
			
 
				+	if (type != TRANS_JOIN_NOLOCK)
			
 
				+		mutex_unlock(&root->fs_info->trans_mutex);
			
 
				 
			
 
				 	h->transid = cur_trans->transid;
			
 
				 	h->transaction = cur_trans;
			
@@ -212,8 +214,7 @@ again:
 
				 	}
			
 
				 
			
 
				 	if (num_items > 0) {
			
 
				-		ret = btrfs_trans_reserve_metadata(h, root, num_items,
			
 
				-						   &retries);
			
 
				+		ret = btrfs_trans_reserve_metadata(h, root, num_items);
			
 
				 		if (ret == -EAGAIN) {
			
 
				 			btrfs_commit_transaction(h, root);
			
 
				 			goto again;
			
@@ -224,9 +225,11 @@ again:
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	mutex_lock(&root->fs_info->trans_mutex);
			
 
				+	if (type != TRANS_JOIN_NOLOCK)
			
 
				+		mutex_lock(&root->fs_info->trans_mutex);
			
 
				 	record_root_in_trans(h, root);
			
 
				-	mutex_unlock(&root->fs_info->trans_mutex);
			
 
				+	if (type != TRANS_JOIN_NOLOCK)
			
 
				+		mutex_unlock(&root->fs_info->trans_mutex);
			
 
				 
			
 
				 	if (!current->journal_info && type != TRANS_USERSPACE)
			
 
				 		current->journal_info = h;
			
@@ -244,6 +247,12 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
 
				 	return start_transaction(root, 0, TRANS_JOIN);
			
 
				 }
			
 
				 
			
 
				+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
			
 
				+							  int num_blocks)
			
 
				+{
			
 
				+	return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
			
 
				+}
			
 
				+
			
 
				 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
			
 
				 							 int num_blocks)
			
 
				 {
			
@@ -270,6 +279,58 @@ static noinline int wait_for_commit(struct btrfs_root *root,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
			
 
				+{
			
 
				+	struct btrfs_transaction *cur_trans = NULL, *t;
			
 
				+	int ret;
			
 
				+
			
 
				+	mutex_lock(&root->fs_info->trans_mutex);
			
 
				+
			
 
				+	ret = 0;
			
 
				+	if (transid) {
			
 
				+		if (transid <= root->fs_info->last_trans_committed)
			
 
				+			goto out_unlock;
			
 
				+
			
 
				+		/* find specified transaction */
			
 
				+		list_for_each_entry(t, &root->fs_info->trans_list, list) {
			
 
				+			if (t->transid == transid) {
			
 
				+				cur_trans = t;
			
 
				+				break;
			
 
				+			}
			
 
				+			if (t->transid > transid)
			
 
				+				break;
			
 
				+		}
			
 
				+		ret = -EINVAL;
			
 
				+		if (!cur_trans)
			
 
				+			goto out_unlock;  /* bad transid */
			
 
				+	} else {
			
 
				+		/* find newest transaction that is committing | committed */
			
 
				+		list_for_each_entry_reverse(t, &root->fs_info->trans_list,
			
 
				+					    list) {
			
 
				+			if (t->in_commit) {
			
 
				+				if (t->commit_done)
			
 
				+					goto out_unlock;
			
 
				+				cur_trans = t;
			
 
				+				break;
			
 
				+			}
			
 
				+		}
			
 
				+		if (!cur_trans)
			
 
				+			goto out_unlock;  /* nothing committing|committed */
			
 
				+	}
			
 
				+
			
 
				+	cur_trans->use_count++;
			
 
				+	mutex_unlock(&root->fs_info->trans_mutex);
			
 
				+
			
 
				+	wait_for_commit(root, cur_trans);
			
 
				+
			
 
				+	mutex_lock(&root->fs_info->trans_mutex);
			
 
				+	put_transaction(cur_trans);
			
 
				+	ret = 0;
			
 
				+out_unlock:
			
 
				+	mutex_unlock(&root->fs_info->trans_mutex);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 #if 0
			
 
				 /*
			
 
				  * rate limit against the drop_snapshot code.  This helps to slow down new
			
@@ -348,7 +409,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
 
				 }
			
 
				 
			
 
				 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
			
 
				-			  struct btrfs_root *root, int throttle)
			
 
				+			  struct btrfs_root *root, int throttle, int lock)
			
 
				 {
			
 
				 	struct btrfs_transaction *cur_trans = trans->transaction;
			
 
				 	struct btrfs_fs_info *info = root->fs_info;
			
@@ -376,26 +437,29 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
				 
			
 
				 	btrfs_trans_release_metadata(trans, root);
			
 
				 
			
 
				-	if (!root->fs_info->open_ioctl_trans &&
			
 
				+	if (lock && !root->fs_info->open_ioctl_trans &&
			
 
				 	    should_end_transaction(trans, root))
			
 
				 		trans->transaction->blocked = 1;
			
 
				 
			
 
				-	if (cur_trans->blocked && !cur_trans->in_commit) {
			
 
				+	if (lock && cur_trans->blocked && !cur_trans->in_commit) {
			
 
				 		if (throttle)
			
 
				 			return btrfs_commit_transaction(trans, root);
			
 
				 		else
			
 
				 			wake_up_process(info->transaction_kthread);
			
 
				 	}
			
 
				 
			
 
				-	mutex_lock(&info->trans_mutex);
			
 
				+	if (lock)
			
 
				+		mutex_lock(&info->trans_mutex);
			
 
				 	WARN_ON(cur_trans != info->running_transaction);
			
 
				 	WARN_ON(cur_trans->num_writers < 1);
			
 
				 	cur_trans->num_writers--;
			
 
				 
			
 
				+	smp_mb();
			
 
				 	if (waitqueue_active(&cur_trans->writer_wait))
			
 
				 		wake_up(&cur_trans->writer_wait);
			
 
				 	put_transaction(cur_trans);
			
 
				-	mutex_unlock(&info->trans_mutex);
			
 
				+	if (lock)
			
 
				+		mutex_unlock(&info->trans_mutex);
			
 
				 
			
 
				 	if (current->journal_info == trans)
			
 
				 		current->journal_info = NULL;
			
@@ -411,13 +475,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
				 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			
 
				 			  struct btrfs_root *root)
			
 
				 {
			
 
				-	return __btrfs_end_transaction(trans, root, 0);
			
 
				+	return __btrfs_end_transaction(trans, root, 0, 1);
			
 
				 }
			
 
				 
			
 
				 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
			
 
				 				   struct btrfs_root *root)
			
 
				 {
			
 
				-	return __btrfs_end_transaction(trans, root, 1);
			
 
				+	return __btrfs_end_transaction(trans, root, 1, 1);
			
 
				+}
			
 
				+
			
 
				+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
			
 
				+				 struct btrfs_root *root)
			
 
				+{
			
 
				+	return __btrfs_end_transaction(trans, root, 0, 0);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -836,7 +906,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
				 	struct extent_buffer *tmp;
			
 
				 	struct extent_buffer *old;
			
 
				 	int ret;
			
 
				-	int retries = 0;
			
 
				 	u64 to_reserve = 0;
			
 
				 	u64 index = 0;
			
 
				 	u64 objectid;
			
@@ -858,7 +927,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
				 
			
 
				 	if (to_reserve > 0) {
			
 
				 		ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
			
 
				-					  to_reserve, &retries);
			
 
				+					  to_reserve);
			
 
				 		if (ret) {
			
 
				 			pending->error = ret;
			
 
				 			goto fail;
			
@@ -966,6 +1035,8 @@ static void update_super_roots(struct btrfs_root *root)
 
				 	super->root = root_item->bytenr;
			
 
				 	super->generation = root_item->generation;
			
 
				 	super->root_level = root_item->level;
			
 
				+	if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
			
 
				+		super->cache_generation = root_item->generation;
			
 
				 }
			
 
				 
			
 
				 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
			
@@ -988,11 +1059,127 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * wait for the current transaction commit to start and block subsequent
			
 
				+ * transaction joins
			
 
				+ */
			
 
				+static void wait_current_trans_commit_start(struct btrfs_root *root,
			
 
				+					    struct btrfs_transaction *trans)
			
 
				+{
			
 
				+	DEFINE_WAIT(wait);
			
 
				+
			
 
				+	if (trans->in_commit)
			
 
				+		return;
			
 
				+
			
 
				+	while (1) {
			
 
				+		prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
			
 
				+				TASK_UNINTERRUPTIBLE);
			
 
				+		if (trans->in_commit) {
			
 
				+			finish_wait(&root->fs_info->transaction_blocked_wait,
			
 
				+				    &wait);
			
 
				+			break;
			
 
				+		}
			
 
				+		mutex_unlock(&root->fs_info->trans_mutex);
			
 
				+		schedule();
			
 
				+		mutex_lock(&root->fs_info->trans_mutex);
			
 
				+		finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * wait for the current transaction's commit to start and then become unblocked.
			
 
				+ * caller holds ref.
			
 
				+ */
			
 
				+static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
			
 
				+					 struct btrfs_transaction *trans)
			
 
				+{
			
 
				+	DEFINE_WAIT(wait);
			
 
				+
			
 
				+	if (trans->commit_done || (trans->in_commit && !trans->blocked))
			
 
				+		return;
			
 
				+
			
 
				+	while (1) {
			
 
				+		prepare_to_wait(&root->fs_info->transaction_wait, &wait,
			
 
				+				TASK_UNINTERRUPTIBLE);
			
 
				+		if (trans->commit_done ||
			
 
				+		    (trans->in_commit && !trans->blocked)) {
			
 
				+			finish_wait(&root->fs_info->transaction_wait,
			
 
				+				    &wait);
			
 
				+			break;
			
 
				+		}
			
 
				+		mutex_unlock(&root->fs_info->trans_mutex);
			
 
				+		schedule();
			
 
				+		mutex_lock(&root->fs_info->trans_mutex);
			
 
				+		finish_wait(&root->fs_info->transaction_wait,
			
 
				+			    &wait);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * commit transactions asynchronously. once btrfs_commit_transaction_async
			
 
				+ * returns, any subsequent transaction will not be allowed to join.
			
 
				+ */
			
 
				+struct btrfs_async_commit {
			
 
				+	struct btrfs_trans_handle *newtrans;
			
 
				+	struct btrfs_root *root;
			
 
				+	struct delayed_work work;
			
 
				+};
			
 
				+
			
 
				+static void do_async_commit(struct work_struct *work)
			
 
				+{
			
 
				+	struct btrfs_async_commit *ac =
			
 
				+		container_of(work, struct btrfs_async_commit, work.work);
			
 
				+
			
 
				+	btrfs_commit_transaction(ac->newtrans, ac->root);
			
 
				+	kfree(ac);
			
 
				+}
			
 
				+
			
 
				+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
			
 
				+				   struct btrfs_root *root,
			
 
				+				   int wait_for_unblock)
			
 
				+{
			
 
				+	struct btrfs_async_commit *ac;
			
 
				+	struct btrfs_transaction *cur_trans;
			
 
				+
			
 
				+	ac = kmalloc(sizeof(*ac), GFP_NOFS);
			
 
				+	BUG_ON(!ac);
			
 
				+
			
 
				+	INIT_DELAYED_WORK(&ac->work, do_async_commit);
			
 
				+	ac->root = root;
			
 
				+	ac->newtrans = btrfs_join_transaction(root, 0);
			
 
				+
			
 
				+	/* take transaction reference */
			
 
				+	mutex_lock(&root->fs_info->trans_mutex);
			
 
				+	cur_trans = trans->transaction;
			
 
				+	cur_trans->use_count++;
			
 
				+	mutex_unlock(&root->fs_info->trans_mutex);
			
 
				+
			
 
				+	btrfs_end_transaction(trans, root);
			
 
				+	schedule_delayed_work(&ac->work, 0);
			
 
				+
			
 
				+	/* wait for transaction to start and unblock */
			
 
				+	mutex_lock(&root->fs_info->trans_mutex);
			
 
				+	if (wait_for_unblock)
			
 
				+		wait_current_trans_commit_start_and_unblock(root, cur_trans);
			
 
				+	else
			
 
				+		wait_current_trans_commit_start(root, cur_trans);
			
 
				+	put_transaction(cur_trans);
			
 
				+	mutex_unlock(&root->fs_info->trans_mutex);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * btrfs_transaction state sequence:
			
 
				+ *    in_commit = 0, blocked = 0  (initial)
			
 
				+ *    in_commit = 1, blocked = 1
			
 
				+ *    blocked = 0
			
 
				+ *    commit_done = 1
			
 
				+ */
			
 
				 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
			
 
				 			     struct btrfs_root *root)
			
 
				 {
			
 
				 	unsigned long joined = 0;
			
 
				-	unsigned long timeout = 1;
			
 
				 	struct btrfs_transaction *cur_trans;
			
 
				 	struct btrfs_transaction *prev_trans = NULL;
			
 
				 	DEFINE_WAIT(wait);
			
@@ -1039,6 +1226,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
				 
			
 
				 	trans->transaction->in_commit = 1;
			
 
				 	trans->transaction->blocked = 1;
			
 
				+	wake_up(&root->fs_info->transaction_blocked_wait);
			
 
				+
			
 
				 	if (cur_trans->list.prev != &root->fs_info->trans_list) {
			
 
				 		prev_trans = list_entry(cur_trans->list.prev,
			
 
				 					struct btrfs_transaction, list);
			
@@ -1063,11 +1252,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
				 			snap_pending = 1;
			
 
				 
			
 
				 		WARN_ON(cur_trans != trans->transaction);
			
 
				-		if (cur_trans->num_writers > 1)
			
 
				-			timeout = MAX_SCHEDULE_TIMEOUT;
			
 
				-		else if (should_grow)
			
 
				-			timeout = 1;
			
 
				-
			
 
				 		mutex_unlock(&root->fs_info->trans_mutex);
			
 
				 
			
 
				 		if (flush_on_commit || snap_pending) {
			
@@ -1089,8 +1273,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
				 				TASK_UNINTERRUPTIBLE);
			
 
				 
			
 
				 		smp_mb();
			
 
				-		if (cur_trans->num_writers > 1 || should_grow)
			
 
				-			schedule_timeout(timeout);
			
 
				+		if (cur_trans->num_writers > 1)
			
 
				+			schedule_timeout(MAX_SCHEDULE_TIMEOUT);
			
 
				+		else if (should_grow)
			
 
				+			schedule_timeout(1);
			
 
				 
			
 
				 		mutex_lock(&root->fs_info->trans_mutex);
			
 
				 		finish_wait(&cur_trans->writer_wait, &wait);
			
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -87,12 +87,17 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
 
				 
			
 
				 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			
 
				 			  struct btrfs_root *root);
			
 
				+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
			
 
				+				 struct btrfs_root *root);
			
 
				 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
			
 
				 						   int num_items);
			
 
				 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
			
 
				 						  int num_blocks);
			
 
				+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
			
 
				+							  int num_blocks);
			
 
				 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
			
 
				 							 int num_blocks);
			
 
				+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
			
 
				 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
			
 
				 				     struct btrfs_root *root);
			
 
				 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
			
@@ -104,6 +109,9 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
 
				 int btrfs_clean_old_snapshots(struct btrfs_root *root);
			
 
				 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
			
 
				 			     struct btrfs_root *root);
			
 
				+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
			
 
				+				   struct btrfs_root *root,
			
 
				+				   int wait_for_unblock);
			
 
				 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
			
 
				 				   struct btrfs_root *root);
			
 
				 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
			
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 
				 	int ret = 0;
			
 
				 	int wret;
			
 
				 	int level;
			
 
				-	int orig_level;
			
 
				 	int is_extent = 0;
			
 
				 	int next_key_ret = 0;
			
 
				 	u64 last_ret = 0;
			
@@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 
				 		return -ENOMEM;
			
 
				 
			
 
				 	level = btrfs_header_level(root->node);
			
 
				-	orig_level = level;
			
 
				 
			
 
				 	if (level == 0)
			
 
				 		goto out;
			
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -786,7 +786,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 
				 {
			
 
				 	struct inode *dir;
			
 
				 	int ret;
			
 
				-	struct btrfs_key location;
			
 
				 	struct btrfs_inode_ref *ref;
			
 
				 	struct btrfs_dir_item *di;
			
 
				 	struct inode *inode;
			
@@ -795,10 +794,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 
				 	unsigned long ref_ptr;
			
 
				 	unsigned long ref_end;
			
 
				 
			
 
				-	location.objectid = key->objectid;
			
 
				-	location.type = BTRFS_INODE_ITEM_KEY;
			
 
				-	location.offset = 0;
			
 
				-
			
 
				 	/*
			
 
				 	 * it is possible that we didn't log all the parent directories
			
 
				 	 * for a given inode.  If we don't find the dir, just don't
			
@@ -1583,7 +1578,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 
				 	struct btrfs_path *path;
			
 
				 	struct btrfs_root *root = wc->replay_dest;
			
 
				 	struct btrfs_key key;
			
 
				-	u32 item_size;
			
 
				 	int level;
			
 
				 	int i;
			
 
				 	int ret;
			
@@ -1601,7 +1595,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 
				 	nritems = btrfs_header_nritems(eb);
			
 
				 	for (i = 0; i < nritems; i++) {
			
 
				 		btrfs_item_key_to_cpu(eb, &key, i);
			
 
				-		item_size = btrfs_item_size_nr(eb, i);
			
 
				 
			
 
				 		/* inode keys are done during the first stage */
			
 
				 		if (key.type == BTRFS_INODE_ITEM_KEY &&
			
@@ -1668,7 +1661,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 
				 				   struct walk_control *wc)
			
 
				 {
			
 
				 	u64 root_owner;
			
 
				-	u64 root_gen;
			
 
				 	u64 bytenr;
			
 
				 	u64 ptr_gen;
			
 
				 	struct extent_buffer *next;
			
@@ -1698,7 +1690,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 
				 
			
 
				 		parent = path->nodes[*level];
			
 
				 		root_owner = btrfs_header_owner(parent);
			
 
				-		root_gen = btrfs_header_generation(parent);
			
 
				 
			
 
				 		next = btrfs_find_create_tree_block(root, bytenr, blocksize);
			
 
				 
			
@@ -1749,7 +1740,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 
				 				 struct walk_control *wc)
			
 
				 {
			
 
				 	u64 root_owner;
			
 
				-	u64 root_gen;
			
 
				 	int i;
			
 
				 	int slot;
			
 
				 	int ret;
			
@@ -1757,8 +1747,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 
				 	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
			
 
				 		slot = path->slots[i];
			
 
				 		if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
			
 
				-			struct extent_buffer *node;
			
 
				-			node = path->nodes[i];
			
 
				 			path->slots[i]++;
			
 
				 			*level = i;
			
 
				 			WARN_ON(*level == 0);
			
@@ -1771,7 +1759,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 
				 				parent = path->nodes[*level + 1];
			
 
				 
			
 
				 			root_owner = btrfs_header_owner(parent);
			
 
				-			root_gen = btrfs_header_generation(parent);
			
 
				 			wc->process_func(root, path->nodes[*level], wc,
			
 
				 				 btrfs_header_generation(path->nodes[*level]));
			
 
				 			if (wc->free) {
			
@@ -2273,7 +2260,7 @@ fail:
 
				 	}
			
 
				 	btrfs_end_log_trans(root);
			
 
				 
			
 
				-	return 0;
			
 
				+	return err;
			
 
				 }
			
 
				 
			
 
				 /* see comments for btrfs_del_dir_entries_in_log */
			
@@ -2729,7 +2716,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 
				 	struct btrfs_key max_key;
			
 
				 	struct btrfs_root *log = root->log_root;
			
 
				 	struct extent_buffer *src = NULL;
			
 
				-	u32 size;
			
 
				 	int err = 0;
			
 
				 	int ret;
			
 
				 	int nritems;
			
@@ -2793,7 +2779,6 @@ again:
 
				 			break;
			
 
				 
			
 
				 		src = path->nodes[0];
			
 
				-		size = btrfs_item_size_nr(src, path->slots[0]);
			
 
				 		if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
			
 
				 			ins_nr++;
			
 
				 			goto next_slot;
			
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1898,7 +1898,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
 
				 	u64 size_to_free;
			
 
				 	struct btrfs_path *path;
			
 
				 	struct btrfs_key key;
			
 
				-	struct btrfs_chunk *chunk;
			
 
				 	struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
			
 
				 	struct btrfs_trans_handle *trans;
			
 
				 	struct btrfs_key found_key;
			
@@ -1962,9 +1961,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
 
				 		if (found_key.objectid != key.objectid)
			
 
				 			break;
			
 
				 
			
 
				-		chunk = btrfs_item_ptr(path->nodes[0],
			
 
				-				       path->slots[0],
			
 
				-				       struct btrfs_chunk);
			
 
				 		/* chunk zero is special */
			
 
				 		if (found_key.offset == 0)
			
 
				 			break;
			
@@ -3031,8 +3027,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 
				 		}
			
 
				 		bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
			
 
				 		dev = multi->stripes[dev_nr].dev;
			
 
				-		BUG_ON(rw == WRITE && !dev->writeable);
			
 
				-		if (dev && dev->bdev) {
			
 
				+		if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
			
 
				 			bio->bi_bdev = dev->bdev;
			
 
				 			if (async_submit)
			
 
				 				schedule_bio(root, dev, rw, bio);
			
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -178,7 +178,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
				 	struct inode *inode = dentry->d_inode;
			
 
				 	struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				 	struct btrfs_path *path;
			
 
				-	struct btrfs_item *item;
			
 
				 	struct extent_buffer *leaf;
			
 
				 	struct btrfs_dir_item *di;
			
 
				 	int ret = 0, slot, advance;
			
@@ -234,7 +233,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
				 		}
			
 
				 		advance = 1;
			
 
				 
			
 
				-		item = btrfs_item_nr(leaf, slot);
			
 
				 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
			
 
				 
			
 
				 		/* check to make sure this item is what we want */
			
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -199,8 +199,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
 
				 	int nr_pages = 0;
			
 
				 	struct page *in_page = NULL;
			
 
				 	struct page *out_page = NULL;
			
 
				-	int out_written = 0;
			
 
				-	int in_read = 0;
			
 
				 	unsigned long bytes_left;
			
 
				 
			
 
				 	*out_pages = 0;
			
@@ -233,9 +231,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
 
				 	workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
			
 
				 	workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
			
 
				 
			
 
				-	out_written = 0;
			
 
				-	in_read = 0;
			
 
				-
			
 
				 	while (workspace->def_strm.total_in < len) {
			
 
				 		ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
			
 
				 		if (ret != Z_OK) {
			
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1081,30 +1081,42 @@ static void wait_sb_inodes(struct super_block *sb)
 
				 }
			
 
				 
			
 
				 /**
			
 
				- * writeback_inodes_sb	-	writeback dirty inodes from given super_block
			
 
				+ * writeback_inodes_sb_nr -	writeback dirty inodes from given super_block
			
 
				  * @sb: the superblock
			
 
				+ * @nr: the number of pages to write
			
 
				  *
			
 
				  * Start writeback on some inodes on this super_block. No guarantees are made
			
 
				  * on how many (if any) will be written, and this function does not wait
			
 
				- * for IO completion of submitted IO. The number of pages submitted is
			
 
				- * returned.
			
 
				+ * for IO completion of submitted IO.
			
 
				  */
			
 
				-void writeback_inodes_sb(struct super_block *sb)
			
 
				+void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
			
 
				 {
			
 
				 	DECLARE_COMPLETION_ONSTACK(done);
			
 
				 	struct wb_writeback_work work = {
			
 
				 		.sb		= sb,
			
 
				 		.sync_mode	= WB_SYNC_NONE,
			
 
				 		.done		= &done,
			
 
				+		.nr_pages	= nr,
			
 
				 	};
			
 
				 
			
 
				 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
			
 
				-
			
 
				-	work.nr_pages = get_nr_dirty_pages();
			
 
				-
			
 
				 	bdi_queue_work(sb->s_bdi, &work);
			
 
				 	wait_for_completion(&done);
			
 
				 }
			
 
				+EXPORT_SYMBOL(writeback_inodes_sb_nr);
			
 
				+
			
 
				+/**
			
 
				+ * writeback_inodes_sb	-	writeback dirty inodes from given super_block
			
 
				+ * @sb: the superblock
			
 
				+ *
			
 
				+ * Start writeback on some inodes on this super_block. No guarantees are made
			
 
				+ * on how many (if any) will be written, and this function does not wait
			
 
				+ * for IO completion of submitted IO.
			
 
				+ */
			
 
				+void writeback_inodes_sb(struct super_block *sb)
			
 
				+{
			
 
				+	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
			
 
				+}
			
 
				 EXPORT_SYMBOL(writeback_inodes_sb);
			
 
				 
			
 
				 /**
			
@@ -1126,6 +1138,27 @@ int writeback_inodes_sb_if_idle(struct super_block *sb)
 
				 }
			
 
				 EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
			
 
				 
			
 
				+/**
			
 
				+ * writeback_inodes_sb_nr_if_idle	-	start writeback if none underway
			
 
				+ * @sb: the superblock
			
 
				+ * @nr: the number of pages to write
			
 
				+ *
			
 
				+ * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
			
 
				+ * Returns 1 if writeback was started, 0 if not.
			
 
				+ */
			
 
				+int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
			
 
				+				   unsigned long nr)
			
 
				+{
			
 
				+	if (!writeback_in_progress(sb->s_bdi)) {
			
 
				+		down_read(&sb->s_umount);
			
 
				+		writeback_inodes_sb_nr(sb, nr);
			
 
				+		up_read(&sb->s_umount);
			
 
				+		return 1;
			
 
				+	} else
			
 
				+		return 0;
			
 
				+}
			
 
				+EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
			
 
				+
			
 
				 /**
			
 
				  * sync_inodes_sb	-	sync sb inode pages
			
 
				  * @sb: the superblock
			
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -58,7 +58,9 @@ struct writeback_control {
 
				 struct bdi_writeback;
			
 
				 int inode_wait(void *);
			
 
				 void writeback_inodes_sb(struct super_block *);
			
 
				+void writeback_inodes_sb_nr(struct super_block *, unsigned long nr);
			
 
				 int writeback_inodes_sb_if_idle(struct super_block *);
			
 
				+int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr);
			
 
				 void sync_inodes_sb(struct super_block *);
			
 
				 void writeback_inodes_wb(struct bdi_writeback *wb,
			
 
				 		struct writeback_control *wbc);