13 年之前 · afd582ac8f
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -103,11 +103,6 @@ struct btrfs_inode {
 
				 	 */
			
 
				 	u64 delalloc_bytes;
			
 
				 
			
 
				-	/* total number of bytes that may be used for this inode for
			
 
				-	 * delalloc
			
 
				-	 */
			
 
				-	u64 reserved_bytes;
			
 
				-
			
 
				 	/*
			
 
				 	 * the size of the file stored in the metadata on disk.  data=ordered
			
 
				 	 * means the in-memory i_size might be larger than the size on disk
			
@@ -115,9 +110,6 @@ struct btrfs_inode {
 
				 	 */
			
 
				 	u64 disk_i_size;
			
 
				 
			
 
				-	/* flags field from the on disk inode */
			
 
				-	u32 flags;
			
 
				-
			
 
				 	/*
			
 
				 	 * if this is a directory then index_cnt is the counter for the index
			
 
				 	 * number for new files that are created
			
@@ -131,6 +123,15 @@ struct btrfs_inode {
 
				 	 */
			
 
				 	u64 last_unlink_trans;
			
 
				 
			
 
				+	/*
			
 
				+	 * Number of bytes outstanding that are going to need csums.  This is
			
 
				+	 * used in ENOSPC accounting.
			
 
				+	 */
			
 
				+	u64 csum_bytes;
			
 
				+
			
 
				+	/* flags field from the on disk inode */
			
 
				+	u32 flags;
			
 
				+
			
 
				 	/*
			
 
				 	 * Counters to keep track of the number of extent item's we may use due
			
 
				 	 * to delalloc and such.  outstanding_extents is the number of extent
			
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -30,6 +30,7 @@
 
				 #include <linux/kobject.h>
			
 
				 #include <trace/events/btrfs.h>
			
 
				 #include <asm/kmap_types.h>
			
 
				+#include <linux/pagemap.h>
			
 
				 #include "extent_io.h"
			
 
				 #include "extent_map.h"
			
 
				 #include "async-thread.h"
			
@@ -772,14 +773,8 @@ struct btrfs_space_info {
 
				 struct btrfs_block_rsv {
			
 
				 	u64 size;
			
 
				 	u64 reserved;
			
 
				-	u64 freed[2];
			
 
				 	struct btrfs_space_info *space_info;
			
 
				-	struct list_head list;
			
 
				 	spinlock_t lock;
			
 
				-	atomic_t usage;
			
 
				-	unsigned int priority:8;
			
 
				-	unsigned int durable:1;
			
 
				-	unsigned int refill_used:1;
			
 
				 	unsigned int full:1;
			
 
				 };
			
 
				 
			
@@ -840,10 +835,10 @@ struct btrfs_block_group_cache {
 
				 	spinlock_t lock;
			
 
				 	u64 pinned;
			
 
				 	u64 reserved;
			
 
				-	u64 reserved_pinned;
			
 
				 	u64 bytes_super;
			
 
				 	u64 flags;
			
 
				 	u64 sectorsize;
			
 
				+	u64 cache_generation;
			
 
				 	unsigned int ro:1;
			
 
				 	unsigned int dirty:1;
			
 
				 	unsigned int iref:1;
			
@@ -899,6 +894,10 @@ struct btrfs_fs_info {
 
				 	spinlock_t block_group_cache_lock;
			
 
				 	struct rb_root block_group_cache_tree;
			
 
				 
			
 
				+	/* keep track of unallocated space */
			
 
				+	spinlock_t free_chunk_lock;
			
 
				+	u64 free_chunk_space;
			
 
				+
			
 
				 	struct extent_io_tree freed_extents[2];
			
 
				 	struct extent_io_tree *pinned_extents;
			
 
				 
			
@@ -919,11 +918,6 @@ struct btrfs_fs_info {
 
				 
			
 
				 	struct btrfs_block_rsv empty_block_rsv;
			
 
				 
			
 
				-	/* list of block reservations that cross multiple transactions */
			
 
				-	struct list_head durable_block_rsv_list;
			
 
				-
			
 
				-	struct mutex durable_block_rsv_mutex;
			
 
				-
			
 
				 	u64 generation;
			
 
				 	u64 last_trans_committed;
			
 
				 
			
@@ -2129,6 +2123,11 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
 
				 		(space_info->flags & BTRFS_BLOCK_GROUP_DATA));
			
 
				 }
			
 
				 
			
 
				+/*
				+ * Return the gfp mask of @mapping with __GFP_FS cleared.
				+ *
				+ * NOTE(review): presumably this keeps page allocations made while writing
				+ * from re-entering the filesystem; confirm against the callers.
				+ */
				+static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
				+{
				+	gfp_t write_mask = mapping_gfp_mask(mapping);
				+
				+	write_mask &= ~__GFP_FS;
				+	return write_mask;
				+}
			
 
				+
			
 
				 /* extent-tree.c */
			
 
				 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
			
 
				 						 unsigned num_items)
			
@@ -2137,6 +2136,17 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
 
				 		3 * num_items;
			
 
				 }
			
 
				 
			
 
				+/*
				+ * Doing a truncate won't result in new nodes or leaves, just what we need for
				+ * COW.
				+ *
				+ * @root:	root whose leafsize/nodesize are used for the estimate
				+ * @num_items:	number of items the truncate is expected to touch
				+ *
				+ * Returns one leaf plus (BTRFS_MAX_LEVEL - 1) nodes worth of bytes for each
				+ * item.
				+ */
				+static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
				+						 unsigned num_items)
				+{
				+	/*
				+	 * Widen to u64 before doing any arithmetic so the product cannot wrap
				+	 * in a narrower type if leafsize/nodesize are smaller than the u64
				+	 * return type (num_items is only 'unsigned').
				+	 */
				+	u64 bytes_per_item = (u64)root->leafsize +
				+			     (u64)root->nodesize * (BTRFS_MAX_LEVEL - 1);
				+
				+	return bytes_per_item * num_items;
				+}
			
 
				+
			
 
				 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
			
 
				 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
			
 
				 			   struct btrfs_root *root, unsigned long count);
			
@@ -2196,8 +2206,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
 
				 		      u64 root_objectid, u64 owner, u64 offset);
			
 
				 
			
 
				 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
			
 
				-int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
			
 
				-				u64 num_bytes, int reserve, int sinfo);
			
 
				 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
			
 
				 				struct btrfs_root *root);
			
 
				 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			
@@ -2240,25 +2248,20 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
 
				 struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
			
 
				 void btrfs_free_block_rsv(struct btrfs_root *root,
			
 
				 			  struct btrfs_block_rsv *rsv);
			
 
				-void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
			
 
				-				 struct btrfs_block_rsv *rsv);
			
 
				-int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
			
 
				-			struct btrfs_root *root,
			
 
				+int btrfs_block_rsv_add(struct btrfs_root *root,
			
 
				 			struct btrfs_block_rsv *block_rsv,
			
 
				 			u64 num_bytes);
			
 
				-int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
			
 
				-			  struct btrfs_root *root,
			
 
				+int btrfs_block_rsv_check(struct btrfs_root *root,
			
 
				+			  struct btrfs_block_rsv *block_rsv, int min_factor);
			
 
				+int btrfs_block_rsv_refill(struct btrfs_root *root,
			
 
				 			  struct btrfs_block_rsv *block_rsv,
			
 
				-			  u64 min_reserved, int min_factor);
			
 
				+			  u64 min_reserved);
			
 
				 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
			
 
				 			    struct btrfs_block_rsv *dst_rsv,
			
 
				 			    u64 num_bytes);
			
 
				 void btrfs_block_rsv_release(struct btrfs_root *root,
			
 
				 			     struct btrfs_block_rsv *block_rsv,
			
 
				 			     u64 num_bytes);
			
 
				-int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
			
 
				-				    struct btrfs_root *root,
			
 
				-				    struct btrfs_block_rsv *rsv);
			
 
				 int btrfs_set_block_group_ro(struct btrfs_root *root,
			
 
				 			     struct btrfs_block_group_cache *cache);
			
 
				 int btrfs_set_block_group_rw(struct btrfs_root *root,
			
@@ -2579,11 +2582,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
 
				 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
			
 
				 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
			
 
				 int btrfs_orphan_cleanup(struct btrfs_root *root);
			
 
				-void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
			
 
				-				struct btrfs_pending_snapshot *pending,
			
 
				-				u64 *bytes_to_reserve);
			
 
				-void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
			
 
				-				struct btrfs_pending_snapshot *pending);
			
 
				 void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
			
 
				 			      struct btrfs_root *root);
			
 
				 int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
			
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1648,6 +1648,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
				 	spin_lock_init(&fs_info->fs_roots_radix_lock);
			
 
				 	spin_lock_init(&fs_info->delayed_iput_lock);
			
 
				 	spin_lock_init(&fs_info->defrag_inodes_lock);
			
 
				+	spin_lock_init(&fs_info->free_chunk_lock);
			
 
				 	mutex_init(&fs_info->reloc_mutex);
			
 
				 
			
 
				 	init_completion(&fs_info->kobj_unregister);
			
@@ -1665,8 +1666,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
				 	btrfs_init_block_rsv(&fs_info->trans_block_rsv);
			
 
				 	btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
			
 
				 	btrfs_init_block_rsv(&fs_info->empty_block_rsv);
			
 
				-	INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
			
 
				-	mutex_init(&fs_info->durable_block_rsv_mutex);
			
 
				 	atomic_set(&fs_info->nr_async_submits, 0);
			
 
				 	atomic_set(&fs_info->async_delalloc_pages, 0);
			
 
				 	atomic_set(&fs_info->async_submit_draining, 0);
			
@@ -1677,6 +1676,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
				 	fs_info->metadata_ratio = 0;
			
 
				 	fs_info->defrag_inodes = RB_ROOT;
			
 
				 	fs_info->trans_no_join = 0;
			
 
				+	fs_info->free_chunk_space = 0;
			
 
				 
			
 
				 	fs_info->thread_pool_size = min_t(unsigned long,
			
 
				 					  num_online_cpus() + 2, 8);
			
@@ -2545,8 +2545,6 @@ int close_ctree(struct btrfs_root *root)
 
				 	/* clear out the rbtree of defraggable inodes */
			
 
				 	btrfs_run_defrag_inodes(root->fs_info);
			
 
				 
			
 
				-	btrfs_put_block_group_cache(fs_info);
			
 
				-
			
 
				 	/*
			
 
				 	 * Here come 2 situations when btrfs is broken to flip readonly:
			
 
				 	 *
			
@@ -2572,6 +2570,8 @@ int close_ctree(struct btrfs_root *root)
 
				 			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
			
 
				 	}
			
 
				 
			
 
				+	btrfs_put_block_group_cache(fs_info);
			
 
				+
			
 
				 	kthread_stop(root->fs_info->transaction_kthread);
			
 
				 	kthread_stop(root->fs_info->cleaner_kthread);
			
 
				 
			
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -894,6 +894,194 @@ search_again:
 
				 	goto again;
			
 
				 }
			
 
				 
			
 
				+/**
				+ * convert_extent_bit - convert all bits in a given range from one bit to
				+ *			another
				+ * @tree:	the io tree to search
				+ * @start:	the start offset in bytes
				+ * @end:	the end offset in bytes (inclusive)
				+ * @bits:	the bits to set in this range
				+ * @clear_bits:	the bits to clear in this range
				+ * @mask:	the allocation mask
				+ *
				+ * This will go through and set bits for the given range.  If any states exist
				+ * already in this range they are set with the given bit and cleared of the
				+ * clear_bits.  This is only meant to be used by things that are mergeable, ie
				+ * converting from say DELALLOC to DIRTY.  This is not meant to be used with
				+ * boundary bits like LOCK.
				+ *
				+ * Returns 0 on success, -ENOMEM if an extent_state could not be allocated, or
				+ * the error reported by insert_state()/split_state().  tree->lock is always
				+ * dropped before returning.
				+ */
				+int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
				+		       int bits, int clear_bits, gfp_t mask)
				+{
				+	struct extent_state *state;
				+	struct extent_state *prealloc = NULL;
				+	struct rb_node *node;
				+	int err = 0;
				+	u64 last_start;
				+	u64 last_end;
				+
				+again:
				+	if (!prealloc && (mask & __GFP_WAIT)) {
				+		/* tree->lock is not held here, so returning directly is safe */
				+		prealloc = alloc_extent_state(mask);
				+		if (!prealloc)
				+			return -ENOMEM;
				+	}
				+
				+	spin_lock(&tree->lock);
				+	/*
				+	 * this search will find all the extents that end after
				+	 * our range starts.
				+	 */
				+	node = tree_search(tree, start);
				+	if (!node) {
				+		prealloc = alloc_extent_state_atomic(prealloc);
				+		if (!prealloc) {
				+			/* leave through 'out' so tree->lock is released */
				+			err = -ENOMEM;
				+			goto out;
				+		}
				+		err = insert_state(tree, prealloc, start, end, &bits);
				+		prealloc = NULL;
				+		BUG_ON(err == -EEXIST);
				+		goto out;
				+	}
				+	state = rb_entry(node, struct extent_state, rb_node);
				+hit_next:
				+	last_start = state->start;
				+	last_end = state->end;
				+
				+	/*
				+	 * | ---- desired range ---- |
				+	 * | state |
				+	 *
				+	 * Just convert what we found and keep going
				+	 */
				+	if (state->start == start && state->end <= end) {
				+		struct rb_node *next_node;
				+
				+		set_state_bits(tree, state, &bits);
				+		clear_state_bit(tree, state, &clear_bits, 0);
				+
				+		merge_state(tree, state);
				+		if (last_end == (u64)-1)
				+			goto out;
				+
				+		start = last_end + 1;
				+		next_node = rb_next(&state->rb_node);
				+		if (next_node && start < end && prealloc && !need_resched()) {
				+			state = rb_entry(next_node, struct extent_state,
				+					 rb_node);
				+			if (state->start == start)
				+				goto hit_next;
				+		}
				+		goto search_again;
				+	}
				+
				+	/*
				+	 *     | ---- desired range ---- |
				+	 * | state |
				+	 *   or
				+	 * | ------------- state -------------- |
				+	 *
				+	 * We need to split the extent we found, and may flip bits on
				+	 * second half.
				+	 *
				+	 * If the extent we found extends past our
				+	 * range, we just split and search again.  It'll get split
				+	 * again the next time though.
				+	 *
				+	 * If the extent we found is inside our range, we set the
				+	 * desired bit on it.
				+	 */
				+	if (state->start < start) {
				+		prealloc = alloc_extent_state_atomic(prealloc);
				+		if (!prealloc) {
				+			/* leave through 'out' so tree->lock is released */
				+			err = -ENOMEM;
				+			goto out;
				+		}
				+		err = split_state(tree, state, prealloc, start);
				+		BUG_ON(err == -EEXIST);
				+		prealloc = NULL;
				+		if (err)
				+			goto out;
				+		if (state->end <= end) {
				+			set_state_bits(tree, state, &bits);
				+			clear_state_bit(tree, state, &clear_bits, 0);
				+			merge_state(tree, state);
				+			if (last_end == (u64)-1)
				+				goto out;
				+			start = last_end + 1;
				+		}
				+		goto search_again;
				+	}
				+	/*
				+	 * | ---- desired range ---- |
				+	 *     | state | or               | state |
				+	 *
				+	 * There's a hole, we need to insert something in it and
				+	 * ignore the extent we found.
				+	 */
				+	if (state->start > start) {
				+		u64 this_end;
				+		if (end < last_start)
				+			this_end = end;
				+		else
				+			this_end = last_start - 1;
				+
				+		prealloc = alloc_extent_state_atomic(prealloc);
				+		if (!prealloc) {
				+			/* leave through 'out' so tree->lock is released */
				+			err = -ENOMEM;
				+			goto out;
				+		}
				+
				+		/*
				+		 * Avoid to free 'prealloc' if it can be merged with
				+		 * the later extent.
				+		 */
				+		err = insert_state(tree, prealloc, start, this_end,
				+				   &bits);
				+		BUG_ON(err == -EEXIST);
				+		if (err) {
				+			free_extent_state(prealloc);
				+			prealloc = NULL;
				+			goto out;
				+		}
				+		prealloc = NULL;
				+		start = this_end + 1;
				+		goto search_again;
				+	}
				+	/*
				+	 * | ---- desired range ---- |
				+	 *                        | state |
				+	 * We need to split the extent, and set the bit
				+	 * on the first half
				+	 */
				+	if (state->start <= end && state->end > end) {
				+		prealloc = alloc_extent_state_atomic(prealloc);
				+		if (!prealloc) {
				+			/* leave through 'out' so tree->lock is released */
				+			err = -ENOMEM;
				+			goto out;
				+		}
				+
				+		err = split_state(tree, state, prealloc, end + 1);
				+		BUG_ON(err == -EEXIST);
				+
				+		set_state_bits(tree, prealloc, &bits);
				+		clear_state_bit(tree, prealloc, &clear_bits, 0);
				+
				+		merge_state(tree, prealloc);
				+		prealloc = NULL;
				+		goto out;
				+	}
				+
				+	goto search_again;
				+
				+out:
				+	/* single exit for every path that holds tree->lock */
				+	spin_unlock(&tree->lock);
				+	if (prealloc)
				+		free_extent_state(prealloc);
				+
				+	return err;
				+
				+search_again:
				+	if (start > end)
				+		goto out;
				+	spin_unlock(&tree->lock);
				+	if (mask & __GFP_WAIT)
				+		cond_resched();
				+	goto again;
				+}
			
 
				+
			
 
				 /* wrappers around set/clear extent bit */
			
 
				 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
			
 
				 		     gfp_t mask)
			
@@ -2136,6 +2324,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
				 	int compressed;
			
 
				 	int write_flags;
			
 
				 	unsigned long nr_written = 0;
			
 
				+	bool fill_delalloc = true;
			
 
				 
			
 
				 	if (wbc->sync_mode == WB_SYNC_ALL)
			
 
				 		write_flags = WRITE_SYNC;
			
@@ -2166,10 +2355,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
				 
			
 
				 	set_page_extent_mapped(page);
			
 
				 
			
 
				+	if (!tree->ops || !tree->ops->fill_delalloc)
			
 
				+		fill_delalloc = false;
			
 
				+
			
 
				 	delalloc_start = start;
			
 
				 	delalloc_end = 0;
			
 
				 	page_started = 0;
			
 
				-	if (!epd->extent_locked) {
			
 
				+	if (!epd->extent_locked && fill_delalloc) {
			
 
				 		u64 delalloc_to_write = 0;
			
 
				 		/*
			
 
				 		 * make sure the wbc mapping index is at least updated
			
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -17,6 +17,7 @@
 
				 #define EXTENT_NODATASUM (1 << 10)
			
 
				 #define EXTENT_DO_ACCOUNTING (1 << 11)
			
 
				 #define EXTENT_FIRST_DELALLOC (1 << 12)
			
 
				+#define EXTENT_NEED_WAIT (1 << 13)
			
 
				 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
			
 
				 #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
			
 
				 
			
@@ -214,6 +215,8 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 
				 		     gfp_t mask);
			
 
				 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
			
 
				 		       gfp_t mask);
			
 
				+int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			
 
				+		       int bits, int clear_bits, gfp_t mask);
			
 
				 int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
			
 
				 			struct extent_state **cached_state, gfp_t mask);
			
 
				 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1069,6 +1069,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
 
				 	int i;
			
 
				 	unsigned long index = pos >> PAGE_CACHE_SHIFT;
			
 
				 	struct inode *inode = fdentry(file)->d_inode;
			
 
				+	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
			
 
				 	int err = 0;
			
 
				 	int faili = 0;
			
 
				 	u64 start_pos;
			
@@ -1080,7 +1081,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
 
				 again:
			
 
				 	for (i = 0; i < num_pages; i++) {
			
 
				 		pages[i] = find_or_create_page(inode->i_mapping, index + i,
			
 
				-					       GFP_NOFS);
			
 
				+					       mask);
			
 
				 		if (!pages[i]) {
			
 
				 			faili = i - 1;
			
 
				 			err = -ENOMEM;
			
@@ -1615,10 +1616,6 @@ static long btrfs_fallocate(struct file *file, int mode,
 
				 			goto out;
			
 
				 	}
			
 
				 
			
 
				-	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
			
 
				-	if (ret)
			
 
				-		goto out;
			
 
				-
			
 
				 	locked_end = alloc_end - 1;
			
 
				 	while (1) {
			
 
				 		struct btrfs_ordered_extent *ordered;
			
@@ -1664,11 +1661,27 @@ static long btrfs_fallocate(struct file *file, int mode,
 
				 		if (em->block_start == EXTENT_MAP_HOLE ||
			
 
				 		    (cur_offset >= inode->i_size &&
			
 
				 		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
			
 
				+
			
 
				+			/*
			
 
				+			 * Make sure we have enough space before we do the
			
 
				+			 * allocation.
			
 
				+			 */
			
 
				+			ret = btrfs_check_data_free_space(inode, last_byte -
			
 
				+							  cur_offset);
			
 
				+			if (ret) {
			
 
				+				free_extent_map(em);
			
 
				+				break;
			
 
				+			}
			
 
				+
			
 
				 			ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
			
 
				 							last_byte - cur_offset,
			
 
				 							1 << inode->i_blkbits,
			
 
				 							offset + len,
			
 
				 							&alloc_hint);
			
 
				+
			
 
				+			/* Let go of our reservation. */
			
 
				+			btrfs_free_reserved_data_space(inode, last_byte -
			
 
				+						       cur_offset);
			
 
				 			if (ret < 0) {
			
 
				 				free_extent_map(em);
			
 
				 				break;
			
@@ -1694,8 +1707,6 @@ static long btrfs_fallocate(struct file *file, int mode,
 
				 	}
			
 
				 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
			
 
				 			     &cached_state, GFP_NOFS);
			
 
				-
			
 
				-	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
			
 
				 out:
			
 
				 	mutex_unlock(&inode->i_mutex);
			
 
				 	return ret;
			
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -465,14 +465,16 @@ again:
 
				 	/* Just to make sure we have enough space */
			
 
				 	prealloc += 8 * PAGE_CACHE_SIZE;
			
 
				 
			
 
				-	ret = btrfs_check_data_free_space(inode, prealloc);
			
 
				+	ret = btrfs_delalloc_reserve_space(inode, prealloc);
			
 
				 	if (ret)
			
 
				 		goto out_put;
			
 
				 
			
 
				 	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
			
 
				 					      prealloc, prealloc, &alloc_hint);
			
 
				-	if (ret)
			
 
				+	if (ret) {
			
 
				+		btrfs_delalloc_release_space(inode, prealloc);
			
 
				 		goto out_put;
			
 
				+	}
			
 
				 	btrfs_free_reserved_data_space(inode, prealloc);
			
 
				 
			
 
				 out_put:
			
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1792,12 +1792,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 
				 	}
			
 
				 	ret = 0;
			
 
				 out:
			
 
				-	if (nolock) {
			
 
				-		if (trans)
			
 
				-			btrfs_end_transaction_nolock(trans, root);
			
 
				-	} else {
			
 
				+	if (root != root->fs_info->tree_root)
			
 
				 		btrfs_delalloc_release_metadata(inode, ordered_extent->len);
			
 
				-		if (trans)
			
 
				+	if (trans) {
			
 
				+		if (nolock)
			
 
				+			btrfs_end_transaction_nolock(trans, root);
			
 
				+		else
			
 
				 			btrfs_end_transaction(trans, root);
			
 
				 	}
			
 
				 
			
@@ -2079,89 +2079,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
 
				 	up_read(&root->fs_info->cleanup_work_sem);
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * calculate extra metadata reservation when snapshotting a subvolume
			
 
				- * contains orphan files.
			
 
				- */
			
 
				-void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
			
 
				-				struct btrfs_pending_snapshot *pending,
			
 
				-				u64 *bytes_to_reserve)
			
 
				-{
			
 
				-	struct btrfs_root *root;
			
 
				-	struct btrfs_block_rsv *block_rsv;
			
 
				-	u64 num_bytes;
			
 
				-	int index;
			
 
				-
			
 
				-	root = pending->root;
			
 
				-	if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
			
 
				-		return;
			
 
				-
			
 
				-	block_rsv = root->orphan_block_rsv;
			
 
				-
			
 
				-	/* orphan block reservation for the snapshot */
			
 
				-	num_bytes = block_rsv->size;
			
 
				-
			
 
				-	/*
			
 
				-	 * after the snapshot is created, COWing tree blocks may use more
			
 
				-	 * space than it frees. So we should make sure there is enough
			
 
				-	 * reserved space.
			
 
				-	 */
			
 
				-	index = trans->transid & 0x1;
			
 
				-	if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
			
 
				-		num_bytes += block_rsv->size -
			
 
				-			     (block_rsv->reserved + block_rsv->freed[index]);
			
 
				-	}
			
 
				-
			
 
				-	*bytes_to_reserve += num_bytes;
			
 
				-}
			
 
				-
			
 
				-void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
			
 
				-				struct btrfs_pending_snapshot *pending)
			
 
				-{
			
 
				-	struct btrfs_root *root = pending->root;
			
 
				-	struct btrfs_root *snap = pending->snap;
			
 
				-	struct btrfs_block_rsv *block_rsv;
			
 
				-	u64 num_bytes;
			
 
				-	int index;
			
 
				-	int ret;
			
 
				-
			
 
				-	if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
			
 
				-		return;
			
 
				-
			
 
				-	/* refill source subvolume's orphan block reservation */
			
 
				-	block_rsv = root->orphan_block_rsv;
			
 
				-	index = trans->transid & 0x1;
			
 
				-	if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
			
 
				-		num_bytes = block_rsv->size -
			
 
				-			    (block_rsv->reserved + block_rsv->freed[index]);
			
 
				-		ret = btrfs_block_rsv_migrate(&pending->block_rsv,
			
 
				-					      root->orphan_block_rsv,
			
 
				-					      num_bytes);
			
 
				-		BUG_ON(ret);
			
 
				-	}
			
 
				-
			
 
				-	/* setup orphan block reservation for the snapshot */
			
 
				-	block_rsv = btrfs_alloc_block_rsv(snap);
			
 
				-	BUG_ON(!block_rsv);
			
 
				-
			
 
				-	btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
			
 
				-	snap->orphan_block_rsv = block_rsv;
			
 
				-
			
 
				-	num_bytes = root->orphan_block_rsv->size;
			
 
				-	ret = btrfs_block_rsv_migrate(&pending->block_rsv,
			
 
				-				      block_rsv, num_bytes);
			
 
				-	BUG_ON(ret);
			
 
				-
			
 
				-#if 0
			
 
				-	/* insert orphan item for the snapshot */
			
 
				-	WARN_ON(!root->orphan_item_inserted);
			
 
				-	ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
			
 
				-				       snap->root_key.objectid);
			
 
				-	BUG_ON(ret);
			
 
				-	snap->orphan_item_inserted = 1;
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				 enum btrfs_orphan_cleanup_state {
			
 
				 	ORPHAN_CLEANUP_STARTED	= 1,
			
 
				 	ORPHAN_CLEANUP_DONE	= 2,
			
@@ -2247,9 +2164,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 
				 	}
			
 
				 	spin_unlock(&root->orphan_lock);
			
 
				 
			
 
				-	if (block_rsv)
			
 
				-		btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
			
 
				-
			
 
				 	/* grab metadata reservation from transaction handle */
			
 
				 	if (reserve) {
			
 
				 		ret = btrfs_orphan_reserve_metadata(trans, inode);
			
@@ -2316,6 +2230,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 
				 	struct btrfs_key key, found_key;
			
 
				 	struct btrfs_trans_handle *trans;
			
 
				 	struct inode *inode;
			
 
				+	u64 last_objectid = 0;
			
 
				 	int ret = 0, nr_unlink = 0, nr_truncate = 0;
			
 
				 
			
 
				 	if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
			
@@ -2367,41 +2282,49 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 
				 		 * crossing root thing.  we store the inode number in the
			
 
				 		 * offset of the orphan item.
			
 
				 		 */
			
 
				+
			
 
				+		if (found_key.offset == last_objectid) {
			
 
				+			printk(KERN_ERR "btrfs: Error removing orphan entry, "
			
 
				+			       "stopping orphan cleanup\n");
			
 
				+			ret = -EINVAL;
			
 
				+			goto out;
			
 
				+		}
			
 
				+
			
 
				+		last_objectid = found_key.offset;
			
 
				+
			
 
				 		found_key.objectid = found_key.offset;
			
 
				 		found_key.type = BTRFS_INODE_ITEM_KEY;
			
 
				 		found_key.offset = 0;
			
 
				 		inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
			
 
				-		if (IS_ERR(inode)) {
			
 
				-			ret = PTR_ERR(inode);
			
 
				+		ret = PTR_RET(inode);
			
 
				+		if (ret && ret != -ESTALE)
			
 
				 			goto out;
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * add this inode to the orphan list so btrfs_orphan_del does
			
 
				-		 * the proper thing when we hit it
			
 
				-		 */
			
 
				-		spin_lock(&root->orphan_lock);
			
 
				-		list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
			
 
				-		spin_unlock(&root->orphan_lock);
			
 
				 
			
 
				 		/*
			
 
				-		 * if this is a bad inode, means we actually succeeded in
			
 
				-		 * removing the inode, but not the orphan record, which means
			
 
				-		 * we need to manually delete the orphan since iput will just
			
 
				-		 * do a destroy_inode
			
 
				+		 * Inode is already gone but the orphan item is still there,
			
 
				+		 * kill the orphan item.
			
 
				 		 */
			
 
				-		if (is_bad_inode(inode)) {
			
 
				-			trans = btrfs_start_transaction(root, 0);
			
 
				+		if (ret == -ESTALE) {
			
 
				+			trans = btrfs_start_transaction(root, 1);
			
 
				 			if (IS_ERR(trans)) {
			
 
				 				ret = PTR_ERR(trans);
			
 
				 				goto out;
			
 
				 			}
			
 
				-			btrfs_orphan_del(trans, inode);
			
 
				+			ret = btrfs_del_orphan_item(trans, root,
			
 
				+						    found_key.objectid);
			
 
				+			BUG_ON(ret);
			
 
				 			btrfs_end_transaction(trans, root);
			
 
				-			iput(inode);
			
 
				 			continue;
			
 
				 		}
			
 
				 
			
 
				+		/*
			
 
				+		 * add this inode to the orphan list so btrfs_orphan_del does
			
 
				+		 * the proper thing when we hit it
			
 
				+		 */
			
 
				+		spin_lock(&root->orphan_lock);
			
 
				+		list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
			
 
				+		spin_unlock(&root->orphan_lock);
			
 
				+
			
 
				 		/* if we have links, this was a truncate, lets do that */
			
 
				 		if (inode->i_nlink) {
			
 
				 			if (!S_ISREG(inode->i_mode)) {
			
@@ -2835,7 +2758,16 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
 
				 	u64 ino = btrfs_ino(inode);
			
 
				 	u64 dir_ino = btrfs_ino(dir);
			
 
				 
			
 
				-	trans = btrfs_start_transaction(root, 10);
			
 
				+	/*
			
 
				+	 * 1 for the possible orphan item
			
 
				+	 * 1 for the dir item
			
 
				+	 * 1 for the dir index
			
 
				+	 * 1 for the inode ref
			
 
				+	 * 1 for the inode ref in the tree log
			
 
				+	 * 2 for the dir entries in the log
			
 
				+	 * 1 for the inode
			
 
				+	 */
			
 
				+	trans = btrfs_start_transaction(root, 8);
			
 
				 	if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
			
 
				 		return trans;
			
 
				 
			
@@ -2858,7 +2790,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
 
				 		return ERR_PTR(-ENOMEM);
			
 
				 	}
			
 
				 
			
 
				-	trans = btrfs_start_transaction(root, 0);
			
 
				+	/* 1 for the orphan item */
			
 
				+	trans = btrfs_start_transaction(root, 1);
			
 
				 	if (IS_ERR(trans)) {
			
 
				 		btrfs_free_path(path);
			
 
				 		root->fs_info->enospc_unlink = 0;
			
@@ -2963,6 +2896,12 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
 
				 	err = 0;
			
 
				 out:
			
 
				 	btrfs_free_path(path);
			
 
				+	/* Migrate the orphan reservation over */
			
 
				+	if (!err)
			
 
				+		err = btrfs_block_rsv_migrate(trans->block_rsv,
			
 
				+				&root->fs_info->global_block_rsv,
			
 
				+				btrfs_calc_trans_metadata_size(root, 1));
			
 
				+
			
 
				 	if (err) {
			
 
				 		btrfs_end_transaction(trans, root);
			
 
				 		root->fs_info->enospc_unlink = 0;
			
@@ -3368,6 +3307,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 
				 	pgoff_t index = from >> PAGE_CACHE_SHIFT;
			
 
				 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
			
 
				 	struct page *page;
			
 
				+	gfp_t mask = btrfs_alloc_write_mask(mapping);
			
 
				 	int ret = 0;
			
 
				 	u64 page_start;
			
 
				 	u64 page_end;
			
@@ -3380,7 +3320,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 
				 
			
 
				 	ret = -ENOMEM;
			
 
				 again:
			
 
				-	page = find_or_create_page(mapping, index, GFP_NOFS);
			
 
				+	page = find_or_create_page(mapping, index, mask);
			
 
				 	if (!page) {
			
 
				 		btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
			
 
				 		goto out;
			
@@ -3613,6 +3553,8 @@ void btrfs_evict_inode(struct inode *inode)
 
				 {
			
 
				 	struct btrfs_trans_handle *trans;
			
 
				 	struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				+	struct btrfs_block_rsv *rsv, *global_rsv;
			
 
				+	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
			
 
				 	unsigned long nr;
			
 
				 	int ret;
			
 
				 
			
@@ -3640,22 +3582,55 @@ void btrfs_evict_inode(struct inode *inode)
 
				 		goto no_delete;
			
 
				 	}
			
 
				 
			
 
				+	rsv = btrfs_alloc_block_rsv(root);
			
 
				+	if (!rsv) {
			
 
				+		btrfs_orphan_del(NULL, inode);
			
 
				+		goto no_delete;
			
 
				+	}
			
 
				+	rsv->size = min_size;
			
 
				+	global_rsv = &root->fs_info->global_block_rsv;
			
 
				+
			
 
				 	btrfs_i_size_write(inode, 0);
			
 
				 
			
 
				+	/*
			
 
				+	 * This is a bit simpler than btrfs_truncate since
			
 
				+	 *
			
 
				+	 * 1) We've already reserved our space for our orphan item in the
			
 
				+	 *    unlink.
			
 
				+	 * 2) We're going to delete the inode item, so we don't need to update
			
 
				+	 *    it at all.
			
 
				+	 *
			
 
				+	 * So we just need to reserve some slack space in case we add bytes when
			
 
				+	 * doing the truncate.
			
 
				+	 */
			
 
				 	while (1) {
			
 
				-		trans = btrfs_join_transaction(root);
			
 
				-		BUG_ON(IS_ERR(trans));
			
 
				-		trans->block_rsv = root->orphan_block_rsv;
			
 
				+		ret = btrfs_block_rsv_refill(root, rsv, min_size);
			
 
				+
			
 
				+		/*
			
 
				+		 * Try and steal from the global reserve since we will
			
 
				+		 * likely not use this space anyway, we want to try as
			
 
				+		 * hard as possible to get this to work.
			
 
				+		 */
			
 
				+		if (ret)
			
 
				+			ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
			
 
				 
			
 
				-		ret = btrfs_block_rsv_check(trans, root,
			
 
				-					    root->orphan_block_rsv, 0, 5);
			
 
				 		if (ret) {
			
 
				-			BUG_ON(ret != -EAGAIN);
			
 
				-			ret = btrfs_commit_transaction(trans, root);
			
 
				-			BUG_ON(ret);
			
 
				-			continue;
			
 
				+			printk(KERN_WARNING "Could not get space for a "
			
 
				+			       "delete, will truncate on mount %d\n", ret);
			
 
				+			btrfs_orphan_del(NULL, inode);
			
 
				+			btrfs_free_block_rsv(root, rsv);
			
 
				+			goto no_delete;
			
 
				+		}
			
 
				+
			
 
				+		trans = btrfs_start_transaction(root, 0);
			
 
				+		if (IS_ERR(trans)) {
			
 
				+			btrfs_orphan_del(NULL, inode);
			
 
				+			btrfs_free_block_rsv(root, rsv);
			
 
				+			goto no_delete;
			
 
				 		}
			
 
				 
			
 
				+		trans->block_rsv = rsv;
			
 
				+
			
 
				 		ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
			
 
				 		if (ret != -EAGAIN)
			
 
				 			break;
			
@@ -3664,14 +3639,17 @@ void btrfs_evict_inode(struct inode *inode)
 
				 		btrfs_end_transaction(trans, root);
			
 
				 		trans = NULL;
			
 
				 		btrfs_btree_balance_dirty(root, nr);
			
 
				-
			
 
				 	}
			
 
				 
			
 
				+	btrfs_free_block_rsv(root, rsv);
			
 
				+
			
 
				 	if (ret == 0) {
			
 
				+		trans->block_rsv = root->orphan_block_rsv;
			
 
				 		ret = btrfs_orphan_del(trans, inode);
			
 
				 		BUG_ON(ret);
			
 
				 	}
			
 
				 
			
 
				+	trans->block_rsv = &root->fs_info->trans_block_rsv;
			
 
				 	if (!(root == root->fs_info->tree_root ||
			
 
				 	      root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
			
 
				 		btrfs_return_ino(root, btrfs_ino(inode));
			
@@ -6541,6 +6519,7 @@ static int btrfs_truncate(struct inode *inode)
 
				 	struct btrfs_trans_handle *trans;
			
 
				 	unsigned long nr;
			
 
				 	u64 mask = root->sectorsize - 1;
			
 
				+	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
			
 
				 
			
 
				 	ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
			
 
				 	if (ret)
			
@@ -6588,19 +6567,23 @@ static int btrfs_truncate(struct inode *inode)
 
				 	rsv = btrfs_alloc_block_rsv(root);
			
 
				 	if (!rsv)
			
 
				 		return -ENOMEM;
			
 
				-	btrfs_add_durable_block_rsv(root->fs_info, rsv);
			
 
				+	rsv->size = min_size;
			
 
				 
			
 
				+	/*
			
 
				+	 * 1 for the truncate slack space
			
 
				+	 * 1 for the orphan item we're going to add
			
 
				+	 * 1 for the orphan item deletion
			
 
				+	 * 1 for updating the inode.
			
 
				+	 */
			
 
				 	trans = btrfs_start_transaction(root, 4);
			
 
				 	if (IS_ERR(trans)) {
			
 
				 		err = PTR_ERR(trans);
			
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	/*
			
 
				-	 * Reserve space for the truncate process.  Truncate should be adding
			
 
				-	 * space, but if there are snapshots it may end up using space.
			
 
				-	 */
			
 
				-	ret = btrfs_truncate_reserve_metadata(trans, root, rsv);
			
 
				+	/* Migrate the slack space for the truncate to our reserve */
			
 
				+	ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
			
 
				+				      min_size);
			
 
				 	BUG_ON(ret);
			
 
				 
			
 
				 	ret = btrfs_orphan_add(trans, inode);
			
@@ -6609,21 +6592,6 @@ static int btrfs_truncate(struct inode *inode)
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	nr = trans->blocks_used;
			
 
				-	btrfs_end_transaction(trans, root);
			
 
				-	btrfs_btree_balance_dirty(root, nr);
			
 
				-
			
 
				-	/*
			
 
				-	 * Ok so we've already migrated our bytes over for the truncate, so here
			
 
				-	 * just reserve the one slot we need for updating the inode.
			
 
				-	 */
			
 
				-	trans = btrfs_start_transaction(root, 1);
			
 
				-	if (IS_ERR(trans)) {
			
 
				-		err = PTR_ERR(trans);
			
 
				-		goto out;
			
 
				-	}
			
 
				-	trans->block_rsv = rsv;
			
 
				-
			
 
				 	/*
			
 
				 	 * setattr is responsible for setting the ordered_data_close flag,
			
 
				 	 * but that is only tested during the last file release.  That
			
@@ -6645,20 +6613,30 @@ static int btrfs_truncate(struct inode *inode)
 
				 		btrfs_add_ordered_operation(trans, root, inode);
			
 
				 
			
 
				 	while (1) {
			
 
				+		ret = btrfs_block_rsv_refill(root, rsv, min_size);
			
 
				+		if (ret) {
			
 
				+			/*
			
 
				+			 * This can only happen with the original transaction we
			
 
				+			 * started above, every other time we shouldn't have a
			
 
				+			 * transaction started yet.
			
 
				+			 */
			
 
				+			if (ret == -EAGAIN)
			
 
				+				goto end_trans;
			
 
				+			err = ret;
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				 		if (!trans) {
			
 
				-			trans = btrfs_start_transaction(root, 3);
			
 
				+			/* Just need the 1 for updating the inode */
			
 
				+			trans = btrfs_start_transaction(root, 1);
			
 
				 			if (IS_ERR(trans)) {
			
 
				 				err = PTR_ERR(trans);
			
 
				 				goto out;
			
 
				 			}
			
 
				-
			
 
				-			ret = btrfs_truncate_reserve_metadata(trans, root,
			
 
				-							      rsv);
			
 
				-			BUG_ON(ret);
			
 
				-
			
 
				-			trans->block_rsv = rsv;
			
 
				 		}
			
 
				 
			
 
				+		trans->block_rsv = rsv;
			
 
				+
			
 
				 		ret = btrfs_truncate_inode_items(trans, root, inode,
			
 
				 						 inode->i_size,
			
 
				 						 BTRFS_EXTENT_DATA_KEY);
			
@@ -6673,7 +6651,7 @@ static int btrfs_truncate(struct inode *inode)
 
				 			err = ret;
			
 
				 			break;
			
 
				 		}
			
 
				-
			
 
				+end_trans:
			
 
				 		nr = trans->blocks_used;
			
 
				 		btrfs_end_transaction(trans, root);
			
 
				 		trans = NULL;
			
@@ -6755,9 +6733,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 
				 	ei->last_sub_trans = 0;
			
 
				 	ei->logged_trans = 0;
			
 
				 	ei->delalloc_bytes = 0;
			
 
				-	ei->reserved_bytes = 0;
			
 
				 	ei->disk_i_size = 0;
			
 
				 	ei->flags = 0;
			
 
				+	ei->csum_bytes = 0;
			
 
				 	ei->index_cnt = (u64)-1;
			
 
				 	ei->last_unlink_trans = 0;
			
 
				 
			
@@ -6803,6 +6781,8 @@ void btrfs_destroy_inode(struct inode *inode)
 
				 	WARN_ON(inode->i_data.nrpages);
			
 
				 	WARN_ON(BTRFS_I(inode)->outstanding_extents);
			
 
				 	WARN_ON(BTRFS_I(inode)->reserved_extents);
			
 
				+	WARN_ON(BTRFS_I(inode)->delalloc_bytes);
			
 
				+	WARN_ON(BTRFS_I(inode)->csum_bytes);
			
 
				 
			
 
				 	/*
			
 
				 	 * This can happen where we create an inode, but somebody else also
			
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -117,7 +117,7 @@ void btrfs_update_iflags(struct inode *inode)
 
				 /*
			
 
				  * Inherit flags from the parent inode.
			
 
				  *
			
 
				- * Unlike extN we don't have any flags we don't want to inherit currently.
			
 
				+ * Currently only the compression flags and the cow flags are inherited.
			
 
				  */
			
 
				 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
			
 
				 {
			
@@ -128,12 +128,17 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
 
				 
			
 
				 	flags = BTRFS_I(dir)->flags;
			
 
				 
			
 
				-	if (S_ISREG(inode->i_mode))
			
 
				-		flags &= ~BTRFS_INODE_DIRSYNC;
			
 
				-	else if (!S_ISDIR(inode->i_mode))
			
 
				-		flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME);
			
 
				+	if (flags & BTRFS_INODE_NOCOMPRESS) {
			
 
				+		BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
			
 
				+		BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
			
 
				+	} else if (flags & BTRFS_INODE_COMPRESS) {
			
 
				+		BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
			
 
				+		BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
			
 
				+	}
			
 
				+
			
 
				+	if (flags & BTRFS_INODE_NODATACOW)
			
 
				+		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
			
 
				 
			
 
				-	BTRFS_I(inode)->flags = flags;
			
 
				 	btrfs_update_iflags(inode);
			
 
				 }
			
 
				 
			
@@ -843,6 +848,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 
				 	int i_done;
			
 
				 	struct btrfs_ordered_extent *ordered;
			
 
				 	struct extent_state *cached_state = NULL;
			
 
				+	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
			
 
				 
			
 
				 	if (isize == 0)
			
 
				 		return 0;
			
@@ -860,7 +866,7 @@ again:
 
				 	for (i = 0; i < num_pages; i++) {
			
 
				 		struct page *page;
			
 
				 		page = find_or_create_page(inode->i_mapping,
			
 
				-					    start_index + i, GFP_NOFS);
			
 
				+					    start_index + i, mask);
			
 
				 		if (!page)
			
 
				 			break;
			
 
				 
			
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2041,8 +2041,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
 
				 		BUG_ON(IS_ERR(trans));
			
 
				 		trans->block_rsv = rc->block_rsv;
			
 
				 
			
 
				-		ret = btrfs_block_rsv_check(trans, root, rc->block_rsv,
			
 
				-					    min_reserved, 0);
			
 
				+		ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved);
			
 
				 		if (ret) {
			
 
				 			BUG_ON(ret != -EAGAIN);
			
 
				 			ret = btrfs_commit_transaction(trans, root);
			
@@ -2152,8 +2151,7 @@ int prepare_to_merge(struct reloc_control *rc, int err)
 
				 again:
			
 
				 	if (!err) {
			
 
				 		num_bytes = rc->merging_rsv_size;
			
 
				-		ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
			
 
				-					  num_bytes);
			
 
				+		ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes);
			
 
				 		if (ret)
			
 
				 			err = ret;
			
 
				 	}
			
@@ -2427,7 +2425,7 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
 
				 	num_bytes = calcu_metadata_size(rc, node, 1) * 2;
			
 
				 
			
 
				 	trans->block_rsv = rc->block_rsv;
			
 
				-	ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
			
 
				+	ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes);
			
 
				 	if (ret) {
			
 
				 		if (ret == -EAGAIN)
			
 
				 			rc->commit_transaction = 1;
			
@@ -2922,6 +2920,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
 
				 	unsigned long last_index;
			
 
				 	struct page *page;
			
 
				 	struct file_ra_state *ra;
			
 
				+	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
			
 
				 	int nr = 0;
			
 
				 	int ret = 0;
			
 
				 
			
@@ -2956,7 +2955,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
 
				 						  ra, NULL, index,
			
 
				 						  last_index + 1 - index);
			
 
				 			page = find_or_create_page(inode->i_mapping, index,
			
 
				-						   GFP_NOFS);
			
 
				+						   mask);
			
 
				 			if (!page) {
			
 
				 				btrfs_delalloc_release_metadata(inode,
			
 
				 							PAGE_CACHE_SIZE);
			
@@ -3645,14 +3644,11 @@ int prepare_to_relocate(struct reloc_control *rc)
 
				 	 * btrfs_init_reloc_root will use them when there
			
 
				 	 * is no reservation in transaction handle.
			
 
				 	 */
			
 
				-	ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
			
 
				+	ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv,
			
 
				 				  rc->extent_root->nodesize * 256);
			
 
				 	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				-	rc->block_rsv->refill_used = 1;
			
 
				-	btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv);
			
 
				-
			
 
				 	memset(&rc->cluster, 0, sizeof(rc->cluster));
			
 
				 	rc->search_start = rc->block_group->key.objectid;
			
 
				 	rc->extents_found = 0;
			
@@ -3777,8 +3773,7 @@ restart:
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		ret = btrfs_block_rsv_check(trans, rc->extent_root,
			
 
				-					    rc->block_rsv, 0, 5);
			
 
				+		ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);
			
 
				 		if (ret < 0) {
			
 
				 			if (ret != -EAGAIN) {
			
 
				 				err = ret;
			
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -40,6 +40,7 @@
 
				 #include <linux/magic.h>
			
 
				 #include <linux/slab.h>
			
 
				 #include <linux/cleancache.h>
			
 
				+#include <linux/mnt_namespace.h>
			
 
				 #include "compat.h"
			
 
				 #include "delayed-inode.h"
			
 
				 #include "ctree.h"
			
@@ -58,6 +59,7 @@
 
				 #include <trace/events/btrfs.h>
			
 
				 
			
 
				 static const struct super_operations btrfs_super_ops;
			
 
				+static struct file_system_type btrfs_fs_type;
			
 
				 
			
 
				 static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
			
 
				 				      char nbuf[16])
			
@@ -162,7 +164,7 @@ enum {
 
				 	Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
			
 
				 	Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
			
 
				 	Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
			
 
				-	Opt_inode_cache, Opt_err,
			
 
				+	Opt_inode_cache, Opt_no_space_cache, Opt_err,
			
 
				 };
			
 
				 
			
 
				 static match_table_t tokens = {
			
@@ -195,6 +197,7 @@ static match_table_t tokens = {
 
				 	{Opt_subvolrootid, "subvolrootid=%d"},
			
 
				 	{Opt_defrag, "autodefrag"},
			
 
				 	{Opt_inode_cache, "inode_cache"},
			
 
				+	{Opt_no_space_cache, "no_space_cache"},
			
 
				 	{Opt_err, NULL},
			
 
				 };
			
 
				 
			
@@ -206,14 +209,19 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 
				 {
			
 
				 	struct btrfs_fs_info *info = root->fs_info;
			
 
				 	substring_t args[MAX_OPT_ARGS];
			
 
				-	char *p, *num, *orig;
			
 
				+	char *p, *num, *orig = NULL;
			
 
				+	u64 cache_gen;
			
 
				 	int intarg;
			
 
				 	int ret = 0;
			
 
				 	char *compress_type;
			
 
				 	bool compress_force = false;
			
 
				 
			
 
				+	cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
			
 
				+	if (cache_gen)
			
 
				+		btrfs_set_opt(info->mount_opt, SPACE_CACHE);
			
 
				+
			
 
				 	if (!options)
			
 
				-		return 0;
			
 
				+		goto out;
			
 
				 
			
 
				 	/*
			
 
				 	 * strsep changes the string, duplicate it because parse_options
			
@@ -360,9 +368,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 
				 			btrfs_set_opt(info->mount_opt, DISCARD);
			
 
				 			break;
			
 
				 		case Opt_space_cache:
			
 
				-			printk(KERN_INFO "btrfs: enabling disk space caching\n");
			
 
				 			btrfs_set_opt(info->mount_opt, SPACE_CACHE);
			
 
				 			break;
			
 
				+		case Opt_no_space_cache:
			
 
				+			printk(KERN_INFO "btrfs: disabling disk space caching\n");
			
 
				+			btrfs_clear_opt(info->mount_opt, SPACE_CACHE);
			
 
				+			break;
			
 
				 		case Opt_inode_cache:
			
 
				 			printk(KERN_INFO "btrfs: enabling inode map caching\n");
			
 
				 			btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE);
			
@@ -391,6 +402,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 
				 		}
			
 
				 	}
			
 
				 out:
			
 
				+	if (!ret && btrfs_test_opt(root, SPACE_CACHE))
			
 
				+		printk(KERN_INFO "btrfs: disk space caching is enabled\n");
			
 
				 	kfree(orig);
			
 
				 	return ret;
			
 
				 }
			
@@ -411,7 +424,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
 
				 	int intarg;
			
 
				 
			
 
				 	if (!options)
			
 
				-		goto out;
			
 
				+		return 0;
			
 
				 
			
 
				 	/*
			
 
				 	 * strsep changes the string, duplicate it because parse_options
			
@@ -460,26 +473,15 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
 
				 			error = btrfs_scan_one_device(match_strdup(&args[0]),
			
 
				 					flags, holder, fs_devices);
			
 
				 			if (error)
			
 
				-				goto out_free_opts;
			
 
				+				goto out;
			
 
				 			break;
			
 
				 		default:
			
 
				 			break;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				- out_free_opts:
			
 
				+out:
			
 
				 	kfree(orig);
			
 
				- out:
			
 
				-	/*
			
 
				-	 * If no subvolume name is specified we use the default one.  Allocate
			
 
				-	 * a copy of the string "." here so that code later in the
			
 
				-	 * mount path doesn't care if it's the default volume or another one.
			
 
				-	 */
			
 
				-	if (!*subvol_name) {
			
 
				-		*subvol_name = kstrdup(".", GFP_KERNEL);
			
 
				-		if (!*subvol_name)
			
 
				-			return -ENOMEM;
			
 
				-	}
			
 
				 	return error;
			
 
				 }
			
 
				 
			
@@ -492,7 +494,6 @@ static struct dentry *get_default_root(struct super_block *sb,
 
				 	struct btrfs_path *path;
			
 
				 	struct btrfs_key location;
			
 
				 	struct inode *inode;
			
 
				-	struct dentry *dentry;
			
 
				 	u64 dir_id;
			
 
				 	int new = 0;
			
 
				 
			
@@ -566,29 +567,7 @@ setup_root:
 
				 		return dget(sb->s_root);
			
 
				 	}
			
 
				 
			
 
				-	if (new) {
			
 
				-		const struct qstr name = { .name = "/", .len = 1 };
			
 
				-
			
 
				-		/*
			
 
				-		 * New inode, we need to make the dentry a sibling of s_root so
			
 
				-		 * everything gets cleaned up properly on unmount.
			
 
				-		 */
			
 
				-		dentry = d_alloc(sb->s_root, &name);
			
 
				-		if (!dentry) {
			
 
				-			iput(inode);
			
 
				-			return ERR_PTR(-ENOMEM);
			
 
				-		}
			
 
				-		d_splice_alias(inode, dentry);
			
 
				-	} else {
			
 
				-		/*
			
 
				-		 * We found the inode in cache, just find a dentry for it and
			
 
				-		 * put the reference to the inode we just got.
			
 
				-		 */
			
 
				-		dentry = d_find_alias(inode);
			
 
				-		iput(inode);
			
 
				-	}
			
 
				-
			
 
				-	return dentry;
			
 
				+	return d_obtain_alias(inode);
			
 
				 }
			
 
				 
			
 
				 static int btrfs_fill_super(struct super_block *sb,
			
@@ -719,6 +698,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 
				 		seq_puts(seq, ",noacl");
			
 
				 	if (btrfs_test_opt(root, SPACE_CACHE))
			
 
				 		seq_puts(seq, ",space_cache");
			
 
				+	else
			
 
				+		seq_puts(seq, ",no_space_cache");
			
 
				 	if (btrfs_test_opt(root, CLEAR_CACHE))
			
 
				 		seq_puts(seq, ",clear_cache");
			
 
				 	if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
			
@@ -753,6 +734,118 @@ static int btrfs_set_super(struct super_block *s, void *data)
 
				 	return set_anon_super(s, data);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * This will strip out the subvol=%s argument for an argument string and add
			
 
				+ * subvolid=0 to make sure we get the actual tree root for path walking to the
			
 
				+ * subvol we want.
			
 
				+ */
			
 
				+static char *setup_root_args(char *args)
			
 
				+{
			
 
				+	unsigned copied = 0;
			
 
				+	unsigned len = strlen(args) + 2;
			
 
				+	char *pos;
			
 
				+	char *ret;
			
 
				+
			
 
				+	/*
			
 
				+	 * We need the same args as before, but minus
			
 
				+	 *
			
 
				+	 * subvol=a
			
 
				+	 *
			
 
				+	 * and add
			
 
				+	 *
			
 
				+	 * subvolid=0
			
 
				+	 *
			
 
				+	 * which is a difference of 2 characters, so we allocate strlen(args) +
			
 
				+	 * 2 characters.
			
 
				+	 */
			
 
				+	ret = kzalloc(len * sizeof(char), GFP_NOFS);
			
 
				+	if (!ret)
			
 
				+		return NULL;
			
 
				+	pos = strstr(args, "subvol=");
			
 
				+
			
 
				+	/* This shouldn't happen, but just in case.. */
			
 
				+	if (!pos) {
			
 
				+		kfree(ret);
			
 
				+		return NULL;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * The subvol=<> arg is not at the front of the string, copy everything
			
 
				+	 * up to that into ret.
			
 
				+	 */
			
 
				+	if (pos != args) {
			
 
				+		*pos = '\0';
			
 
				+		strcpy(ret, args);
			
 
				+		copied += strlen(args);
			
 
				+		pos++;
			
 
				+	}
			
 
				+
			
 
				+	strncpy(ret + copied, "subvolid=0", len - copied);
			
 
				+
			
 
				+	/* Length of subvolid=0 */
			
 
				+	copied += 10;
			
 
				+
			
 
				+	/*
			
 
				+	 * If there is no ',' after the subvol= option then we know there are no
			
 
				+	 * other options and we can just return.
			
 
				+	 */
			
 
				+	pos = strchr(pos, ',');
			
 
				+	if (!pos)
			
 
				+		return ret;
			
 
				+
			
 
				+	/* Copy the rest of the arguments into our buffer */
			
 
				+	strncpy(ret + copied, pos, len - copied);
			
 
				+	copied += strlen(pos);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+static struct dentry *mount_subvol(const char *subvol_name, int flags,
			
 
				+				   const char *device_name, char *data)
			
 
				+{
			
 
				+	struct super_block *s;
			
 
				+	struct dentry *root;
			
 
				+	struct vfsmount *mnt;
			
 
				+	struct mnt_namespace *ns_private;
			
 
				+	char *newargs;
			
 
				+	struct path path;
			
 
				+	int error;
			
 
				+
			
 
				+	newargs = setup_root_args(data);
			
 
				+	if (!newargs)
			
 
				+		return ERR_PTR(-ENOMEM);
			
 
				+	mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
			
 
				+			     newargs);
			
 
				+	kfree(newargs);
			
 
				+	if (IS_ERR(mnt))
			
 
				+		return ERR_CAST(mnt);
			
 
				+
			
 
				+	ns_private = create_mnt_ns(mnt);
			
 
				+	if (IS_ERR(ns_private)) {
			
 
				+		mntput(mnt);
			
 
				+		return ERR_CAST(ns_private);
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * This will trigger the automount of the subvol so we can just
			
 
				+	 * drop the mnt we have here and return the dentry that we
			
 
				+	 * found.
			
 
				+	 */
			
 
				+	error = vfs_path_lookup(mnt->mnt_root, mnt, subvol_name,
			
 
				+				LOOKUP_FOLLOW, &path);
			
 
				+	put_mnt_ns(ns_private);
			
 
				+	if (error)
			
 
				+		return ERR_PTR(error);
			
 
				+
			
 
				+	/* Get a ref to the sb and the dentry we found and return it */
			
 
				+	s = path.mnt->mnt_sb;
			
 
				+	atomic_inc(&s->s_active);
			
 
				+	root = dget(path.dentry);
			
 
				+	path_put(&path);
			
 
				+	down_write(&s->s_umount);
			
 
				+
			
 
				+	return root;
			
 
				+}
			
 
				 
			
 
				 /*
			
 
				  * Find a superblock for the given device / mount point.
			
@@ -784,13 +877,19 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 
				 	if (error)
			
 
				 		return ERR_PTR(error);
			
 
				 
			
 
				+	if (subvol_name) {
			
 
				+		root = mount_subvol(subvol_name, flags, device_name, data);
			
 
				+		kfree(subvol_name);
			
 
				+		return root;
			
 
				+	}
			
 
				+
			
 
				 	error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
			
 
				 	if (error)
			
 
				-		goto error_free_subvol_name;
			
 
				+		return ERR_PTR(error);
			
 
				 
			
 
				 	error = btrfs_open_devices(fs_devices, mode, fs_type);
			
 
				 	if (error)
			
 
				-		goto error_free_subvol_name;
			
 
				+		return ERR_PTR(error);
			
 
				 
			
 
				 	if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
			
 
				 		error = -EACCES;
			
@@ -815,14 +914,15 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 
				 
			
 
				 	bdev = fs_devices->latest_bdev;
			
 
				 	s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
			
 
				-	if (IS_ERR(s))
			
 
				-		goto error_s;
			
 
				+	if (IS_ERR(s)) {
			
 
				+		error = PTR_ERR(s);
			
 
				+		goto error_close_devices;
			
 
				+	}
			
 
				 
			
 
				 	if (s->s_root) {
			
 
				 		if ((flags ^ s->s_flags) & MS_RDONLY) {
			
 
				 			deactivate_locked_super(s);
			
 
				-			error = -EBUSY;
			
 
				-			goto error_close_devices;
			
 
				+			return ERR_PTR(-EBUSY);
			
 
				 		}
			
 
				 
			
 
				 		btrfs_close_devices(fs_devices);
			
@@ -837,64 +937,25 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 
				 					 flags & MS_SILENT ? 1 : 0);
			
 
				 		if (error) {
			
 
				 			deactivate_locked_super(s);
			
 
				-			goto error_free_subvol_name;
			
 
				+			return ERR_PTR(error);
			
 
				 		}
			
 
				 
			
 
				 		btrfs_sb(s)->fs_info->bdev_holder = fs_type;
			
 
				 		s->s_flags |= MS_ACTIVE;
			
 
				 	}
			
 
				 
			
 
				-	/* if they gave us a subvolume name bind mount into that */
			
 
				-	if (strcmp(subvol_name, ".")) {
			
 
				-		struct dentry *new_root;
			
 
				-
			
 
				-		root = get_default_root(s, subvol_rootid);
			
 
				-		if (IS_ERR(root)) {
			
 
				-			error = PTR_ERR(root);
			
 
				-			deactivate_locked_super(s);
			
 
				-			goto error_free_subvol_name;
			
 
				-		}
			
 
				-
			
 
				-		mutex_lock(&root->d_inode->i_mutex);
			
 
				-		new_root = lookup_one_len(subvol_name, root,
			
 
				-				      strlen(subvol_name));
			
 
				-		mutex_unlock(&root->d_inode->i_mutex);
			
 
				-
			
 
				-		if (IS_ERR(new_root)) {
			
 
				-			dput(root);
			
 
				-			deactivate_locked_super(s);
			
 
				-			error = PTR_ERR(new_root);
			
 
				-			goto error_free_subvol_name;
			
 
				-		}
			
 
				-		if (!new_root->d_inode) {
			
 
				-			dput(root);
			
 
				-			dput(new_root);
			
 
				-			deactivate_locked_super(s);
			
 
				-			error = -ENXIO;
			
 
				-			goto error_free_subvol_name;
			
 
				-		}
			
 
				-		dput(root);
			
 
				-		root = new_root;
			
 
				-	} else {
			
 
				-		root = get_default_root(s, subvol_objectid);
			
 
				-		if (IS_ERR(root)) {
			
 
				-			error = PTR_ERR(root);
			
 
				-			deactivate_locked_super(s);
			
 
				-			goto error_free_subvol_name;
			
 
				-		}
			
 
				+	root = get_default_root(s, subvol_objectid);
			
 
				+	if (IS_ERR(root)) {
			
 
				+		deactivate_locked_super(s);
			
 
				+		return root;
			
 
				 	}
			
 
				 
			
 
				-	kfree(subvol_name);
			
 
				 	return root;
			
 
				 
			
 
				-error_s:
			
 
				-	error = PTR_ERR(s);
			
 
				 error_close_devices:
			
 
				 	btrfs_close_devices(fs_devices);
			
 
				 	kfree(fs_info);
			
 
				 	kfree(tree_root);
			
 
				-error_free_subvol_name:
			
 
				-	kfree(subvol_name);
			
 
				 	return ERR_PTR(error);
			
 
				 }
			
 
				 
			
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -275,7 +275,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 
				 	 */
			
 
				 	if (num_items > 0 && root != root->fs_info->chunk_root) {
			
 
				 		num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
			
 
				-		ret = btrfs_block_rsv_add(NULL, root,
			
 
				+		ret = btrfs_block_rsv_add(root,
			
 
				 					  &root->fs_info->trans_block_rsv,
			
 
				 					  num_bytes);
			
 
				 		if (ret)
			
@@ -418,8 +418,8 @@ static int should_end_transaction(struct btrfs_trans_handle *trans,
 
				 				  struct btrfs_root *root)
			
 
				 {
			
 
				 	int ret;
			
 
				-	ret = btrfs_block_rsv_check(trans, root,
			
 
				-				    &root->fs_info->global_block_rsv, 0, 5);
			
 
				+
			
 
				+	ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
			
 
				 	return ret ? 1 : 0;
			
 
				 }
			
 
				 
			
@@ -427,17 +427,26 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
 
				 				 struct btrfs_root *root)
			
 
				 {
			
 
				 	struct btrfs_transaction *cur_trans = trans->transaction;
			
 
				+	struct btrfs_block_rsv *rsv = trans->block_rsv;
			
 
				 	int updates;
			
 
				 
			
 
				 	smp_mb();
			
 
				 	if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
			
 
				 		return 1;
			
 
				 
			
 
				+	/*
			
 
				+	 * We need to do this in case we're deleting csums so the global block
			
 
				+	 * rsv gets used instead of the csum block rsv.
			
 
				+	 */
			
 
				+	trans->block_rsv = NULL;
			
 
				+
			
 
				 	updates = trans->delayed_ref_updates;
			
 
				 	trans->delayed_ref_updates = 0;
			
 
				 	if (updates)
			
 
				 		btrfs_run_delayed_refs(trans, root, updates);
			
 
				 
			
 
				+	trans->block_rsv = rsv;
			
 
				+
			
 
				 	return should_end_transaction(trans, root);
			
 
				 }
			
 
				 
			
@@ -453,6 +462,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
				 		return 0;
			
 
				 	}
			
 
				 
			
 
				+	btrfs_trans_release_metadata(trans, root);
			
 
				+	trans->block_rsv = NULL;
			
 
				 	while (count < 4) {
			
 
				 		unsigned long cur = trans->delayed_ref_updates;
			
 
				 		trans->delayed_ref_updates = 0;
			
@@ -473,8 +484,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
				 		count++;
			
 
				 	}
			
 
				 
			
 
				-	btrfs_trans_release_metadata(trans, root);
			
 
				-
			
 
				 	if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
			
 
				 	    should_end_transaction(trans, root)) {
			
 
				 		trans->transaction->blocked = 1;
			
@@ -562,50 +571,21 @@ int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
 
				 int btrfs_write_marked_extents(struct btrfs_root *root,
			
 
				 			       struct extent_io_tree *dirty_pages, int mark)
			
 
				 {
			
 
				-	int ret;
			
 
				 	int err = 0;
			
 
				 	int werr = 0;
			
 
				-	struct page *page;
			
 
				-	struct inode *btree_inode = root->fs_info->btree_inode;
			
 
				+	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
			
 
				 	u64 start = 0;
			
 
				 	u64 end;
			
 
				-	unsigned long index;
			
 
				-
			
 
				-	while (1) {
			
 
				-		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
			
 
				-					    mark);
			
 
				-		if (ret)
			
 
				-			break;
			
 
				-		while (start <= end) {
			
 
				-			cond_resched();
			
 
				-
			
 
				-			index = start >> PAGE_CACHE_SHIFT;
			
 
				-			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			
 
				-			page = find_get_page(btree_inode->i_mapping, index);
			
 
				-			if (!page)
			
 
				-				continue;
			
 
				-
			
 
				-			btree_lock_page_hook(page);
			
 
				-			if (!page->mapping) {
			
 
				-				unlock_page(page);
			
 
				-				page_cache_release(page);
			
 
				-				continue;
			
 
				-			}
			
 
				 
			
 
				-			if (PageWriteback(page)) {
			
 
				-				if (PageDirty(page))
			
 
				-					wait_on_page_writeback(page);
			
 
				-				else {
			
 
				-					unlock_page(page);
			
 
				-					page_cache_release(page);
			
 
				-					continue;
			
 
				-				}
			
 
				-			}
			
 
				-			err = write_one_page(page, 0);
			
 
				-			if (err)
			
 
				-				werr = err;
			
 
				-			page_cache_release(page);
			
 
				-		}
			
 
				+	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
			
 
				+				      mark)) {
			
 
				+		convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, mark,
			
 
				+				   GFP_NOFS);
			
 
				+		err = filemap_fdatawrite_range(mapping, start, end);
			
 
				+		if (err)
			
 
				+			werr = err;
			
 
				+		cond_resched();
			
 
				+		start = end + 1;
			
 
				 	}
			
 
				 	if (err)
			
 
				 		werr = err;
			
@@ -621,39 +601,20 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
 
				 int btrfs_wait_marked_extents(struct btrfs_root *root,
			
 
				 			      struct extent_io_tree *dirty_pages, int mark)
			
 
				 {
			
 
				-	int ret;
			
 
				 	int err = 0;
			
 
				 	int werr = 0;
			
 
				-	struct page *page;
			
 
				-	struct inode *btree_inode = root->fs_info->btree_inode;
			
 
				+	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
			
 
				 	u64 start = 0;
			
 
				 	u64 end;
			
 
				-	unsigned long index;
			
 
				-
			
 
				-	while (1) {
			
 
				-		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
			
 
				-					    mark);
			
 
				-		if (ret)
			
 
				-			break;
			
 
				 
			
 
				-		clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
			
 
				-		while (start <= end) {
			
 
				-			index = start >> PAGE_CACHE_SHIFT;
			
 
				-			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			
 
				-			page = find_get_page(btree_inode->i_mapping, index);
			
 
				-			if (!page)
			
 
				-				continue;
			
 
				-			if (PageDirty(page)) {
			
 
				-				btree_lock_page_hook(page);
			
 
				-				wait_on_page_writeback(page);
			
 
				-				err = write_one_page(page, 0);
			
 
				-				if (err)
			
 
				-					werr = err;
			
 
				-			}
			
 
				-			wait_on_page_writeback(page);
			
 
				-			page_cache_release(page);
			
 
				-			cond_resched();
			
 
				-		}
			
 
				+	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
			
 
				+				      EXTENT_NEED_WAIT)) {
			
 
				+		clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT, GFP_NOFS);
			
 
				+		err = filemap_fdatawait_range(mapping, start, end);
			
 
				+		if (err)
			
 
				+			werr = err;
			
 
				+		cond_resched();
			
 
				+		start = end + 1;
			
 
				 	}
			
 
				 	if (err)
			
 
				 		werr = err;
			
@@ -911,10 +872,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
				 	}
			
 
				 
			
 
				 	btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
			
 
				-	btrfs_orphan_pre_snapshot(trans, pending, &to_reserve);
			
 
				 
			
 
				 	if (to_reserve > 0) {
			
 
				-		ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
			
 
				+		ret = btrfs_block_rsv_add(root, &pending->block_rsv,
			
 
				 					  to_reserve);
			
 
				 		if (ret) {
			
 
				 			pending->error = ret;
			
@@ -1002,7 +962,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
				 	BUG_ON(IS_ERR(pending->snap));
			
 
				 
			
 
				 	btrfs_reloc_post_snapshot(trans, pending);
			
 
				-	btrfs_orphan_post_snapshot(trans, pending);
			
 
				 fail:
			
 
				 	kfree(new_root_item);
			
 
				 	trans->block_rsv = rsv;
			
@@ -1043,7 +1002,7 @@ static void update_super_roots(struct btrfs_root *root)
 
				 	super->root = root_item->bytenr;
			
 
				 	super->generation = root_item->generation;
			
 
				 	super->root_level = root_item->level;
			
 
				-	if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
			
 
				+	if (btrfs_test_opt(root, SPACE_CACHE))
			
 
				 		super->cache_generation = root_item->generation;
			
 
				 }
			
 
				 
			
@@ -1168,14 +1127,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
				 
			
 
				 	btrfs_run_ordered_operations(root, 0);
			
 
				 
			
 
				+	btrfs_trans_release_metadata(trans, root);
			
 
				+	trans->block_rsv = NULL;
			
 
				+
			
 
				 	/* make a pass through all the delayed refs we have so far
			
 
				 	 * any runnings procs may add more while we are here
			
 
				 	 */
			
 
				 	ret = btrfs_run_delayed_refs(trans, root, 0);
			
 
				 	BUG_ON(ret);
			
 
				 
			
 
				-	btrfs_trans_release_metadata(trans, root);
			
 
				-
			
 
				 	cur_trans = trans->transaction;
			
 
				 	/*
			
 
				 	 * set the flushing flag so procs in this transaction have to
			
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1013,8 +1013,13 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
 
				 	}
			
 
				 	BUG_ON(ret);
			
 
				 
			
 
				-	if (device->bytes_used > 0)
			
 
				-		device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
			
 
				+	if (device->bytes_used > 0) {
			
 
				+		u64 len = btrfs_dev_extent_length(leaf, extent);
			
 
				+		device->bytes_used -= len;
			
 
				+		spin_lock(&root->fs_info->free_chunk_lock);
			
 
				+		root->fs_info->free_chunk_space += len;
			
 
				+		spin_unlock(&root->fs_info->free_chunk_lock);
			
 
				+	}
			
 
				 	ret = btrfs_del_item(trans, root, path);
			
 
				 
			
 
				 out:
			
@@ -1356,6 +1361,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 
				 	if (ret)
			
 
				 		goto error_undo;
			
 
				 
			
 
				+	spin_lock(&root->fs_info->free_chunk_lock);
			
 
				+	root->fs_info->free_chunk_space = device->total_bytes -
			
 
				+		device->bytes_used;
			
 
				+	spin_unlock(&root->fs_info->free_chunk_lock);
			
 
				+
			
 
				 	device->in_fs_metadata = 0;
			
 
				 	btrfs_scrub_cancel_dev(root, device);
			
 
				 
			
@@ -1691,6 +1701,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 
				 		root->fs_info->fs_devices->num_can_discard++;
			
 
				 	root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
			
 
				 
			
 
				+	spin_lock(&root->fs_info->free_chunk_lock);
			
 
				+	root->fs_info->free_chunk_space += device->total_bytes;
			
 
				+	spin_unlock(&root->fs_info->free_chunk_lock);
			
 
				+
			
 
				 	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
			
 
				 		root->fs_info->fs_devices->rotating = 1;
			
 
				 
			
@@ -2192,8 +2206,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 
				 	lock_chunks(root);
			
 
				 
			
 
				 	device->total_bytes = new_size;
			
 
				-	if (device->writeable)
			
 
				+	if (device->writeable) {
			
 
				 		device->fs_devices->total_rw_bytes -= diff;
			
 
				+		spin_lock(&root->fs_info->free_chunk_lock);
			
 
				+		root->fs_info->free_chunk_space -= diff;
			
 
				+		spin_unlock(&root->fs_info->free_chunk_lock);
			
 
				+	}
			
 
				 	unlock_chunks(root);
			
 
				 
			
 
				 again:
			
@@ -2257,6 +2275,9 @@ again:
 
				 		device->total_bytes = old_size;
			
 
				 		if (device->writeable)
			
 
				 			device->fs_devices->total_rw_bytes += diff;
			
 
				+		spin_lock(&root->fs_info->free_chunk_lock);
			
 
				+		root->fs_info->free_chunk_space += diff;
			
 
				+		spin_unlock(&root->fs_info->free_chunk_lock);
			
 
				 		unlock_chunks(root);
			
 
				 		goto done;
			
 
				 	}
			
@@ -2615,6 +2636,11 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
 
				 		index++;
			
 
				 	}
			
 
				 
			
 
				+	spin_lock(&extent_root->fs_info->free_chunk_lock);
			
 
				+	extent_root->fs_info->free_chunk_space -= (stripe_size *
			
 
				+						   map->num_stripes);
			
 
				+	spin_unlock(&extent_root->fs_info->free_chunk_lock);
			
 
				+
			
 
				 	index = 0;
			
 
				 	stripe = &chunk->stripe;
			
 
				 	while (index < map->num_stripes) {
			
@@ -3616,8 +3642,13 @@ static int read_one_dev(struct btrfs_root *root,
 
				 	fill_device_from_item(leaf, dev_item, device);
			
 
				 	device->dev_root = root->fs_info->dev_root;
			
 
				 	device->in_fs_metadata = 1;
			
 
				-	if (device->writeable)
			
 
				+	if (device->writeable) {
			
 
				 		device->fs_devices->total_rw_bytes += device->total_bytes;
			
 
				+		spin_lock(&root->fs_info->free_chunk_lock);
			
 
				+		root->fs_info->free_chunk_space += device->total_bytes -
			
 
				+			device->bytes_used;
			
 
				+		spin_unlock(&root->fs_info->free_chunk_lock);
			
 
				+	}
			
 
				 	ret = 0;
			
 
				 	return ret;
			
 
				 }
			
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -127,6 +127,17 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 
				 again:
			
 
				 	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
			
 
				 				      name, name_len, value, size);
			
 
				+	/*
			
 
				+	 * If we're setting an xattr to a new value but the new value is, say,
			
 
				+	 * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting
			
 
				+	 * back from split_leaf.  This is because it thinks we'll be extending
			
 
				+	 * the existing item size, but we're asking for enough space to add the
			
 
				+	 * item itself.  So if we get EOVERFLOW just set ret to EEXIST and let
			
 
				+	 * the rest of the function figure it out.
			
 
				+	 */
			
 
				+	if (ret == -EOVERFLOW)
			
 
				+		ret = -EEXIST;
			
 
				+
			
 
				 	if (ret == -EEXIST) {
			
 
				 		if (flags & XATTR_CREATE)
			
 
				 			goto out;