@@ -72,8 +72,7 @@ enum {
 	RESERVE_ALLOC_NO_ACCOUNT = 2,
 };

-static int update_block_group(struct btrfs_trans_handle *trans,
-			      struct btrfs_root *root,
+static int update_block_group(struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
@@ -103,6 +102,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
 			    int dump_block_groups);
 static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
 				       u64 num_bytes, int reserve);
+static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
+			       u64 num_bytes);

 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -162,6 +163,10 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
 	rb_link_node(&block_group->cache_node, parent, p);
 	rb_insert_color(&block_group->cache_node,
 			&info->block_group_cache_tree);
+
+	if (info->first_logical_byte > block_group->key.objectid)
+		info->first_logical_byte = block_group->key.objectid;
+
 	spin_unlock(&info->block_group_cache_lock);

 	return 0;
@@ -203,8 +208,11 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
 			break;
 		}
 	}
-	if (ret)
+	if (ret) {
 		btrfs_get_block_group(ret);
+		if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
+			info->first_logical_byte = ret->key.objectid;
+	}
 	spin_unlock(&info->block_group_cache_lock);

 	return ret;
@@ -468,8 +476,6 @@ out:
 }

 static int cache_block_group(struct btrfs_block_group_cache *cache,
-			     struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root,
 			     int load_cache_only)
 {
 	DEFINE_WAIT(wait);
@@ -527,12 +533,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 		cache->cached = BTRFS_CACHE_FAST;
 	spin_unlock(&cache->lock);

-	/*
-	 * We can't do the read from on-disk cache during a commit since we need
-	 * to have the normal tree locking. Also if we are currently trying to
-	 * allocate blocks for the tree root we can't do the fast caching since
-	 * we likely hold important locks.
-	 */
 	if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
 		ret = load_free_space_cache(fs_info, cache);

@@ -2143,7 +2143,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 						      node->num_bytes);
 			}
 		}
-		mutex_unlock(&head->mutex);
 		return ret;
 	}

@@ -2258,7 +2257,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 			 * process of being added. Don't run this ref yet.
 			 */
 			list_del_init(&locked_ref->cluster);
-			mutex_unlock(&locked_ref->mutex);
+			btrfs_delayed_ref_unlock(locked_ref);
 			locked_ref = NULL;
 			delayed_refs->num_heads_ready++;
 			spin_unlock(&delayed_refs->lock);
@@ -2285,7 +2284,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 			ref = &locked_ref->node;

 			if (extent_op && must_insert_reserved) {
-				kfree(extent_op);
+				btrfs_free_delayed_extent_op(extent_op);
 				extent_op = NULL;
 			}

@@ -2294,28 +2293,25 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,

 				ret = run_delayed_extent_op(trans, root,
 							    ref, extent_op);
-				kfree(extent_op);
+				btrfs_free_delayed_extent_op(extent_op);

 				if (ret) {
-					list_del_init(&locked_ref->cluster);
-					mutex_unlock(&locked_ref->mutex);
-
-					printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
+					printk(KERN_DEBUG
+					       "btrfs: run_delayed_extent_op "
+					       "returned %d\n", ret);
 					spin_lock(&delayed_refs->lock);
+					btrfs_delayed_ref_unlock(locked_ref);
 					return ret;
 				}

 				goto next;
 			}
-
-			list_del_init(&locked_ref->cluster);
-			locked_ref = NULL;
 		}

 		ref->in_tree = 0;
 		rb_erase(&ref->rb_node, &delayed_refs->root);
 		delayed_refs->num_entries--;
-		if (locked_ref) {
+		if (!btrfs_delayed_ref_is_head(ref)) {
 			/*
 			 * when we play the delayed ref, also correct the
 			 * ref_mod on head
@@ -2337,20 +2333,29 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		ret = run_one_delayed_ref(trans, root, ref, extent_op,
 					  must_insert_reserved);

-		btrfs_put_delayed_ref(ref);
-		kfree(extent_op);
-		count++;
-
+		btrfs_free_delayed_extent_op(extent_op);
 		if (ret) {
-			if (locked_ref) {
-				list_del_init(&locked_ref->cluster);
-				mutex_unlock(&locked_ref->mutex);
-			}
-			printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
+			btrfs_delayed_ref_unlock(locked_ref);
+			btrfs_put_delayed_ref(ref);
+			printk(KERN_DEBUG
+			       "btrfs: run_one_delayed_ref returned %d\n", ret);
 			spin_lock(&delayed_refs->lock);
 			return ret;
 		}

+		/*
+		 * If this node is a head, that means all the refs in this head
+		 * have been dealt with, and we will pick the next head to deal
+		 * with, so we must unlock the head and drop it from the cluster
+		 * list before we release it.
+		 */
+		if (btrfs_delayed_ref_is_head(ref)) {
+			list_del_init(&locked_ref->cluster);
+			btrfs_delayed_ref_unlock(locked_ref);
+			locked_ref = NULL;
+		}
+		btrfs_put_delayed_ref(ref);
+		count++;
 next:
 		cond_resched();
 		spin_lock(&delayed_refs->lock);
@@ -2500,6 +2505,7 @@ again:

 		ret = run_clustered_refs(trans, root, &cluster);
 		if (ret < 0) {
+			btrfs_release_ref_cluster(&cluster);
 			spin_unlock(&delayed_refs->lock);
 			btrfs_abort_transaction(trans, root, ret);
 			return ret;
@@ -2586,7 +2592,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_extent_op *extent_op;
 	int ret;

-	extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
+	extent_op = btrfs_alloc_delayed_extent_op();
 	if (!extent_op)
 		return -ENOMEM;

@@ -2598,7 +2604,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 	ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
 					  num_bytes, extent_op);
 	if (ret)
-		kfree(extent_op);
+		btrfs_free_delayed_extent_op(extent_op);
 	return ret;
 }

@@ -3223,12 +3229,14 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 	u64 extra_flags = chunk_to_extended(flags) &
 				BTRFS_EXTENDED_PROFILE_MASK;

+	write_seqlock(&fs_info->profiles_lock);
 	if (flags & BTRFS_BLOCK_GROUP_DATA)
 		fs_info->avail_data_alloc_bits |= extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_METADATA)
 		fs_info->avail_metadata_alloc_bits |= extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
 		fs_info->avail_system_alloc_bits |= extra_flags;
+	write_sequnlock(&fs_info->profiles_lock);
 }

 /*
@@ -3320,12 +3328,18 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)

 static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
 {
-	if (flags & BTRFS_BLOCK_GROUP_DATA)
-		flags |= root->fs_info->avail_data_alloc_bits;
-	else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
-		flags |= root->fs_info->avail_system_alloc_bits;
-	else if (flags & BTRFS_BLOCK_GROUP_METADATA)
-		flags |= root->fs_info->avail_metadata_alloc_bits;
+	unsigned seq;
+
+	do {
+		seq = read_seqbegin(&root->fs_info->profiles_lock);
+
+		if (flags & BTRFS_BLOCK_GROUP_DATA)
+			flags |= root->fs_info->avail_data_alloc_bits;
+		else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+			flags |= root->fs_info->avail_system_alloc_bits;
+		else if (flags & BTRFS_BLOCK_GROUP_METADATA)
+			flags |= root->fs_info->avail_metadata_alloc_bits;
+	} while (read_seqretry(&root->fs_info->profiles_lock, seq));

 	return btrfs_reduce_alloc_profile(root, flags);
 }
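Note on the two hunks above: the avail_*_alloc_bits fields are now updated under write_seqlock() and sampled in get_alloc_profile() with the read_seqbegin()/read_seqretry() retry idiom, so readers see the three fields as a consistent set without taking a lock. A minimal userspace sketch of that idiom (illustration only, not part of the patch; all names below are made up):

/* toy seqcount-protected read, C11 atomics, sequentially consistent for simplicity */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct profiles {
	atomic_uint seq;                 /* even = stable, odd = writer active */
	_Atomic uint64_t avail_data;
	_Atomic uint64_t avail_metadata;
	_Atomic uint64_t avail_system;
};

static void profiles_update(struct profiles *p, uint64_t d, uint64_t m, uint64_t s)
{
	atomic_fetch_add(&p->seq, 1);    /* seq goes odd: readers must retry */
	atomic_store(&p->avail_data, d);
	atomic_store(&p->avail_metadata, m);
	atomic_store(&p->avail_system, s);
	atomic_fetch_add(&p->seq, 1);    /* seq goes even again: snapshot is stable */
}

static uint64_t profiles_read_data(struct profiles *p)
{
	unsigned int seq;
	uint64_t val;

	do {
		do {
			seq = atomic_load(&p->seq);
		} while (seq & 1);       /* wait out an in-progress writer */
		val = atomic_load(&p->avail_data);
	} while (atomic_load(&p->seq) != seq);   /* retry if a writer raced us */
	return val;
}

int main(void)
{
	struct profiles p = { 0 };

	profiles_update(&p, 0x1, 0x2, 0x4);
	printf("avail_data bits: %llx\n",
	       (unsigned long long)profiles_read_data(&p));
	return 0;
}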
@@ -3564,6 +3578,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	int wait_for_alloc = 0;
 	int ret = 0;

+	/* Don't re-enter if we're already allocating a chunk */
+	if (trans->allocating_chunk)
+		return -ENOSPC;
+
 	space_info = __find_space_info(extent_root->fs_info, flags);
 	if (!space_info) {
 		ret = update_space_info(extent_root->fs_info, flags,
@@ -3606,6 +3624,8 @@ again:
 		goto again;
 	}

+	trans->allocating_chunk = true;
+
 	/*
 	 * If we have mixed data/metadata chunks we want to make sure we keep
 	 * allocating mixed chunks instead of individual chunks.
@@ -3632,6 +3652,7 @@ again:
 	check_system_chunk(trans, extent_root, flags);

 	ret = btrfs_alloc_chunk(trans, extent_root, flags);
+	trans->allocating_chunk = false;
 	if (ret < 0 && ret != -ENOSPC)
 		goto out;

@@ -3653,13 +3674,31 @@ static int can_overcommit(struct btrfs_root *root,
 			  struct btrfs_space_info *space_info, u64 bytes,
 			  enum btrfs_reserve_flush_enum flush)
 {
+	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
 	u64 profile = btrfs_get_alloc_profile(root, 0);
+	u64 rsv_size = 0;
 	u64 avail;
 	u64 used;
+	u64 to_add;

 	used = space_info->bytes_used + space_info->bytes_reserved +
-		space_info->bytes_pinned + space_info->bytes_readonly +
-		space_info->bytes_may_use;
+		space_info->bytes_pinned + space_info->bytes_readonly;
+
+	spin_lock(&global_rsv->lock);
+	rsv_size = global_rsv->size;
+	spin_unlock(&global_rsv->lock);
+
+	/*
+	 * We only want to allow over committing if we have lots of actual space
+	 * free, but if we don't have enough space to handle the global reserve
+	 * space then we could end up having a real enospc problem when trying
+	 * to allocate a chunk or some other such important allocation.
+	 */
+	rsv_size <<= 1;
+	if (used + rsv_size >= space_info->total_bytes)
+		return 0;
+
+	used += space_info->bytes_may_use;

 	spin_lock(&root->fs_info->free_chunk_lock);
 	avail = root->fs_info->free_chunk_space;
@@ -3674,27 +3713,38 @@ static int can_overcommit(struct btrfs_root *root,
 		       BTRFS_BLOCK_GROUP_RAID10))
 		avail >>= 1;

+	to_add = space_info->total_bytes;
+
 	/*
 	 * If we aren't flushing all things, let us overcommit up to
 	 * 1/2th of the space. If we can flush, don't let us overcommit
 	 * too much, let it overcommit up to 1/8 of the space.
 	 */
 	if (flush == BTRFS_RESERVE_FLUSH_ALL)
-		avail >>= 3;
+		to_add >>= 3;
 	else
-		avail >>= 1;
+		to_add >>= 1;

-	if (used + bytes < space_info->total_bytes + avail)
+	/*
+	 * Limit the overcommit to the amount of free space we could possibly
+	 * allocate for chunks.
+	 */
+	to_add = min(avail, to_add);
+
+	if (used + bytes < space_info->total_bytes + to_add)
 		return 1;
 	return 0;
 }

-static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
-					       unsigned long nr_pages,
-					       enum wb_reason reason)
+static inline int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
+						      unsigned long nr_pages,
+						      enum wb_reason reason)
 {
-	if (!writeback_in_progress(sb->s_bdi) &&
-	    down_read_trylock(&sb->s_umount)) {
+	/* the flusher is dealing with the dirty inodes now. */
+	if (writeback_in_progress(sb->s_bdi))
+		return 1;
+
+	if (down_read_trylock(&sb->s_umount)) {
 		writeback_inodes_sb_nr(sb, nr_pages, reason);
 		up_read(&sb->s_umount);
 		return 1;
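Note on the two can_overcommit() hunks above: a reservation is now refused outright when the accounted space plus twice the global reserve already covers the space_info, and the overcommit headroom is capped by both a fraction of total_bytes and the unallocated chunk space. A standalone arithmetic sketch (illustration only, not part of the patch; the numbers are made up):

#include <stdint.h>
#include <stdio.h>

static int can_overcommit(uint64_t total_bytes, uint64_t used, uint64_t may_use,
			  uint64_t global_rsv_size, uint64_t avail,
			  uint64_t bytes, int flush_all)
{
	uint64_t to_add = total_bytes;

	/* keep enough real space around to refill the global reserve */
	if (used + (global_rsv_size << 1) >= total_bytes)
		return 0;

	used += may_use;

	/* overcommit by 1/8 when flushing everything is allowed, else by 1/2 */
	to_add >>= flush_all ? 3 : 1;

	/* never overcommit past what unallocated chunk space could provide */
	if (avail < to_add)
		to_add = avail;

	return used + bytes < total_bytes + to_add;
}

int main(void)
{
	uint64_t GiB = 1024ULL * 1024 * 1024;

	printf("small reservation: %d\n",
	       can_overcommit(8 * GiB, 5 * GiB, 2 * GiB, 512 * 1024 * 1024,
			      4 * GiB, 256 * 1024 * 1024, 1));
	printf("huge reservation:  %d\n",
	       can_overcommit(8 * GiB, 5 * GiB, 2 * GiB, 512 * 1024 * 1024,
			      4 * GiB, 4 * GiB, 1));
	return 0;
}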
@@ -3703,6 +3753,28 @@ static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
 	return 0;
 }

+void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
+				  unsigned long nr_pages)
+{
+	struct super_block *sb = root->fs_info->sb;
+	int started;
+
+	/* If we can not start writeback, just sync all the delalloc file. */
+	started = writeback_inodes_sb_nr_if_idle_safe(sb, nr_pages,
+						      WB_REASON_FS_FREE_SPACE);
+	if (!started) {
+		/*
+		 * We needn't worry the filesystem going from r/w to r/o though
+		 * we don't acquire ->s_umount mutex, because the filesystem
+		 * should guarantee the delalloc inodes list be empty after
+		 * the filesystem is readonly(all dirty pages are written to
+		 * the disk).
+		 */
+		btrfs_start_delalloc_inodes(root, 0);
+		btrfs_wait_ordered_extents(root, 0);
+	}
+}
+
 /*
  * shrink metadata reservation for delalloc
  */
@@ -3724,7 +3796,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	space_info = block_rsv->space_info;

 	smp_mb();
-	delalloc_bytes = root->fs_info->delalloc_bytes;
+	delalloc_bytes = percpu_counter_sum_positive(
+						&root->fs_info->delalloc_bytes);
 	if (delalloc_bytes == 0) {
 		if (trans)
 			return;
@@ -3735,10 +3808,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	while (delalloc_bytes && loops < 3) {
 		max_reclaim = min(delalloc_bytes, to_reclaim);
 		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
-		writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb,
-						    nr_pages,
-						    WB_REASON_FS_FREE_SPACE);
-
+		btrfs_writeback_inodes_sb_nr(root, nr_pages);
 		/*
 		 * We need to wait for the async pages to actually start before
 		 * we do anything.
@@ -3766,7 +3836,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 			break;
 		}
 		smp_mb();
-		delalloc_bytes = root->fs_info->delalloc_bytes;
+		delalloc_bytes = percpu_counter_sum_positive(
+						&root->fs_info->delalloc_bytes);
 	}
 }

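Note on the shrink_delalloc() hunks above: delalloc_bytes is now read with percpu_counter_sum_positive(), i.e. it is maintained as a per-CPU counter whose real value is the sum of all per-CPU shards, clamped at zero. A toy userspace analogue of that counter (illustration only, not part of the patch; names and shard count are made up):

#include <stdint.h>
#include <stdio.h>

#define NR_SHARDS 4

struct toy_percpu_counter {
	int64_t shard[NR_SHARDS];
};

static void counter_add(struct toy_percpu_counter *c, int cpu, int64_t delta)
{
	c->shard[cpu % NR_SHARDS] += delta;   /* cheap update, no shared cache line */
}

static int64_t counter_sum_positive(const struct toy_percpu_counter *c)
{
	int64_t sum = 0;
	int i;

	for (i = 0; i < NR_SHARDS; i++)
		sum += c->shard[i];
	return sum > 0 ? sum : 0;             /* never report a negative total */
}

int main(void)
{
	struct toy_percpu_counter delalloc = { { 0 } };

	counter_add(&delalloc, 0, 4096);      /* dirty a page on CPU 0 */
	counter_add(&delalloc, 2, 8192);      /* dirty two pages on CPU 2 */
	counter_add(&delalloc, 1, -4096);     /* writeback completes on CPU 1 */
	printf("delalloc bytes: %lld\n",
	       (long long)counter_sum_positive(&delalloc));
	return 0;
}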
@@ -4030,6 +4101,15 @@ again:
 		goto again;

 out:
+	if (ret == -ENOSPC &&
+	    unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
+		struct btrfs_block_rsv *global_rsv =
+			&root->fs_info->global_block_rsv;
+
+		if (block_rsv != global_rsv &&
+		    !block_rsv_use_bytes(global_rsv, orig_bytes))
+			ret = 0;
+	}
 	if (flushing) {
 		spin_lock(&space_info->lock);
 		space_info->flush = 0;
@@ -4668,7 +4748,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 	spin_lock(&BTRFS_I(inode)->lock);
 	dropped = drop_outstanding_extent(inode);

-	to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+	if (num_bytes)
+		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
 	spin_unlock(&BTRFS_I(inode)->lock);
 	if (dropped > 0)
 		to_free += btrfs_calc_trans_metadata_size(root, dropped);
@@ -4735,8 +4816,7 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
 	btrfs_free_reserved_data_space(inode, num_bytes);
 }

-static int update_block_group(struct btrfs_trans_handle *trans,
-			      struct btrfs_root *root,
+static int update_block_group(struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc)
 {
 	struct btrfs_block_group_cache *cache = NULL;
@@ -4773,7 +4853,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 	 * space back to the block group, otherwise we will leak space.
 	 */
 	if (!alloc && cache->cached == BTRFS_CACHE_NO)
-		cache_block_group(cache, trans, NULL, 1);
+		cache_block_group(cache, 1);

 	byte_in_group = bytenr - cache->key.objectid;
 	WARN_ON(byte_in_group > cache->key.offset);
@@ -4823,6 +4903,13 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
 	struct btrfs_block_group_cache *cache;
 	u64 bytenr;

+	spin_lock(&root->fs_info->block_group_cache_lock);
+	bytenr = root->fs_info->first_logical_byte;
+	spin_unlock(&root->fs_info->block_group_cache_lock);
+
+	if (bytenr < (u64)-1)
+		return bytenr;
+
 	cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
 	if (!cache)
 		return 0;
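Note on the first_logical_byte() hunk above, taken together with the earlier btrfs_add_block_group_cache() and block_group_cache_tree_search() hunks: fs_info->first_logical_byte caches the smallest block group start seen so far, with (u64)-1 meaning "unknown", so the common case skips the rb-tree walk; adding a block group can only lower the cached value, and removing the block group that owns it invalidates it. A toy single-threaded sketch of that scheme (illustration only, not part of the patch; names are made up):

#include <stdint.h>
#include <stdio.h>

#define UNKNOWN UINT64_MAX

static uint64_t first_logical_byte_cache = UNKNOWN;

static void block_group_added(uint64_t objectid)
{
	if (first_logical_byte_cache > objectid)
		first_logical_byte_cache = objectid;      /* new minimum */
}

static void block_group_removed(uint64_t objectid)
{
	if (first_logical_byte_cache == objectid)
		first_logical_byte_cache = UNKNOWN;       /* force a rescan next time */
}

static uint64_t first_logical_byte(void)
{
	if (first_logical_byte_cache < UNKNOWN)
		return first_logical_byte_cache;          /* fast path: cached value */
	return 0;   /* slow-path placeholder: the real code walks the block group tree */
}

int main(void)
{
	block_group_added(12582912);
	block_group_added(4194304);
	printf("first logical byte: %llu\n",
	       (unsigned long long)first_logical_byte());
	block_group_removed(4194304);
	printf("after removal: %llu\n",
	       (unsigned long long)first_logical_byte());
	return 0;
}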
@@ -4873,8 +4960,7 @@ int btrfs_pin_extent(struct btrfs_root *root,
 /*
  * this function must be called within transaction
  */
-int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
-				    struct btrfs_root *root,
+int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
 				    u64 bytenr, u64 num_bytes)
 {
 	struct btrfs_block_group_cache *cache;
@@ -4888,7 +4974,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
 	 * to one because the slow code to read in the free extents does check
 	 * the pinned extents.
 	 */
-	cache_block_group(cache, trans, root, 1);
+	cache_block_group(cache, 1);

 	pin_down_extent(root, cache, bytenr, num_bytes, 0);

@@ -5285,7 +5371,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			}
 		}

-		ret = update_block_group(trans, root, bytenr, num_bytes, 0);
+		ret = update_block_group(root, bytenr, num_bytes, 0);
 		if (ret) {
 			btrfs_abort_transaction(trans, extent_root, ret);
 			goto out;
@@ -5330,7 +5416,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	if (head->extent_op) {
 		if (!head->must_insert_reserved)
 			goto out;
-		kfree(head->extent_op);
+		btrfs_free_delayed_extent_op(head->extent_op);
 		head->extent_op = NULL;
 	}

@@ -5476,7 +5562,6 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
 				u64 num_bytes)
 {
 	struct btrfs_caching_control *caching_ctl;
-	DEFINE_WAIT(wait);

 	caching_ctl = get_caching_control(cache);
 	if (!caching_ctl)
@@ -5493,7 +5578,6 @@ static noinline int
 wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 {
 	struct btrfs_caching_control *caching_ctl;
-	DEFINE_WAIT(wait);

 	caching_ctl = get_caching_control(cache);
 	if (!caching_ctl)
@@ -5507,20 +5591,16 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)

 int __get_raid_index(u64 flags)
 {
-	int index;
-
 	if (flags & BTRFS_BLOCK_GROUP_RAID10)
-		index = 0;
+		return BTRFS_RAID_RAID10;
 	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
-		index = 1;
+		return BTRFS_RAID_RAID1;
 	else if (flags & BTRFS_BLOCK_GROUP_DUP)
-		index = 2;
+		return BTRFS_RAID_DUP;
 	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
-		index = 3;
+		return BTRFS_RAID_RAID0;
 	else
-		index = 4;
-
-	return index;
+		return BTRFS_RAID_SINGLE;
 }

 static int get_block_group_index(struct btrfs_block_group_cache *cache)
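Note on the __get_raid_index() hunk above: the rewrite assumes the BTRFS_RAID_* enumerators carry the same numeric values as the old magic indexes (RAID10=0, RAID1=1, DUP=2, RAID0=3, SINGLE=4), which the later btrfs_can_relocate() hunk also relies on so that arrays indexed by the return value keep working. A standalone sketch of that assumption, using a local copy of the enum rather than the real btrfs header (illustration only, not part of the patch):

#include <assert.h>
#include <stdio.h>

enum toy_raid_types {		/* mirrors the assumed declaration order */
	TOY_RAID_RAID10,
	TOY_RAID_RAID1,
	TOY_RAID_DUP,
	TOY_RAID_RAID0,
	TOY_RAID_SINGLE,
};

#define TOY_BLOCK_GROUP_RAID0	(1ULL << 3)
#define TOY_BLOCK_GROUP_RAID1	(1ULL << 4)
#define TOY_BLOCK_GROUP_DUP	(1ULL << 5)
#define TOY_BLOCK_GROUP_RAID10	(1ULL << 6)

static int toy_get_raid_index(unsigned long long flags)
{
	if (flags & TOY_BLOCK_GROUP_RAID10)
		return TOY_RAID_RAID10;
	else if (flags & TOY_BLOCK_GROUP_RAID1)
		return TOY_RAID_RAID1;
	else if (flags & TOY_BLOCK_GROUP_DUP)
		return TOY_RAID_DUP;
	else if (flags & TOY_BLOCK_GROUP_RAID0)
		return TOY_RAID_RAID0;
	return TOY_RAID_SINGLE;
}

int main(void)
{
	/* the enumerators must line up with the old 0..4 indexes */
	assert(TOY_RAID_RAID10 == 0 && TOY_RAID_RAID1 == 1 &&
	       TOY_RAID_DUP == 2 && TOY_RAID_RAID0 == 3 && TOY_RAID_SINGLE == 4);
	printf("raid1 index: %d\n", toy_get_raid_index(TOY_BLOCK_GROUP_RAID1));
	return 0;
}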
@@ -5678,8 +5758,7 @@ have_block_group:
 		cached = block_group_cache_done(block_group);
 		if (unlikely(!cached)) {
 			found_uncached_bg = true;
-			ret = cache_block_group(block_group, trans,
-						orig_root, 0);
+			ret = cache_block_group(block_group, 0);
 			BUG_ON(ret < 0);
 			ret = 0;
 		}
@@ -6108,7 +6187,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(path->nodes[0]);
 	btrfs_free_path(path);

-	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
+	ret = update_block_group(root, ins->objectid, ins->offset, 1);
 	if (ret) { /* -ENOENT, logic error */
 		printk(KERN_ERR "btrfs update block group failed for %llu "
 		       "%llu\n", (unsigned long long)ins->objectid,
@@ -6172,7 +6251,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_free_path(path);

-	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
+	ret = update_block_group(root, ins->objectid, ins->offset, 1);
 	if (ret) { /* -ENOENT, logic error */
 		printk(KERN_ERR "btrfs update block group failed for %llu "
 		       "%llu\n", (unsigned long long)ins->objectid,
@@ -6215,7 +6294,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	u64 num_bytes = ins->offset;

 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-	cache_block_group(block_group, trans, NULL, 0);
+	cache_block_group(block_group, 0);
 	caching_ctl = get_caching_control(block_group);

 	if (!caching_ctl) {
@@ -6329,12 +6408,14 @@ use_block_rsv(struct btrfs_trans_handle *trans,
 	if (!ret)
 		return block_rsv;
 	if (ret && !block_rsv->failfast) {
-		static DEFINE_RATELIMIT_STATE(_rs,
-					      DEFAULT_RATELIMIT_INTERVAL,
-					      /*DEFAULT_RATELIMIT_BURST*/ 2);
-		if (__ratelimit(&_rs))
-			WARN(1, KERN_DEBUG "btrfs: block rsv returned %d\n",
-			     ret);
+		if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+			static DEFINE_RATELIMIT_STATE(_rs,
+					DEFAULT_RATELIMIT_INTERVAL * 10,
+					/*DEFAULT_RATELIMIT_BURST*/ 1);
+			if (__ratelimit(&_rs))
+				WARN(1, KERN_DEBUG
+					"btrfs: block rsv returned %d\n", ret);
+		}
 		ret = reserve_metadata_bytes(root, block_rsv, blocksize,
 					     BTRFS_RESERVE_NO_FLUSH);
 		if (!ret) {
@@ -6400,7 +6481,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,

 	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
 		struct btrfs_delayed_extent_op *extent_op;
-		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
+		extent_op = btrfs_alloc_delayed_extent_op();
 		BUG_ON(!extent_op); /* -ENOMEM */
 		if (key)
 			memcpy(&extent_op->key, key, sizeof(extent_op->key));
@@ -7481,16 +7562,16 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 		index = get_block_group_index(block_group);
 	}

-	if (index == 0) {
+	if (index == BTRFS_RAID_RAID10) {
 		dev_min = 4;
 		/* Divide by 2 */
 		min_free >>= 1;
-	} else if (index == 1) {
+	} else if (index == BTRFS_RAID_RAID1) {
 		dev_min = 2;
-	} else if (index == 2) {
+	} else if (index == BTRFS_RAID_DUP) {
 		/* Multiply by 2 */
 		min_free <<= 1;
-	} else if (index == 3) {
+	} else if (index == BTRFS_RAID_RAID0) {
 		dev_min = fs_devices->rw_devices;
 		do_div(min_free, dev_min);
 	}
@@ -7651,11 +7732,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 		space_info = list_entry(info->space_info.next,
 					struct btrfs_space_info,
 					list);
-		if (space_info->bytes_pinned > 0 ||
-		    space_info->bytes_reserved > 0 ||
-		    space_info->bytes_may_use > 0) {
-			WARN_ON(1);
-			dump_space_info(space_info, 0, 0);
+		if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
+			if (space_info->bytes_pinned > 0 ||
+			    space_info->bytes_reserved > 0 ||
+			    space_info->bytes_may_use > 0) {
+				WARN_ON(1);
+				dump_space_info(space_info, 0, 0);
+			}
 		}
 		list_del(&space_info->list);
 		kfree(space_info);
@@ -7932,12 +8015,14 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 	u64 extra_flags = chunk_to_extended(flags) &
 				BTRFS_EXTENDED_PROFILE_MASK;

+	write_seqlock(&fs_info->profiles_lock);
 	if (flags & BTRFS_BLOCK_GROUP_DATA)
 		fs_info->avail_data_alloc_bits &= ~extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_METADATA)
 		fs_info->avail_metadata_alloc_bits &= ~extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
 		fs_info->avail_system_alloc_bits &= ~extra_flags;
+	write_sequnlock(&fs_info->profiles_lock);
 }

 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
@@ -8036,6 +8121,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	spin_lock(&root->fs_info->block_group_cache_lock);
 	rb_erase(&block_group->cache_node,
 		 &root->fs_info->block_group_cache_tree);
+
+	if (root->fs_info->first_logical_byte == block_group->key.objectid)
+		root->fs_info->first_logical_byte = (u64)-1;
 	spin_unlock(&root->fs_info->block_group_cache_lock);

 	down_write(&block_group->space_info->groups_sem);
@@ -8158,7 +8246,7 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)

 		if (end - start >= range->minlen) {
 			if (!block_group_cache_done(cache)) {
-				ret = cache_block_group(cache, NULL, root, 0);
+				ret = cache_block_group(cache, 0);
 				if (!ret)
 					wait_block_group_cache_done(cache);
 			}