Переглянути джерело

Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (22 commits)
  Btrfs: Fix async caching interaction with unmount
  Btrfs: change how we unpin extents
  Btrfs: Correct redundant test in add_inode_ref
  Btrfs: find smallest available device extent during chunk allocation
  Btrfs: clear all space_info->full after removing a block group
  Btrfs: make flushoncommit mount option correctly wait on ordered_extents
  Btrfs: Avoid delayed reference update looping
  Btrfs: Fix ordering of key field checks in btrfs_previous_item
  Btrfs: find_free_dev_extent doesn't handle holes at the start of the device
  Btrfs: Remove code duplication in comp_keys
  Btrfs: async block group caching
  Btrfs: use hybrid extents+bitmap rb tree for free space
  Btrfs: Fix crash on read failures at mount
  Btrfs: remove of redundant btrfs_header_level
  Btrfs: adjust NULL test
  Btrfs: Remove broken sanity check from btrfs_rmap_block()
  Btrfs: convert nested spin_lock_irqsave to spin_lock
  Btrfs: make sure all dirty blocks are written at commit time
  Btrfs: fix locking issue in btrfs_find_next_key
  Btrfs: fix double increment of path->slots[0] in btrfs_next_leaf
  ...
Linus Torvalds 16 років тому
батько
коміт
655c5d8fc1

+ 2 - 2
fs/btrfs/async-thread.c

@@ -424,11 +424,11 @@ int btrfs_requeue_work(struct btrfs_work *work)
 	 * list
 	 * list
 	 */
 	 */
 	if (worker->idle) {
 	if (worker->idle) {
-		spin_lock_irqsave(&worker->workers->lock, flags);
+		spin_lock(&worker->workers->lock);
 		worker->idle = 0;
 		worker->idle = 0;
 		list_move_tail(&worker->worker_list,
 		list_move_tail(&worker->worker_list,
 			       &worker->workers->worker_list);
 			       &worker->workers->worker_list);
-		spin_unlock_irqrestore(&worker->workers->lock, flags);
+		spin_unlock(&worker->workers->lock);
 	}
 	}
 	if (!worker->working) {
 	if (!worker->working) {
 		wake = 1;
 		wake = 1;

+ 68 - 53
fs/btrfs/ctree.c

@@ -557,19 +557,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
 
 
 	btrfs_disk_key_to_cpu(&k1, disk);
 	btrfs_disk_key_to_cpu(&k1, disk);
 
 
-	if (k1.objectid > k2->objectid)
-		return 1;
-	if (k1.objectid < k2->objectid)
-		return -1;
-	if (k1.type > k2->type)
-		return 1;
-	if (k1.type < k2->type)
-		return -1;
-	if (k1.offset > k2->offset)
-		return 1;
-	if (k1.offset < k2->offset)
-		return -1;
-	return 0;
+	return btrfs_comp_cpu_keys(&k1, k2);
 }
 }
 
 
 /*
 /*
@@ -1052,9 +1040,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 		return 0;
 		return 0;
 
 
-	if (btrfs_header_nritems(mid) > 2)
-		return 0;
-
 	if (btrfs_header_nritems(mid) < 2)
 	if (btrfs_header_nritems(mid) < 2)
 		err_on_enospc = 1;
 		err_on_enospc = 1;
 
 
@@ -1701,6 +1686,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	struct extent_buffer *b;
 	struct extent_buffer *b;
 	int slot;
 	int slot;
 	int ret;
 	int ret;
+	int err;
 	int level;
 	int level;
 	int lowest_unlock = 1;
 	int lowest_unlock = 1;
 	u8 lowest_level = 0;
 	u8 lowest_level = 0;
@@ -1737,8 +1723,6 @@ again:
 			p->locks[level] = 1;
 			p->locks[level] = 1;
 
 
 		if (cow) {
 		if (cow) {
-			int wret;
-
 			/*
 			/*
 			 * if we don't really need to cow this block
 			 * if we don't really need to cow this block
 			 * then we don't want to set the path blocking,
 			 * then we don't want to set the path blocking,
@@ -1749,12 +1733,12 @@ again:
 
 
 			btrfs_set_path_blocking(p);
 			btrfs_set_path_blocking(p);
 
 
-			wret = btrfs_cow_block(trans, root, b,
-					       p->nodes[level + 1],
-					       p->slots[level + 1], &b);
-			if (wret) {
+			err = btrfs_cow_block(trans, root, b,
+					      p->nodes[level + 1],
+					      p->slots[level + 1], &b);
+			if (err) {
 				free_extent_buffer(b);
 				free_extent_buffer(b);
-				ret = wret;
+				ret = err;
 				goto done;
 				goto done;
 			}
 			}
 		}
 		}
@@ -1793,41 +1777,45 @@ cow_done:
 		ret = bin_search(b, key, level, &slot);
 		ret = bin_search(b, key, level, &slot);
 
 
 		if (level != 0) {
 		if (level != 0) {
-			if (ret && slot > 0)
+			int dec = 0;
+			if (ret && slot > 0) {
+				dec = 1;
 				slot -= 1;
 				slot -= 1;
+			}
 			p->slots[level] = slot;
 			p->slots[level] = slot;
-			ret = setup_nodes_for_search(trans, root, p, b, level,
+			err = setup_nodes_for_search(trans, root, p, b, level,
 						     ins_len);
 						     ins_len);
-			if (ret == -EAGAIN)
+			if (err == -EAGAIN)
 				goto again;
 				goto again;
-			else if (ret)
+			if (err) {
+				ret = err;
 				goto done;
 				goto done;
+			}
 			b = p->nodes[level];
 			b = p->nodes[level];
 			slot = p->slots[level];
 			slot = p->slots[level];
 
 
 			unlock_up(p, level, lowest_unlock);
 			unlock_up(p, level, lowest_unlock);
 
 
-			/* this is only true while dropping a snapshot */
 			if (level == lowest_level) {
 			if (level == lowest_level) {
-				ret = 0;
+				if (dec)
+					p->slots[level]++;
 				goto done;
 				goto done;
 			}
 			}
 
 
-			ret = read_block_for_search(trans, root, p,
+			err = read_block_for_search(trans, root, p,
 						    &b, level, slot, key);
 						    &b, level, slot, key);
-			if (ret == -EAGAIN)
+			if (err == -EAGAIN)
 				goto again;
 				goto again;
-
-			if (ret == -EIO)
+			if (err) {
+				ret = err;
 				goto done;
 				goto done;
+			}
 
 
 			if (!p->skip_locking) {
 			if (!p->skip_locking) {
-				int lret;
-
 				btrfs_clear_path_blocking(p, NULL);
 				btrfs_clear_path_blocking(p, NULL);
-				lret = btrfs_try_spin_lock(b);
+				err = btrfs_try_spin_lock(b);
 
 
-				if (!lret) {
+				if (!err) {
 					btrfs_set_path_blocking(p);
 					btrfs_set_path_blocking(p);
 					btrfs_tree_lock(b);
 					btrfs_tree_lock(b);
 					btrfs_clear_path_blocking(p, b);
 					btrfs_clear_path_blocking(p, b);
@@ -1837,16 +1825,14 @@ cow_done:
 			p->slots[level] = slot;
 			p->slots[level] = slot;
 			if (ins_len > 0 &&
 			if (ins_len > 0 &&
 			    btrfs_leaf_free_space(root, b) < ins_len) {
 			    btrfs_leaf_free_space(root, b) < ins_len) {
-				int sret;
-
 				btrfs_set_path_blocking(p);
 				btrfs_set_path_blocking(p);
-				sret = split_leaf(trans, root, key,
-						      p, ins_len, ret == 0);
+				err = split_leaf(trans, root, key,
+						 p, ins_len, ret == 0);
 				btrfs_clear_path_blocking(p, NULL);
 				btrfs_clear_path_blocking(p, NULL);
 
 
-				BUG_ON(sret > 0);
-				if (sret) {
-					ret = sret;
+				BUG_ON(err > 0);
+				if (err) {
+					ret = err;
 					goto done;
 					goto done;
 				}
 				}
 			}
 			}
@@ -3807,7 +3793,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		}
 		}
 
 
 		/* delete the leaf if it is mostly empty */
 		/* delete the leaf if it is mostly empty */
-		if (used < BTRFS_LEAF_DATA_SIZE(root) / 2) {
+		if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
 			/* push_leaf_left fixes the path.
 			/* push_leaf_left fixes the path.
 			 * make sure the path still points to our leaf
 			 * make sure the path still points to our leaf
 			 * for possible call to del_ptr below
 			 * for possible call to del_ptr below
@@ -4042,10 +4028,9 @@ out:
  * calling this function.
  * calling this function.
  */
  */
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
-			struct btrfs_key *key, int lowest_level,
+			struct btrfs_key *key, int level,
 			int cache_only, u64 min_trans)
 			int cache_only, u64 min_trans)
 {
 {
-	int level = lowest_level;
 	int slot;
 	int slot;
 	struct extent_buffer *c;
 	struct extent_buffer *c;
 
 
@@ -4058,11 +4043,40 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
 		c = path->nodes[level];
 		c = path->nodes[level];
 next:
 next:
 		if (slot >= btrfs_header_nritems(c)) {
 		if (slot >= btrfs_header_nritems(c)) {
-			level++;
-			if (level == BTRFS_MAX_LEVEL)
+			int ret;
+			int orig_lowest;
+			struct btrfs_key cur_key;
+			if (level + 1 >= BTRFS_MAX_LEVEL ||
+			    !path->nodes[level + 1])
 				return 1;
 				return 1;
-			continue;
+
+			if (path->locks[level + 1]) {
+				level++;
+				continue;
+			}
+
+			slot = btrfs_header_nritems(c) - 1;
+			if (level == 0)
+				btrfs_item_key_to_cpu(c, &cur_key, slot);
+			else
+				btrfs_node_key_to_cpu(c, &cur_key, slot);
+
+			orig_lowest = path->lowest_level;
+			btrfs_release_path(root, path);
+			path->lowest_level = level;
+			ret = btrfs_search_slot(NULL, root, &cur_key, path,
+						0, 0);
+			path->lowest_level = orig_lowest;
+			if (ret < 0)
+				return ret;
+
+			c = path->nodes[level];
+			slot = path->slots[level];
+			if (ret == 0)
+				slot++;
+			goto next;
 		}
 		}
+
 		if (level == 0)
 		if (level == 0)
 			btrfs_item_key_to_cpu(c, key, slot);
 			btrfs_item_key_to_cpu(c, key, slot);
 		else {
 		else {
@@ -4146,7 +4160,8 @@ again:
 	 * advance the path if there are now more items available.
 	 * advance the path if there are now more items available.
 	 */
 	 */
 	if (nritems > 0 && path->slots[0] < nritems - 1) {
 	if (nritems > 0 && path->slots[0] < nritems - 1) {
-		path->slots[0]++;
+		if (ret == 0)
+			path->slots[0]++;
 		ret = 0;
 		ret = 0;
 		goto done;
 		goto done;
 	}
 	}
@@ -4278,10 +4293,10 @@ int btrfs_previous_item(struct btrfs_root *root,
 			path->slots[0]--;
 			path->slots[0]--;
 
 
 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-		if (found_key.type == type)
-			return 0;
 		if (found_key.objectid < min_objectid)
 		if (found_key.objectid < min_objectid)
 			break;
 			break;
+		if (found_key.type == type)
+			return 0;
 		if (found_key.objectid == min_objectid &&
 		if (found_key.objectid == min_objectid &&
 		    found_key.type < type)
 		    found_key.type < type)
 			break;
 			break;

+ 25 - 4
fs/btrfs/ctree.h

@@ -481,7 +481,7 @@ struct btrfs_shared_data_ref {
 
 
 struct btrfs_extent_inline_ref {
 struct btrfs_extent_inline_ref {
 	u8 type;
 	u8 type;
-	u64 offset;
+	__le64 offset;
 } __attribute__ ((__packed__));
 } __attribute__ ((__packed__));
 
 
 /* old style backrefs item */
 /* old style backrefs item */
@@ -689,6 +689,7 @@ struct btrfs_space_info {
 	struct list_head block_groups;
 	struct list_head block_groups;
 	spinlock_t lock;
 	spinlock_t lock;
 	struct rw_semaphore groups_sem;
 	struct rw_semaphore groups_sem;
+	atomic_t caching_threads;
 };
 };
 
 
 /*
 /*
@@ -707,6 +708,9 @@ struct btrfs_free_cluster {
 	/* first extent starting offset */
 	/* first extent starting offset */
 	u64 window_start;
 	u64 window_start;
 
 
+	/* if this cluster simply points at a bitmap in the block group */
+	bool points_to_bitmap;
+
 	struct btrfs_block_group_cache *block_group;
 	struct btrfs_block_group_cache *block_group;
 	/*
 	/*
 	 * when a cluster is allocated from a block group, we put the
 	 * when a cluster is allocated from a block group, we put the
@@ -716,24 +720,37 @@ struct btrfs_free_cluster {
 	struct list_head block_group_list;
 	struct list_head block_group_list;
 };
 };
 
 
+enum btrfs_caching_type {
+	BTRFS_CACHE_NO		= 0,
+	BTRFS_CACHE_STARTED	= 1,
+	BTRFS_CACHE_FINISHED	= 2,
+};
+
 struct btrfs_block_group_cache {
 struct btrfs_block_group_cache {
 	struct btrfs_key key;
 	struct btrfs_key key;
 	struct btrfs_block_group_item item;
 	struct btrfs_block_group_item item;
+	struct btrfs_fs_info *fs_info;
 	spinlock_t lock;
 	spinlock_t lock;
-	struct mutex cache_mutex;
 	u64 pinned;
 	u64 pinned;
 	u64 reserved;
 	u64 reserved;
 	u64 flags;
 	u64 flags;
-	int cached;
+	u64 sectorsize;
+	int extents_thresh;
+	int free_extents;
+	int total_bitmaps;
 	int ro;
 	int ro;
 	int dirty;
 	int dirty;
 
 
+	/* cache tracking stuff */
+	wait_queue_head_t caching_q;
+	int cached;
+
 	struct btrfs_space_info *space_info;
 	struct btrfs_space_info *space_info;
 
 
 	/* free space cache stuff */
 	/* free space cache stuff */
 	spinlock_t tree_lock;
 	spinlock_t tree_lock;
-	struct rb_root free_space_bytes;
 	struct rb_root free_space_offset;
 	struct rb_root free_space_offset;
+	u64 free_space;
 
 
 	/* block group cache stuff */
 	/* block group cache stuff */
 	struct rb_node cache_node;
 	struct rb_node cache_node;
@@ -942,6 +959,9 @@ struct btrfs_root {
 	/* the node lock is held while changing the node pointer */
 	/* the node lock is held while changing the node pointer */
 	spinlock_t node_lock;
 	spinlock_t node_lock;
 
 
+	/* taken when updating the commit root */
+	struct rw_semaphore commit_root_sem;
+
 	struct extent_buffer *commit_root;
 	struct extent_buffer *commit_root;
 	struct btrfs_root *log_root;
 	struct btrfs_root *log_root;
 	struct btrfs_root *reloc_root;
 	struct btrfs_root *reloc_root;
@@ -1988,6 +2008,7 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
 				 u64 bytes);
 				 u64 bytes);
 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
 			      u64 bytes);
 			      u64 bytes);
+void btrfs_free_pinned_extents(struct btrfs_fs_info *info);
 /* ctree.c */
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 		     int level, int *slot);
 		     int level, int *slot);

+ 15 - 0
fs/btrfs/disk-io.c

@@ -909,6 +909,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 	spin_lock_init(&root->inode_lock);
 	spin_lock_init(&root->inode_lock);
 	mutex_init(&root->objectid_mutex);
 	mutex_init(&root->objectid_mutex);
 	mutex_init(&root->log_mutex);
 	mutex_init(&root->log_mutex);
+	init_rwsem(&root->commit_root_sem);
 	init_waitqueue_head(&root->log_writer_wait);
 	init_waitqueue_head(&root->log_writer_wait);
 	init_waitqueue_head(&root->log_commit_wait[0]);
 	init_waitqueue_head(&root->log_commit_wait[0]);
 	init_waitqueue_head(&root->log_commit_wait[1]);
 	init_waitqueue_head(&root->log_commit_wait[1]);
@@ -1799,6 +1800,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 					   btrfs_super_chunk_root(disk_super),
 					   btrfs_super_chunk_root(disk_super),
 					   blocksize, generation);
 					   blocksize, generation);
 	BUG_ON(!chunk_root->node);
 	BUG_ON(!chunk_root->node);
+	if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+		printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
+		       sb->s_id);
+		goto fail_chunk_root;
+	}
 	btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
 	btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
 	chunk_root->commit_root = btrfs_root_node(chunk_root);
 	chunk_root->commit_root = btrfs_root_node(chunk_root);
 
 
@@ -1826,6 +1832,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 					  blocksize, generation);
 					  blocksize, generation);
 	if (!tree_root->node)
 	if (!tree_root->node)
 		goto fail_chunk_root;
 		goto fail_chunk_root;
+	if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
+		printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
+		       sb->s_id);
+		goto fail_tree_root;
+	}
 	btrfs_set_root_node(&tree_root->root_item, tree_root->node);
 	btrfs_set_root_node(&tree_root->root_item, tree_root->node);
 	tree_root->commit_root = btrfs_root_node(tree_root);
 	tree_root->commit_root = btrfs_root_node(tree_root);
 
 
@@ -2322,6 +2333,9 @@ int close_ctree(struct btrfs_root *root)
 			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
 			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
 	}
 	}
 
 
+	fs_info->closing = 2;
+	smp_mb();
+
 	if (fs_info->delalloc_bytes) {
 	if (fs_info->delalloc_bytes) {
 		printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
 		printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
 		       (unsigned long long)fs_info->delalloc_bytes);
 		       (unsigned long long)fs_info->delalloc_bytes);
@@ -2343,6 +2357,7 @@ int close_ctree(struct btrfs_root *root)
 	free_extent_buffer(root->fs_info->csum_root->commit_root);
 	free_extent_buffer(root->fs_info->csum_root->commit_root);
 
 
 	btrfs_free_block_groups(root->fs_info);
 	btrfs_free_block_groups(root->fs_info);
+	btrfs_free_pinned_extents(root->fs_info);
 
 
 	del_fs_roots(fs_info);
 	del_fs_roots(fs_info);
 
 

+ 389 - 127
fs/btrfs/extent-tree.c

@@ -21,6 +21,7 @@
 #include <linux/blkdev.h>
 #include <linux/blkdev.h>
 #include <linux/sort.h>
 #include <linux/sort.h>
 #include <linux/rcupdate.h>
 #include <linux/rcupdate.h>
+#include <linux/kthread.h>
 #include "compat.h"
 #include "compat.h"
 #include "hash.h"
 #include "hash.h"
 #include "ctree.h"
 #include "ctree.h"
@@ -61,6 +62,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
 			  u64 flags, int force);
 			  u64 flags, int force);
 
 
+static noinline int
+block_group_cache_done(struct btrfs_block_group_cache *cache)
+{
+	smp_mb();
+	return cache->cached == BTRFS_CACHE_FINISHED;
+}
+
 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
 {
 {
 	return (cache->flags & bits) == bits;
 	return (cache->flags & bits) == bits;
@@ -145,21 +153,71 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
 	return ret;
 	return ret;
 }
 }
 
 
+/*
+ * We always set EXTENT_LOCKED for the super mirror extents so we don't
+ * overwrite them, so those bits need to be unset.  Also, if we are unmounting
+ * with pinned extents still sitting there because we had a block group caching,
+ * we need to clear those now, since we are done.
+ */
+void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
+{
+	u64 start, end, last = 0;
+	int ret;
+
+	while (1) {
+		ret = find_first_extent_bit(&info->pinned_extents, last,
+					    &start, &end,
+					    EXTENT_LOCKED|EXTENT_DIRTY);
+		if (ret)
+			break;
+
+		clear_extent_bits(&info->pinned_extents, start, end,
+				  EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
+		last = end+1;
+	}
+}
+
+static int remove_sb_from_cache(struct btrfs_root *root,
+				struct btrfs_block_group_cache *cache)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	u64 bytenr;
+	u64 *logical;
+	int stripe_len;
+	int i, nr, ret;
+
+	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+		bytenr = btrfs_sb_offset(i);
+		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
+				       cache->key.objectid, bytenr,
+				       0, &logical, &nr, &stripe_len);
+		BUG_ON(ret);
+		while (nr--) {
+			try_lock_extent(&fs_info->pinned_extents,
+					logical[nr],
+					logical[nr] + stripe_len - 1, GFP_NOFS);
+		}
+		kfree(logical);
+	}
+
+	return 0;
+}
+
 /*
 /*
  * this is only called by cache_block_group, since we could have freed extents
  * this is only called by cache_block_group, since we could have freed extents
  * we need to check the pinned_extents for any extents that can't be used yet
  * we need to check the pinned_extents for any extents that can't be used yet
  * since their free space will be released as soon as the transaction commits.
  * since their free space will be released as soon as the transaction commits.
  */
  */
-static int add_new_free_space(struct btrfs_block_group_cache *block_group,
+static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 			      struct btrfs_fs_info *info, u64 start, u64 end)
 			      struct btrfs_fs_info *info, u64 start, u64 end)
 {
 {
-	u64 extent_start, extent_end, size;
+	u64 extent_start, extent_end, size, total_added = 0;
 	int ret;
 	int ret;
 
 
 	while (start < end) {
 	while (start < end) {
 		ret = find_first_extent_bit(&info->pinned_extents, start,
 		ret = find_first_extent_bit(&info->pinned_extents, start,
 					    &extent_start, &extent_end,
 					    &extent_start, &extent_end,
-					    EXTENT_DIRTY);
+					    EXTENT_DIRTY|EXTENT_LOCKED);
 		if (ret)
 		if (ret)
 			break;
 			break;
 
 
@@ -167,6 +225,7 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
 			start = extent_end + 1;
 			start = extent_end + 1;
 		} else if (extent_start > start && extent_start < end) {
 		} else if (extent_start > start && extent_start < end) {
 			size = extent_start - start;
 			size = extent_start - start;
+			total_added += size;
 			ret = btrfs_add_free_space(block_group, start,
 			ret = btrfs_add_free_space(block_group, start,
 						   size);
 						   size);
 			BUG_ON(ret);
 			BUG_ON(ret);
@@ -178,84 +237,79 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
 
 
 	if (start < end) {
 	if (start < end) {
 		size = end - start;
 		size = end - start;
+		total_added += size;
 		ret = btrfs_add_free_space(block_group, start, size);
 		ret = btrfs_add_free_space(block_group, start, size);
 		BUG_ON(ret);
 		BUG_ON(ret);
 	}
 	}
 
 
-	return 0;
+	return total_added;
 }
 }
 
 
-static int remove_sb_from_cache(struct btrfs_root *root,
-				struct btrfs_block_group_cache *cache)
-{
-	u64 bytenr;
-	u64 *logical;
-	int stripe_len;
-	int i, nr, ret;
-
-	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
-		bytenr = btrfs_sb_offset(i);
-		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
-				       cache->key.objectid, bytenr, 0,
-				       &logical, &nr, &stripe_len);
-		BUG_ON(ret);
-		while (nr--) {
-			btrfs_remove_free_space(cache, logical[nr],
-						stripe_len);
-		}
-		kfree(logical);
-	}
-	return 0;
-}
-
-static int cache_block_group(struct btrfs_root *root,
-			     struct btrfs_block_group_cache *block_group)
+static int caching_kthread(void *data)
 {
 {
+	struct btrfs_block_group_cache *block_group = data;
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
+	u64 last = 0;
 	struct btrfs_path *path;
 	struct btrfs_path *path;
 	int ret = 0;
 	int ret = 0;
 	struct btrfs_key key;
 	struct btrfs_key key;
 	struct extent_buffer *leaf;
 	struct extent_buffer *leaf;
 	int slot;
 	int slot;
-	u64 last;
-
-	if (!block_group)
-		return 0;
+	u64 total_found = 0;
 
 
-	root = root->fs_info->extent_root;
-
-	if (block_group->cached)
-		return 0;
+	BUG_ON(!fs_info);
 
 
 	path = btrfs_alloc_path();
 	path = btrfs_alloc_path();
 	if (!path)
 	if (!path)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
-	path->reada = 2;
+	atomic_inc(&block_group->space_info->caching_threads);
+	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+again:
+	/* need to make sure the commit_root doesn't disappear */
+	down_read(&fs_info->extent_root->commit_root_sem);
+
 	/*
 	/*
-	 * we get into deadlocks with paths held by callers of this function.
-	 * since the alloc_mutex is protecting things right now, just
-	 * skip the locking here
+	 * We don't want to deadlock with somebody trying to allocate a new
+	 * extent for the extent root while also trying to search the extent
+	 * root to add free space.  So we skip locking and search the commit
+	 * root, since it's read-only
 	 */
 	 */
 	path->skip_locking = 1;
 	path->skip_locking = 1;
-	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+	path->search_commit_root = 1;
+	path->reada = 2;
+
 	key.objectid = last;
 	key.objectid = last;
 	key.offset = 0;
 	key.offset = 0;
 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
-	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
 	if (ret < 0)
 	if (ret < 0)
 		goto err;
 		goto err;
 
 
 	while (1) {
 	while (1) {
+		smp_mb();
+		if (block_group->fs_info->closing > 1) {
+			last = (u64)-1;
+			break;
+		}
+
 		leaf = path->nodes[0];
 		leaf = path->nodes[0];
 		slot = path->slots[0];
 		slot = path->slots[0];
 		if (slot >= btrfs_header_nritems(leaf)) {
 		if (slot >= btrfs_header_nritems(leaf)) {
-			ret = btrfs_next_leaf(root, path);
+			ret = btrfs_next_leaf(fs_info->extent_root, path);
 			if (ret < 0)
 			if (ret < 0)
 				goto err;
 				goto err;
-			if (ret == 0)
-				continue;
-			else
+			else if (ret)
 				break;
 				break;
+
+			if (need_resched()) {
+				btrfs_release_path(fs_info->extent_root, path);
+				up_read(&fs_info->extent_root->commit_root_sem);
+				cond_resched();
+				goto again;
+			}
+
+			continue;
 		}
 		}
 		btrfs_item_key_to_cpu(leaf, &key, slot);
 		btrfs_item_key_to_cpu(leaf, &key, slot);
 		if (key.objectid < block_group->key.objectid)
 		if (key.objectid < block_group->key.objectid)
@@ -266,24 +320,59 @@ static int cache_block_group(struct btrfs_root *root,
 			break;
 			break;
 
 
 		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
 		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
-			add_new_free_space(block_group, root->fs_info, last,
-					   key.objectid);
-
+			total_found += add_new_free_space(block_group,
+							  fs_info, last,
+							  key.objectid);
 			last = key.objectid + key.offset;
 			last = key.objectid + key.offset;
 		}
 		}
+
+		if (total_found > (1024 * 1024 * 2)) {
+			total_found = 0;
+			wake_up(&block_group->caching_q);
+		}
 next:
 next:
 		path->slots[0]++;
 		path->slots[0]++;
 	}
 	}
+	ret = 0;
 
 
-	add_new_free_space(block_group, root->fs_info, last,
-			   block_group->key.objectid +
-			   block_group->key.offset);
+	total_found += add_new_free_space(block_group, fs_info, last,
+					  block_group->key.objectid +
+					  block_group->key.offset);
+
+	spin_lock(&block_group->lock);
+	block_group->cached = BTRFS_CACHE_FINISHED;
+	spin_unlock(&block_group->lock);
 
 
-	block_group->cached = 1;
-	remove_sb_from_cache(root, block_group);
-	ret = 0;
 err:
 err:
 	btrfs_free_path(path);
 	btrfs_free_path(path);
+	up_read(&fs_info->extent_root->commit_root_sem);
+	atomic_dec(&block_group->space_info->caching_threads);
+	wake_up(&block_group->caching_q);
+
+	return 0;
+}
+
+static int cache_block_group(struct btrfs_block_group_cache *cache)
+{
+	struct task_struct *tsk;
+	int ret = 0;
+
+	spin_lock(&cache->lock);
+	if (cache->cached != BTRFS_CACHE_NO) {
+		spin_unlock(&cache->lock);
+		return ret;
+	}
+	cache->cached = BTRFS_CACHE_STARTED;
+	spin_unlock(&cache->lock);
+
+	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
+			  cache->key.objectid);
+	if (IS_ERR(tsk)) {
+		ret = PTR_ERR(tsk);
+		printk(KERN_ERR "error running thread %d\n", ret);
+		BUG();
+	}
+
 	return ret;
 	return ret;
 }
 }
 
 
@@ -2387,13 +2476,29 @@ fail:
 
 
 }
 }
 
 
+static struct btrfs_block_group_cache *
+next_block_group(struct btrfs_root *root,
+		 struct btrfs_block_group_cache *cache)
+{
+	struct rb_node *node;
+	spin_lock(&root->fs_info->block_group_cache_lock);
+	node = rb_next(&cache->cache_node);
+	btrfs_put_block_group(cache);
+	if (node) {
+		cache = rb_entry(node, struct btrfs_block_group_cache,
+				 cache_node);
+		atomic_inc(&cache->count);
+	} else
+		cache = NULL;
+	spin_unlock(&root->fs_info->block_group_cache_lock);
+	return cache;
+}
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root)
 				   struct btrfs_root *root)
 {
 {
-	struct btrfs_block_group_cache *cache, *entry;
-	struct rb_node *n;
+	struct btrfs_block_group_cache *cache;
 	int err = 0;
 	int err = 0;
-	int werr = 0;
 	struct btrfs_path *path;
 	struct btrfs_path *path;
 	u64 last = 0;
 	u64 last = 0;
 
 
@@ -2402,39 +2507,35 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 		return -ENOMEM;
 
 
 	while (1) {
 	while (1) {
-		cache = NULL;
-		spin_lock(&root->fs_info->block_group_cache_lock);
-		for (n = rb_first(&root->fs_info->block_group_cache_tree);
-		     n; n = rb_next(n)) {
-			entry = rb_entry(n, struct btrfs_block_group_cache,
-					 cache_node);
-			if (entry->dirty) {
-				cache = entry;
-				break;
-			}
+		if (last == 0) {
+			err = btrfs_run_delayed_refs(trans, root,
+						     (unsigned long)-1);
+			BUG_ON(err);
 		}
 		}
-		spin_unlock(&root->fs_info->block_group_cache_lock);
 
 
-		if (!cache)
-			break;
+		cache = btrfs_lookup_first_block_group(root->fs_info, last);
+		while (cache) {
+			if (cache->dirty)
+				break;
+			cache = next_block_group(root, cache);
+		}
+		if (!cache) {
+			if (last == 0)
+				break;
+			last = 0;
+			continue;
+		}
 
 
 		cache->dirty = 0;
 		cache->dirty = 0;
-		last += cache->key.offset;
+		last = cache->key.objectid + cache->key.offset;
 
 
-		err = write_one_cache_group(trans, root,
-					    path, cache);
-		/*
-		 * if we fail to write the cache group, we want
-		 * to keep it marked dirty in hopes that a later
-		 * write will work
-		 */
-		if (err) {
-			werr = err;
-			continue;
-		}
+		err = write_one_cache_group(trans, root, path, cache);
+		BUG_ON(err);
+		btrfs_put_block_group(cache);
 	}
 	}
+
 	btrfs_free_path(path);
 	btrfs_free_path(path);
-	return werr;
+	return 0;
 }
 }
 
 
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
@@ -2484,6 +2585,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	found->force_alloc = 0;
 	found->force_alloc = 0;
 	*space_info = found;
 	*space_info = found;
 	list_add_rcu(&found->list, &info->space_info);
 	list_add_rcu(&found->list, &info->space_info);
+	atomic_set(&found->caching_threads, 0);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -2947,13 +3049,9 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
 	struct btrfs_block_group_cache *cache;
 	struct btrfs_block_group_cache *cache;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 
 
-	if (pin) {
+	if (pin)
 		set_extent_dirty(&fs_info->pinned_extents,
 		set_extent_dirty(&fs_info->pinned_extents,
 				bytenr, bytenr + num - 1, GFP_NOFS);
 				bytenr, bytenr + num - 1, GFP_NOFS);
-	} else {
-		clear_extent_dirty(&fs_info->pinned_extents,
-				bytenr, bytenr + num - 1, GFP_NOFS);
-	}
 
 
 	while (num > 0) {
 	while (num > 0) {
 		cache = btrfs_lookup_block_group(fs_info, bytenr);
 		cache = btrfs_lookup_block_group(fs_info, bytenr);
@@ -2969,14 +3067,34 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
 			spin_unlock(&cache->space_info->lock);
 			spin_unlock(&cache->space_info->lock);
 			fs_info->total_pinned += len;
 			fs_info->total_pinned += len;
 		} else {
 		} else {
+			int unpin = 0;
+
+			/*
+			 * in order to not race with the block group caching, we
+			 * only want to unpin the extent if we are cached.  If
+			 * we aren't cached, we want to start async caching this
+			 * block group so we can free the extent the next time
+			 * around.
+			 */
 			spin_lock(&cache->space_info->lock);
 			spin_lock(&cache->space_info->lock);
 			spin_lock(&cache->lock);
 			spin_lock(&cache->lock);
-			cache->pinned -= len;
-			cache->space_info->bytes_pinned -= len;
+			unpin = (cache->cached == BTRFS_CACHE_FINISHED);
+			if (likely(unpin)) {
+				cache->pinned -= len;
+				cache->space_info->bytes_pinned -= len;
+				fs_info->total_pinned -= len;
+			}
 			spin_unlock(&cache->lock);
 			spin_unlock(&cache->lock);
 			spin_unlock(&cache->space_info->lock);
 			spin_unlock(&cache->space_info->lock);
-			fs_info->total_pinned -= len;
-			if (cache->cached)
+
+			if (likely(unpin))
+				clear_extent_dirty(&fs_info->pinned_extents,
+						   bytenr, bytenr + len -1,
+						   GFP_NOFS);
+			else
+				cache_block_group(cache);
+
+			if (unpin)
 				btrfs_add_free_space(cache, bytenr, len);
 				btrfs_add_free_space(cache, bytenr, len);
 		}
 		}
 		btrfs_put_block_group(cache);
 		btrfs_put_block_group(cache);
@@ -3030,6 +3148,7 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
 					    &start, &end, EXTENT_DIRTY);
 					    &start, &end, EXTENT_DIRTY);
 		if (ret)
 		if (ret)
 			break;
 			break;
+
 		set_extent_dirty(copy, start, end, GFP_NOFS);
 		set_extent_dirty(copy, start, end, GFP_NOFS);
 		last = end + 1;
 		last = end + 1;
 	}
 	}
@@ -3058,6 +3177,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 
 
 		cond_resched();
 		cond_resched();
 	}
 	}
+
 	return ret;
 	return ret;
 }
 }
 
 
@@ -3435,6 +3555,45 @@ static u64 stripe_align(struct btrfs_root *root, u64 val)
 	return ret;
 	return ret;
 }
 }
 
 
+/*
+ * when we wait for progress in the block group caching, it's because
+ * our allocation attempt failed at least once.  So, we must sleep
+ * and let some progress happen before we try again.
+ *
+ * This function will sleep at least once waiting for new free space to
+ * show up, and then it will check the block group free space numbers
+ * for our min num_bytes.  Another option is to have it go ahead
+ * and look in the rbtree for a free extent of a given size, but this
+ * is a good start.
+ */
+static noinline int
+wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
+				u64 num_bytes)
+{
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE);
+
+	if (block_group_cache_done(cache)) {
+		finish_wait(&cache->caching_q, &wait);
+		return 0;
+	}
+	schedule();
+	finish_wait(&cache->caching_q, &wait);
+
+	wait_event(cache->caching_q, block_group_cache_done(cache) ||
+		   (cache->free_space >= num_bytes));
+	return 0;
+}
+
+enum btrfs_loop_type {
+	LOOP_CACHED_ONLY = 0,
+	LOOP_CACHING_NOWAIT = 1,
+	LOOP_CACHING_WAIT = 2,
+	LOOP_ALLOC_CHUNK = 3,
+	LOOP_NO_EMPTY_SIZE = 4,
+};
+
 /*
 /*
  * walks the btree of allocated extents and find a hole of a given size.
  * walks the btree of allocated extents and find a hole of a given size.
  * The key ins is changed to record the hole:
  * The key ins is changed to record the hole:
@@ -3460,6 +3619,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_space_info *space_info;
 	struct btrfs_space_info *space_info;
 	int last_ptr_loop = 0;
 	int last_ptr_loop = 0;
 	int loop = 0;
 	int loop = 0;
+	bool found_uncached_bg = false;
 
 
 	WARN_ON(num_bytes < root->sectorsize);
 	WARN_ON(num_bytes < root->sectorsize);
 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3491,15 +3651,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	search_start = max(search_start, first_logical_byte(root, 0));
 	search_start = max(search_start, first_logical_byte(root, 0));
 	search_start = max(search_start, hint_byte);
 	search_start = max(search_start, hint_byte);
 
 
-	if (!last_ptr) {
+	if (!last_ptr)
 		empty_cluster = 0;
 		empty_cluster = 0;
-		loop = 1;
-	}
 
 
 	if (search_start == hint_byte) {
 	if (search_start == hint_byte) {
 		block_group = btrfs_lookup_block_group(root->fs_info,
 		block_group = btrfs_lookup_block_group(root->fs_info,
 						       search_start);
 						       search_start);
-		if (block_group && block_group_bits(block_group, data)) {
+		/*
+		 * we don't want to use the block group if it doesn't match our
+		 * allocation bits, or if it's not cached.
+		 */
+		if (block_group && block_group_bits(block_group, data) &&
+		    block_group_cache_done(block_group)) {
 			down_read(&space_info->groups_sem);
 			down_read(&space_info->groups_sem);
 			if (list_empty(&block_group->list) ||
 			if (list_empty(&block_group->list) ||
 			    block_group->ro) {
 			    block_group->ro) {
@@ -3522,21 +3685,35 @@ search:
 	down_read(&space_info->groups_sem);
 	down_read(&space_info->groups_sem);
 	list_for_each_entry(block_group, &space_info->block_groups, list) {
 	list_for_each_entry(block_group, &space_info->block_groups, list) {
 		u64 offset;
 		u64 offset;
+		int cached;
 
 
 		atomic_inc(&block_group->count);
 		atomic_inc(&block_group->count);
 		search_start = block_group->key.objectid;
 		search_start = block_group->key.objectid;
 
 
 have_block_group:
 have_block_group:
-		if (unlikely(!block_group->cached)) {
-			mutex_lock(&block_group->cache_mutex);
-			ret = cache_block_group(root, block_group);
-			mutex_unlock(&block_group->cache_mutex);
-			if (ret) {
-				btrfs_put_block_group(block_group);
-				break;
+		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+			/*
+			 * we want to start caching kthreads, but not too many
+			 * right off the bat so we don't overwhelm the system,
+			 * so only start them if there are less than 2 and we're
+			 * in the initial allocation phase.
+			 */
+			if (loop > LOOP_CACHING_NOWAIT ||
+			    atomic_read(&space_info->caching_threads) < 2) {
+				ret = cache_block_group(block_group);
+				BUG_ON(ret);
 			}
 			}
 		}
 		}
 
 
+		cached = block_group_cache_done(block_group);
+		if (unlikely(!cached)) {
+			found_uncached_bg = true;
+
+			/* if we only want cached bgs, loop */
+			if (loop == LOOP_CACHED_ONLY)
+				goto loop;
+		}
+
 		if (unlikely(block_group->ro))
 		if (unlikely(block_group->ro))
 			goto loop;
 			goto loop;
 
 
@@ -3615,14 +3792,21 @@ refill_cluster:
 					spin_unlock(&last_ptr->refill_lock);
 					spin_unlock(&last_ptr->refill_lock);
 					goto checks;
 					goto checks;
 				}
 				}
+			} else if (!cached && loop > LOOP_CACHING_NOWAIT) {
+				spin_unlock(&last_ptr->refill_lock);
+
+				wait_block_group_cache_progress(block_group,
+				       num_bytes + empty_cluster + empty_size);
+				goto have_block_group;
 			}
 			}
+
 			/*
 			/*
 			 * at this point we either didn't find a cluster
 			 * at this point we either didn't find a cluster
 			 * or we weren't able to allocate a block from our
 			 * or we weren't able to allocate a block from our
 			 * cluster.  Free the cluster we've been trying
 			 * cluster.  Free the cluster we've been trying
 			 * to use, and go to the next block group
 			 * to use, and go to the next block group
 			 */
 			 */
-			if (loop < 2) {
+			if (loop < LOOP_NO_EMPTY_SIZE) {
 				btrfs_return_cluster_to_free_space(NULL,
 				btrfs_return_cluster_to_free_space(NULL,
 								   last_ptr);
 								   last_ptr);
 				spin_unlock(&last_ptr->refill_lock);
 				spin_unlock(&last_ptr->refill_lock);
@@ -3633,11 +3817,17 @@ refill_cluster:
 
 
 		offset = btrfs_find_space_for_alloc(block_group, search_start,
 		offset = btrfs_find_space_for_alloc(block_group, search_start,
 						    num_bytes, empty_size);
 						    num_bytes, empty_size);
-		if (!offset)
+		if (!offset && (cached || (!cached &&
+					   loop == LOOP_CACHING_NOWAIT))) {
 			goto loop;
 			goto loop;
+		} else if (!offset && (!cached &&
+				       loop > LOOP_CACHING_NOWAIT)) {
+			wait_block_group_cache_progress(block_group,
+					num_bytes + empty_size);
+			goto have_block_group;
+		}
 checks:
 checks:
 		search_start = stripe_align(root, offset);
 		search_start = stripe_align(root, offset);
-
 		/* move on to the next group */
 		/* move on to the next group */
 		if (search_start + num_bytes >= search_end) {
 		if (search_start + num_bytes >= search_end) {
 			btrfs_add_free_space(block_group, offset, num_bytes);
 			btrfs_add_free_space(block_group, offset, num_bytes);
@@ -3683,13 +3873,26 @@ loop:
 	}
 	}
 	up_read(&space_info->groups_sem);
 	up_read(&space_info->groups_sem);
 
 
-	/* loop == 0, try to find a clustered alloc in every block group
-	 * loop == 1, try again after forcing a chunk allocation
-	 * loop == 2, set empty_size and empty_cluster to 0 and try again
+	/* LOOP_CACHED_ONLY, only search fully cached block groups
+	 * LOOP_CACHING_NOWAIT, search partially cached block groups, but
+	 *			don't wait for them to finish caching
+	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
+	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
+	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
+	 *			again
 	 */
 	 */
-	if (!ins->objectid && loop < 3 &&
-	    (empty_size || empty_cluster || allowed_chunk_alloc)) {
-		if (loop >= 2) {
+	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
+	    (found_uncached_bg || empty_size || empty_cluster ||
+	     allowed_chunk_alloc)) {
+		if (found_uncached_bg) {
+			found_uncached_bg = false;
+			if (loop < LOOP_CACHING_WAIT) {
+				loop++;
+				goto search;
+			}
+		}
+
+		if (loop == LOOP_ALLOC_CHUNK) {
 			empty_size = 0;
 			empty_size = 0;
 			empty_cluster = 0;
 			empty_cluster = 0;
 		}
 		}
@@ -3702,7 +3905,7 @@ loop:
 			space_info->force_alloc = 1;
 			space_info->force_alloc = 1;
 		}
 		}
 
 
-		if (loop < 3) {
+		if (loop < LOOP_NO_EMPTY_SIZE) {
 			loop++;
 			loop++;
 			goto search;
 			goto search;
 		}
 		}
@@ -3798,7 +4001,7 @@ again:
 			       num_bytes, data, 1);
 			       num_bytes, data, 1);
 		goto again;
 		goto again;
 	}
 	}
-	if (ret) {
+	if (ret == -ENOSPC) {
 		struct btrfs_space_info *sinfo;
 		struct btrfs_space_info *sinfo;
 
 
 		sinfo = __find_space_info(root->fs_info, data);
 		sinfo = __find_space_info(root->fs_info, data);
@@ -3806,7 +4009,6 @@ again:
 		       "wanted %llu\n", (unsigned long long)data,
 		       "wanted %llu\n", (unsigned long long)data,
 		       (unsigned long long)num_bytes);
 		       (unsigned long long)num_bytes);
 		dump_space_info(sinfo, num_bytes);
 		dump_space_info(sinfo, num_bytes);
-		BUG();
 	}
 	}
 
 
 	return ret;
 	return ret;
@@ -3844,7 +4046,9 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 	ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
 	ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
 				     empty_size, hint_byte, search_end, ins,
 				     empty_size, hint_byte, search_end, ins,
 				     data);
 				     data);
-	update_reserved_extents(root, ins->objectid, ins->offset, 1);
+	if (!ret)
+		update_reserved_extents(root, ins->objectid, ins->offset, 1);
+
 	return ret;
 	return ret;
 }
 }
 
 
@@ -4006,9 +4210,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_block_group_cache *block_group;
 	struct btrfs_block_group_cache *block_group;
 
 
 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-	mutex_lock(&block_group->cache_mutex);
-	cache_block_group(root, block_group);
-	mutex_unlock(&block_group->cache_mutex);
+	cache_block_group(block_group);
+	wait_event(block_group->caching_q,
+		   block_group_cache_done(block_group));
 
 
 	ret = btrfs_remove_free_space(block_group, ins->objectid,
 	ret = btrfs_remove_free_space(block_group, ins->objectid,
 				      ins->offset);
 				      ins->offset);
@@ -4039,7 +4243,8 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
 	ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
 	ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
 				     empty_size, hint_byte, search_end,
 				     empty_size, hint_byte, search_end,
 				     ins, 0);
 				     ins, 0);
-	BUG_ON(ret);
+	if (ret)
+		return ret;
 
 
 	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
 	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
 		if (parent == 0)
 		if (parent == 0)
@@ -6955,11 +7160,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 			 &info->block_group_cache_tree);
 			 &info->block_group_cache_tree);
 		spin_unlock(&info->block_group_cache_lock);
 		spin_unlock(&info->block_group_cache_lock);
 
 
-		btrfs_remove_free_space_cache(block_group);
 		down_write(&block_group->space_info->groups_sem);
 		down_write(&block_group->space_info->groups_sem);
 		list_del(&block_group->list);
 		list_del(&block_group->list);
 		up_write(&block_group->space_info->groups_sem);
 		up_write(&block_group->space_info->groups_sem);
 
 
+		if (block_group->cached == BTRFS_CACHE_STARTED)
+			wait_event(block_group->caching_q,
+				   block_group_cache_done(block_group));
+
+		btrfs_remove_free_space_cache(block_group);
+
 		WARN_ON(atomic_read(&block_group->count) != 1);
 		WARN_ON(atomic_read(&block_group->count) != 1);
 		kfree(block_group);
 		kfree(block_group);
 
 
@@ -7025,9 +7235,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		atomic_set(&cache->count, 1);
 		atomic_set(&cache->count, 1);
 		spin_lock_init(&cache->lock);
 		spin_lock_init(&cache->lock);
 		spin_lock_init(&cache->tree_lock);
 		spin_lock_init(&cache->tree_lock);
-		mutex_init(&cache->cache_mutex);
+		cache->fs_info = info;
+		init_waitqueue_head(&cache->caching_q);
 		INIT_LIST_HEAD(&cache->list);
 		INIT_LIST_HEAD(&cache->list);
 		INIT_LIST_HEAD(&cache->cluster_list);
 		INIT_LIST_HEAD(&cache->cluster_list);
+
+		/*
+		 * we only want to have 32k of ram per block group for keeping
+		 * track of free space, and if we pass 1/2 of that we want to
+		 * start converting things over to using bitmaps
+		 */
+		cache->extents_thresh = ((1024 * 32) / 2) /
+			sizeof(struct btrfs_free_space);
+
 		read_extent_buffer(leaf, &cache->item,
 		read_extent_buffer(leaf, &cache->item,
 				   btrfs_item_ptr_offset(leaf, path->slots[0]),
 				   btrfs_item_ptr_offset(leaf, path->slots[0]),
 				   sizeof(cache->item));
 				   sizeof(cache->item));
@@ -7036,6 +7256,26 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		key.objectid = found_key.objectid + found_key.offset;
 		key.objectid = found_key.objectid + found_key.offset;
 		btrfs_release_path(root, path);
 		btrfs_release_path(root, path);
 		cache->flags = btrfs_block_group_flags(&cache->item);
 		cache->flags = btrfs_block_group_flags(&cache->item);
+		cache->sectorsize = root->sectorsize;
+
+		remove_sb_from_cache(root, cache);
+
+		/*
+		 * check for two cases, either we are full, and therefore
+		 * don't need to bother with the caching work since we won't
+		 * find any space, or we are empty, and we can just add all
+		 * the space in and be done with it.  This saves us _a lot_ of
+		 * time, particularly in the full case.
+		 */
+		if (found_key.offset == btrfs_block_group_used(&cache->item)) {
+			cache->cached = BTRFS_CACHE_FINISHED;
+		} else if (btrfs_block_group_used(&cache->item) == 0) {
+			cache->cached = BTRFS_CACHE_FINISHED;
+			add_new_free_space(cache, root->fs_info,
+					   found_key.objectid,
+					   found_key.objectid +
+					   found_key.offset);
+		}
 
 
 		ret = update_space_info(info, cache->flags, found_key.offset,
 		ret = update_space_info(info, cache->flags, found_key.offset,
 					btrfs_block_group_used(&cache->item),
 					btrfs_block_group_used(&cache->item),
@@ -7079,10 +7319,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	cache->key.objectid = chunk_offset;
 	cache->key.objectid = chunk_offset;
 	cache->key.offset = size;
 	cache->key.offset = size;
 	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
 	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+	cache->sectorsize = root->sectorsize;
+
+	/*
+	 * we only want to have 32k of ram per block group for keeping track
+	 * of free space, and if we pass 1/2 of that we want to start
+	 * converting things over to using bitmaps
+	 */
+	cache->extents_thresh = ((1024 * 32) / 2) /
+		sizeof(struct btrfs_free_space);
 	atomic_set(&cache->count, 1);
 	atomic_set(&cache->count, 1);
 	spin_lock_init(&cache->lock);
 	spin_lock_init(&cache->lock);
 	spin_lock_init(&cache->tree_lock);
 	spin_lock_init(&cache->tree_lock);
-	mutex_init(&cache->cache_mutex);
+	init_waitqueue_head(&cache->caching_q);
 	INIT_LIST_HEAD(&cache->list);
 	INIT_LIST_HEAD(&cache->list);
 	INIT_LIST_HEAD(&cache->cluster_list);
 	INIT_LIST_HEAD(&cache->cluster_list);
 
 
@@ -7091,6 +7340,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	cache->flags = type;
 	cache->flags = type;
 	btrfs_set_block_group_flags(&cache->item, type);
 	btrfs_set_block_group_flags(&cache->item, type);
 
 
+	cache->cached = BTRFS_CACHE_FINISHED;
+	remove_sb_from_cache(root, cache);
+
+	add_new_free_space(cache, root->fs_info, chunk_offset,
+			   chunk_offset + size);
+
 	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
 	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
 				&cache->space_info);
 				&cache->space_info);
 	BUG_ON(ret);
 	BUG_ON(ret);
@@ -7149,7 +7404,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	rb_erase(&block_group->cache_node,
 	rb_erase(&block_group->cache_node,
 		 &root->fs_info->block_group_cache_tree);
 		 &root->fs_info->block_group_cache_tree);
 	spin_unlock(&root->fs_info->block_group_cache_lock);
 	spin_unlock(&root->fs_info->block_group_cache_lock);
-	btrfs_remove_free_space_cache(block_group);
+
 	down_write(&block_group->space_info->groups_sem);
 	down_write(&block_group->space_info->groups_sem);
 	/*
 	/*
 	 * we must use list_del_init so people can check to see if they
 	 * we must use list_del_init so people can check to see if they
@@ -7158,11 +7413,18 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	list_del_init(&block_group->list);
 	list_del_init(&block_group->list);
 	up_write(&block_group->space_info->groups_sem);
 	up_write(&block_group->space_info->groups_sem);
 
 
+	if (block_group->cached == BTRFS_CACHE_STARTED)
+		wait_event(block_group->caching_q,
+			   block_group_cache_done(block_group));
+
+	btrfs_remove_free_space_cache(block_group);
+
 	spin_lock(&block_group->space_info->lock);
 	spin_lock(&block_group->space_info->lock);
 	block_group->space_info->total_bytes -= block_group->key.offset;
 	block_group->space_info->total_bytes -= block_group->key.offset;
 	block_group->space_info->bytes_readonly -= block_group->key.offset;
 	block_group->space_info->bytes_readonly -= block_group->key.offset;
 	spin_unlock(&block_group->space_info->lock);
 	spin_unlock(&block_group->space_info->lock);
-	block_group->space_info->full = 0;
+
+	btrfs_clear_space_info_full(root->fs_info);
 
 
 	btrfs_put_block_group(block_group);
 	btrfs_put_block_group(block_group);
 	btrfs_put_block_group(block_group);
 	btrfs_put_block_group(block_group);

Різницю між файлами не показано, бо вона завелика
+ 611 - 190
fs/btrfs/free-space-cache.c


+ 8 - 0
fs/btrfs/free-space-cache.h

@@ -19,6 +19,14 @@
 #ifndef __BTRFS_FREE_SPACE_CACHE
 #ifndef __BTRFS_FREE_SPACE_CACHE
 #define __BTRFS_FREE_SPACE_CACHE
 #define __BTRFS_FREE_SPACE_CACHE
 
 
+struct btrfs_free_space {
+	struct rb_node offset_index;
+	u64 offset;
+	u64 bytes;
+	unsigned long *bitmap;
+	struct list_head list;
+};
+
 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
 			 u64 bytenr, u64 size);
 			 u64 bytenr, u64 size);
 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,

+ 1 - 1
fs/btrfs/inode.c

@@ -2603,8 +2603,8 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 	if (root->ref_cows)
 	if (root->ref_cows)
 		btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
 		btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
 	path = btrfs_alloc_path();
 	path = btrfs_alloc_path();
-	path->reada = -1;
 	BUG_ON(!path);
 	BUG_ON(!path);
+	path->reada = -1;
 
 
 	/* FIXME, add redo link to tree so we don't leak on crash */
 	/* FIXME, add redo link to tree so we don't leak on crash */
 	key.objectid = inode->i_ino;
 	key.objectid = inode->i_ino;

+ 3 - 3
fs/btrfs/print-tree.c

@@ -309,7 +309,7 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
 	}
 	}
 	printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n",
 	printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n",
 	       (unsigned long long)btrfs_header_bytenr(c),
 	       (unsigned long long)btrfs_header_bytenr(c),
-	       btrfs_header_level(c), nr,
+	      level, nr,
 	       (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
 	       (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
 	for (i = 0; i < nr; i++) {
 	for (i = 0; i < nr; i++) {
 		btrfs_node_key_to_cpu(c, &key, i);
 		btrfs_node_key_to_cpu(c, &key, i);
@@ -326,10 +326,10 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
 					btrfs_level_size(root, level - 1),
 					btrfs_level_size(root, level - 1),
 					btrfs_node_ptr_generation(c, i));
 					btrfs_node_ptr_generation(c, i));
 		if (btrfs_is_leaf(next) &&
 		if (btrfs_is_leaf(next) &&
-		    btrfs_header_level(c) != 1)
+		   level != 1)
 			BUG();
 			BUG();
 		if (btrfs_header_level(next) !=
 		if (btrfs_header_level(next) !=
-			btrfs_header_level(c) - 1)
+		       level - 1)
 			BUG();
 			BUG();
 		btrfs_print_tree(root, next);
 		btrfs_print_tree(root, next);
 		free_extent_buffer(next);
 		free_extent_buffer(next);

+ 3 - 0
fs/btrfs/relocation.c

@@ -670,6 +670,8 @@ again:
 			err = ret;
 			err = ret;
 			goto out;
 			goto out;
 		}
 		}
+		if (ret > 0 && path2->slots[level] > 0)
+			path2->slots[level]--;
 
 
 		eb = path2->nodes[level];
 		eb = path2->nodes[level];
 		WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
 		WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
@@ -1609,6 +1611,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
 		BUG_ON(level == 0);
 		BUG_ON(level == 0);
 		path->lowest_level = level;
 		path->lowest_level = level;
 		ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
 		ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
+		path->lowest_level = 0;
 		if (ret < 0) {
 		if (ret < 0) {
 			btrfs_free_path(path);
 			btrfs_free_path(path);
 			return ret;
 			return ret;

+ 19 - 21
fs/btrfs/transaction.c

@@ -40,6 +40,14 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
 	}
 	}
 }
 }
 
 
+static noinline void switch_commit_root(struct btrfs_root *root)
+{
+	down_write(&root->commit_root_sem);
+	free_extent_buffer(root->commit_root);
+	root->commit_root = btrfs_root_node(root);
+	up_write(&root->commit_root_sem);
+}
+
 /*
 /*
  * either allocate a new transaction or hop into the existing one
  * either allocate a new transaction or hop into the existing one
  */
  */
@@ -444,9 +452,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
 
 
 	btrfs_write_dirty_block_groups(trans, root);
 	btrfs_write_dirty_block_groups(trans, root);
 
 
-	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-	BUG_ON(ret);
-
 	while (1) {
 	while (1) {
 		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
 		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
 		if (old_root_bytenr == root->node->start)
 		if (old_root_bytenr == root->node->start)
@@ -457,13 +462,11 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
 					&root->root_key,
 					&root->root_key,
 					&root->root_item);
 					&root->root_item);
 		BUG_ON(ret);
 		BUG_ON(ret);
-		btrfs_write_dirty_block_groups(trans, root);
 
 
-		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+		ret = btrfs_write_dirty_block_groups(trans, root);
 		BUG_ON(ret);
 		BUG_ON(ret);
 	}
 	}
-	free_extent_buffer(root->commit_root);
-	root->commit_root = btrfs_root_node(root);
+	switch_commit_root(root);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -495,9 +498,6 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
 		root = list_entry(next, struct btrfs_root, dirty_list);
 		root = list_entry(next, struct btrfs_root, dirty_list);
 
 
 		update_cowonly_root(trans, root);
 		update_cowonly_root(trans, root);
-
-		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-		BUG_ON(ret);
 	}
 	}
 	return 0;
 	return 0;
 }
 }
@@ -544,8 +544,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
 			btrfs_update_reloc_root(trans, root);
 			btrfs_update_reloc_root(trans, root);
 
 
 			if (root->commit_root != root->node) {
 			if (root->commit_root != root->node) {
-				free_extent_buffer(root->commit_root);
-				root->commit_root = btrfs_root_node(root);
+				switch_commit_root(root);
 				btrfs_set_root_node(&root->root_item,
 				btrfs_set_root_node(&root->root_item,
 						    root->node);
 						    root->node);
 			}
 			}
@@ -943,9 +942,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
 
 		mutex_unlock(&root->fs_info->trans_mutex);
 		mutex_unlock(&root->fs_info->trans_mutex);
 
 
-		if (flush_on_commit || snap_pending) {
-			if (flush_on_commit)
-				btrfs_start_delalloc_inodes(root);
+		if (flush_on_commit) {
+			btrfs_start_delalloc_inodes(root);
+			ret = btrfs_wait_ordered_extents(root, 0);
+			BUG_ON(ret);
+		} else if (snap_pending) {
 			ret = btrfs_wait_ordered_extents(root, 1);
 			ret = btrfs_wait_ordered_extents(root, 1);
 			BUG_ON(ret);
 			BUG_ON(ret);
 		}
 		}
@@ -1009,15 +1010,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
 
 	btrfs_set_root_node(&root->fs_info->tree_root->root_item,
 	btrfs_set_root_node(&root->fs_info->tree_root->root_item,
 			    root->fs_info->tree_root->node);
 			    root->fs_info->tree_root->node);
-	free_extent_buffer(root->fs_info->tree_root->commit_root);
-	root->fs_info->tree_root->commit_root =
-				btrfs_root_node(root->fs_info->tree_root);
+	switch_commit_root(root->fs_info->tree_root);
 
 
 	btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
 	btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
 			    root->fs_info->chunk_root->node);
 			    root->fs_info->chunk_root->node);
-	free_extent_buffer(root->fs_info->chunk_root->commit_root);
-	root->fs_info->chunk_root->commit_root =
-				btrfs_root_node(root->fs_info->chunk_root);
+	switch_commit_root(root->fs_info->chunk_root);
 
 
 	update_super_roots(root);
 	update_super_roots(root);
 
 
@@ -1057,6 +1054,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	cur_trans->commit_done = 1;
 	cur_trans->commit_done = 1;
 
 
 	root->fs_info->last_trans_committed = cur_trans->transid;
 	root->fs_info->last_trans_committed = cur_trans->transid;
+
 	wake_up(&cur_trans->commit_wait);
 	wake_up(&cur_trans->commit_wait);
 
 
 	put_transaction(cur_trans);
 	put_transaction(cur_trans);

+ 1 - 1
fs/btrfs/tree-log.c

@@ -797,7 +797,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 		return -ENOENT;
 		return -ENOENT;
 
 
 	inode = read_one_inode(root, key->objectid);
 	inode = read_one_inode(root, key->objectid);
-	BUG_ON(!dir);
+	BUG_ON(!inode);
 
 
 	ref_ptr = btrfs_item_ptr_offset(eb, slot);
 	ref_ptr = btrfs_item_ptr_offset(eb, slot);
 	ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
 	ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);

+ 19 - 27
fs/btrfs/volumes.c

@@ -721,7 +721,8 @@ error:
  */
  */
 static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
 static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
 					 struct btrfs_device *device,
 					 struct btrfs_device *device,
-					 u64 num_bytes, u64 *start)
+					 u64 num_bytes, u64 *start,
+					 u64 *max_avail)
 {
 {
 	struct btrfs_key key;
 	struct btrfs_key key;
 	struct btrfs_root *root = device->dev_root;
 	struct btrfs_root *root = device->dev_root;
@@ -758,9 +759,13 @@ static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
 	ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
 	ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
 	if (ret < 0)
 	if (ret < 0)
 		goto error;
 		goto error;
-	ret = btrfs_previous_item(root, path, 0, key.type);
-	if (ret < 0)
-		goto error;
+	if (ret > 0) {
+		ret = btrfs_previous_item(root, path, key.objectid, key.type);
+		if (ret < 0)
+			goto error;
+		if (ret > 0)
+			start_found = 1;
+	}
 	l = path->nodes[0];
 	l = path->nodes[0];
 	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
 	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
 	while (1) {
 	while (1) {
@@ -803,6 +808,10 @@ no_more_items:
 			if (last_byte < search_start)
 			if (last_byte < search_start)
 				last_byte = search_start;
 				last_byte = search_start;
 			hole_size = key.offset - last_byte;
 			hole_size = key.offset - last_byte;
+
+			if (hole_size > *max_avail)
+				*max_avail = hole_size;
+
 			if (key.offset > last_byte &&
 			if (key.offset > last_byte &&
 			    hole_size >= num_bytes) {
 			    hole_size >= num_bytes) {
 				*start = last_byte;
 				*start = last_byte;
@@ -1621,6 +1630,7 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
 	device->fs_devices->total_rw_bytes += diff;
 	device->fs_devices->total_rw_bytes += diff;
 
 
 	device->total_bytes = new_size;
 	device->total_bytes = new_size;
+	device->disk_total_bytes = new_size;
 	btrfs_clear_space_info_full(device->dev_root->fs_info);
 	btrfs_clear_space_info_full(device->dev_root->fs_info);
 
 
 	return btrfs_update_device(trans, device);
 	return btrfs_update_device(trans, device);
@@ -2007,7 +2017,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 			goto done;
 			goto done;
 		if (ret) {
 		if (ret) {
 			ret = 0;
 			ret = 0;
-			goto done;
+			break;
 		}
 		}
 
 
 		l = path->nodes[0];
 		l = path->nodes[0];
@@ -2015,7 +2025,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 		btrfs_item_key_to_cpu(l, &key, path->slots[0]);
 		btrfs_item_key_to_cpu(l, &key, path->slots[0]);
 
 
 		if (key.objectid != device->devid)
 		if (key.objectid != device->devid)
-			goto done;
+			break;
 
 
 		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
 		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
 		length = btrfs_dev_extent_length(l, dev_extent);
 		length = btrfs_dev_extent_length(l, dev_extent);
@@ -2171,6 +2181,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 			     max_chunk_size);
 			     max_chunk_size);
 
 
 again:
 again:
+	max_avail = 0;
 	if (!map || map->num_stripes != num_stripes) {
 	if (!map || map->num_stripes != num_stripes) {
 		kfree(map);
 		kfree(map);
 		map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
 		map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
@@ -2219,7 +2230,8 @@ again:
 
 
 		if (device->in_fs_metadata && avail >= min_free) {
 		if (device->in_fs_metadata && avail >= min_free) {
 			ret = find_free_dev_extent(trans, device,
 			ret = find_free_dev_extent(trans, device,
-						   min_free, &dev_offset);
+						   min_free, &dev_offset,
+						   &max_avail);
 			if (ret == 0) {
 			if (ret == 0) {
 				list_move_tail(&device->dev_alloc_list,
 				list_move_tail(&device->dev_alloc_list,
 					       &private_devs);
 					       &private_devs);
@@ -2795,26 +2807,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
 		}
 		}
 	}
 	}
 
 
-	for (i = 0; i > nr; i++) {
-		struct btrfs_multi_bio *multi;
-		struct btrfs_bio_stripe *stripe;
-		int ret;
-
-		length = 1;
-		ret = btrfs_map_block(map_tree, WRITE, buf[i],
-				      &length, &multi, 0);
-		BUG_ON(ret);
-
-		stripe = multi->stripes;
-		for (j = 0; j < multi->num_stripes; j++) {
-			if (stripe->physical >= physical &&
-			    physical < stripe->physical + length)
-				break;
-		}
-		BUG_ON(j >= multi->num_stripes);
-		kfree(multi);
-	}
-
 	*logical = buf;
 	*logical = buf;
 	*naddrs = nr;
 	*naddrs = nr;
 	*stripe_len = map->stripe_len;
 	*stripe_len = map->stripe_len;

Деякі файли не було показано, через те що забагато файлів було змінено