Pārlūkot izejas kodu

Btrfs: Add support for mirroring across drives

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Chris Mason 17 gadi atpakaļ
vecāks
revīzija
8790d502e4
6 mainīti faili ar 198 papildinājumiem un 93 dzēšanām
  1. 7 2
      fs/btrfs/ctree.h
  2. 7 13
      fs/btrfs/disk-io.c
  3. 49 44
      fs/btrfs/extent-tree.c
  4. 3 1
      fs/btrfs/inode.c
  5. 126 28
      fs/btrfs/volumes.c
  6. 6 5
      fs/btrfs/volumes.h

+ 7 - 2
fs/btrfs/ctree.h

@@ -418,6 +418,7 @@ struct btrfs_csum_item {
 #define BTRFS_BLOCK_GROUP_SYSTEM   (1 << 1)
 #define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
 #define BTRFS_BLOCK_GROUP_RAID0    (1 << 3)
+#define BTRFS_BLOCK_GROUP_RAID1    (1 << 4)
 
 
 struct btrfs_block_group_item {
@@ -504,8 +505,12 @@ struct btrfs_fs_info {
 	u64 delalloc_bytes;
 	u64 last_alloc;
 	u64 last_data_alloc;
-	int extra_data_alloc_bits;
-	int extra_alloc_bits;
+	int avail_data_alloc_bits;
+	int avail_metadata_alloc_bits;
+	int avail_system_alloc_bits;
+	int data_alloc_profile;
+	int metadata_alloc_profile;
+	int system_alloc_profile;
 };
 
 /*

+ 7 - 13
fs/btrfs/disk-io.c

@@ -735,7 +735,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 						 GFP_NOFS);
 	struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
 					       GFP_NOFS);
-	struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
+	struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
 						GFP_NOFS);
 	struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root),
 						GFP_NOFS);
@@ -744,6 +744,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	int ret;
 	int err = -EINVAL;
 	struct btrfs_super_block *disk_super;
+
 	if (!extent_root || !tree_root || !fs_info) {
 		err = -ENOMEM;
 		goto fail;
@@ -756,11 +757,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	spin_lock_init(&fs_info->delalloc_lock);
 	spin_lock_init(&fs_info->new_trans_lock);
 
-	memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj));
 	init_completion(&fs_info->kobj_unregister);
 	sb_set_blocksize(sb, 4096);
-	fs_info->running_transaction = NULL;
-	fs_info->last_trans_committed = 0;
 	fs_info->tree_root = tree_root;
 	fs_info->extent_root = extent_root;
 	fs_info->chunk_root = chunk_root;
@@ -770,11 +768,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	INIT_LIST_HEAD(&fs_info->space_info);
 	btrfs_mapping_init(&fs_info->mapping_tree);
 	fs_info->sb = sb;
-	fs_info->throttles = 0;
-	fs_info->mount_opt = 0;
 	fs_info->max_extent = (u64)-1;
 	fs_info->max_inline = 8192 * 1024;
-	fs_info->delalloc_bytes = 0;
 	setup_bdi(fs_info, &fs_info->bdi);
 	fs_info->btree_inode = new_inode(sb);
 	fs_info->btree_inode->i_ino = 1;
@@ -802,12 +797,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	extent_io_tree_init(&fs_info->extent_ins,
 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
 	fs_info->do_barriers = 1;
-	fs_info->closing = 0;
-	fs_info->total_pinned = 0;
-	fs_info->last_alloc = 0;
-	fs_info->last_data_alloc = 0;
-	fs_info->extra_alloc_bits = 0;
-	fs_info->extra_data_alloc_bits = 0;
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
 	INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
@@ -923,6 +912,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	btrfs_read_block_groups(extent_root);
 
 	fs_info->generation = btrfs_super_generation(disk_super) + 1;
+	if (btrfs_super_num_devices(disk_super) > 0) {
+		fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0;
+		fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1;
+		fs_info->system_alloc_profile = BTRFS_BLOCK_GROUP_RAID0;
+	}
 	mutex_unlock(&fs_info->fs_mutex);
 	return tree_root;
 

+ 49 - 44
fs/btrfs/extent-tree.c

@@ -230,9 +230,13 @@ again:
 			goto new_group;
 		if (start + num  > total_fs_bytes)
 			goto new_group;
+		if (!block_group_bits(cache, data)) {
+			printk("block group bits don't match %Lu %Lu\n", cache->flags, data);
+		}
 		*start_ret = start;
 		return 0;
-	} out:
+	}
+out:
 	cache = btrfs_lookup_block_group(root->fs_info, search_start);
 	if (!cache) {
 		printk("Unable to find block group for %Lu\n", search_start);
@@ -365,14 +369,17 @@ again:
 		if (cache->key.objectid > total_fs_bytes)
 			break;
 
-		if (full_search)
-			free_check = cache->key.offset;
-		else
-			free_check = div_factor(cache->key.offset, factor);
+		if (block_group_bits(cache, data)) {
+			if (full_search)
+				free_check = cache->key.offset;
+			else
+				free_check = div_factor(cache->key.offset,
+							factor);
 
-		if (used + cache->pinned < free_check) {
-			found_group = cache;
-			goto found;
+			if (used + cache->pinned < free_check) {
+				found_group = cache;
+				goto found;
+			}
 		}
 		cond_resched();
 	}
@@ -1038,6 +1045,19 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	return 0;
 }
 
+static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
+{
+	u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
+				   BTRFS_BLOCK_GROUP_RAID1);
+	if (extra_flags) {
+		if (flags & BTRFS_BLOCK_GROUP_DATA)
+			fs_info->avail_data_alloc_bits |= extra_flags;
+		if (flags & BTRFS_BLOCK_GROUP_METADATA)
+			fs_info->avail_metadata_alloc_bits |= extra_flags;
+		if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+			fs_info->avail_system_alloc_bits |= extra_flags;
+	}
+}
 
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
@@ -1060,7 +1080,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	if (space_info->full)
 		return 0;
 
-	thresh = div_factor(space_info->total_bytes, 7);
+	thresh = div_factor(space_info->total_bytes, 6);
 	if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
 	    thresh)
 		return 0;
@@ -1079,16 +1099,7 @@ printk("space info full %Lu\n", flags);
 		     start, num_bytes);
 	BUG_ON(ret);
 
-	if (flags & BTRFS_BLOCK_GROUP_RAID0) {
-		if (flags & BTRFS_BLOCK_GROUP_DATA) {
-			extent_root->fs_info->extra_data_alloc_bits =
-				BTRFS_BLOCK_GROUP_RAID0;
-		}
-		if (flags & BTRFS_BLOCK_GROUP_METADATA) {
-			extent_root->fs_info->extra_alloc_bits =
-				BTRFS_BLOCK_GROUP_RAID0;
-		}
-	}
+	set_avail_alloc_bits(extent_root->fs_info, flags);
 	return 0;
 }
 
@@ -1529,6 +1540,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
 
 	if (data & BTRFS_BLOCK_GROUP_METADATA) {
 		last_ptr = &root->fs_info->last_alloc;
+		empty_cluster = 256 * 1024;
 	}
 
 	if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
@@ -1693,6 +1705,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
 	u64 root_used;
 	u64 search_start = 0;
 	u64 new_hint;
+	u64 alloc_profile;
 	u32 sizes[2];
 	struct btrfs_fs_info *info = root->fs_info;
 	struct btrfs_root *extent_root = info->extent_root;
@@ -1700,31 +1713,32 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_extent_ref *ref;
 	struct btrfs_path *path;
 	struct btrfs_key keys[2];
-	int extra_chunk_alloc_bits = 0;
 
 	if (data) {
-		data = BTRFS_BLOCK_GROUP_DATA | info->extra_data_alloc_bits;
+		alloc_profile = info->avail_data_alloc_bits &
+			        info->data_alloc_profile;
+		data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
 	} else if (root == root->fs_info->chunk_root) {
-		data = BTRFS_BLOCK_GROUP_SYSTEM;
+		alloc_profile = info->avail_system_alloc_bits &
+			        info->system_alloc_profile;
+		data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
 	} else {
-		data = BTRFS_BLOCK_GROUP_METADATA | info->extra_alloc_bits;
+		alloc_profile = info->avail_metadata_alloc_bits &
+			        info->metadata_alloc_profile;
+		data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
 	}
-	if (btrfs_super_num_devices(&info->super_copy) > 1 &&
-	    !(data & BTRFS_BLOCK_GROUP_SYSTEM))
-		extra_chunk_alloc_bits = BTRFS_BLOCK_GROUP_RAID0;
 
 	if (root->ref_cows) {
 		if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
 			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
 					     2 * 1024 * 1024,
 					     BTRFS_BLOCK_GROUP_METADATA |
-					     info->extra_alloc_bits |
-					     extra_chunk_alloc_bits);
+					     (info->metadata_alloc_profile &
+					      info->avail_metadata_alloc_bits));
 			BUG_ON(ret);
 		}
 		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-				     num_bytes + 2 * 1024 * 1024, data |
-				     extra_chunk_alloc_bits);
+				     num_bytes + 2 * 1024 * 1024, data);
 		BUG_ON(ret);
 	}
 
 
@@ -2046,12 +2060,12 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
 		if (!next || !btrfs_buffer_uptodate(next)) {
 			free_extent_buffer(next);
 			reada_walk_down(root, cur, path->slots[*level]);
+
+			mutex_unlock(&root->fs_info->fs_mutex);
 			next = read_tree_block(root, bytenr, blocksize);
+			mutex_lock(&root->fs_info->fs_mutex);
 
-			/* we used to drop the lock above, keep the
-			 * code to double check so that we won't forget
-			 * when we drop the lock again in the future
-			 */
+			/* we've dropped the lock, double check */
 			ret = lookup_extent_ref(trans, root, bytenr,
 						blocksize, &refs);
 			BUG_ON(ret);
@@ -2739,16 +2753,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		} else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
 			bit = BLOCK_GROUP_METADATA;
 		}
-		if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) {
-			if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
-				info->extra_data_alloc_bits =
-					BTRFS_BLOCK_GROUP_RAID0;
-			}
-			if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
-				info->extra_alloc_bits =
-					BTRFS_BLOCK_GROUP_RAID0;
-			}
-		}
+		set_avail_alloc_bits(info, cache->flags);
 
 		ret = update_space_info(info, cache->flags, found_key.offset,
 					btrfs_block_group_used(&cache->item),

+ 3 - 1
fs/btrfs/inode.c

@@ -306,6 +306,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
 	u64 physical;
 	u64 length = 0;
 	u64 map_length;
+	int total_devs;
 	struct bio_vec *bvec;
 	int i;
 	int ret;
@@ -315,7 +316,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
 	}
 	map_tree = &root->fs_info->mapping_tree;
 	map_length = length;
-	ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev);
+	ret = btrfs_map_block(map_tree, READ, 0, logical, &physical,
+			      &map_length, &dev, &total_devs);
 	if (map_length < length + size) {
 		return 1;
 	}

+ 126 - 28
fs/btrfs/volumes.c

@@ -31,6 +31,13 @@ struct stripe {
 	u64 physical;
 };
 
+struct multi_bio {
+	atomic_t stripes;
+	bio_end_io_t *end_io;
+	void *private;
+	int error;
+};
+
 struct map_lookup {
 	u64 type;
 	int io_align;
@@ -632,12 +639,12 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	if (list_empty(dev_list))
 		return -ENOSPC;
 
-	if (type & BTRFS_BLOCK_GROUP_RAID0)
+	if (type & (BTRFS_BLOCK_GROUP_RAID0))
 		num_stripes = btrfs_super_num_devices(&info->super_copy);
-	if (type & BTRFS_BLOCK_GROUP_DATA)
-		stripe_len = 64 * 1024;
-	if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))
-		stripe_len = 32 * 1024;
+	if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
+		num_stripes = min_t(u64, 2,
+				  btrfs_super_num_devices(&info->super_copy));
+	}
 again:
 	INIT_LIST_HEAD(&private_devs);
 	cur = dev_list->next;
@@ -682,7 +689,11 @@ again:
 
 	stripes = &chunk->stripe;
 
-	*num_bytes = calc_size * num_stripes;
+	if (type & BTRFS_BLOCK_GROUP_RAID1)
+		*num_bytes = calc_size;
+	else
+		*num_bytes = calc_size * num_stripes;
+
 	index = 0;
 	while(index < num_stripes) {
 		BUG_ON(list_empty(&private_devs));
@@ -694,7 +705,7 @@ again:
 					     key.objectid,
 					     calc_size, &dev_offset);
 		BUG_ON(ret);
-printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid);
+printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.objectid, calc_size, device->devid, type);
 		device->bytes_used += calc_size;
 		ret = btrfs_update_device(trans, device);
 		BUG_ON(ret);
@@ -774,9 +785,9 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
 	}
 }
 
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
-		    u64 logical, u64 *phys, u64 *length,
-		    struct btrfs_device **dev)
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+		    int dev_nr, u64 logical, u64 *phys, u64 *length,
+		    struct btrfs_device **dev, int *total_devs)
 {
 	struct extent_map *em;
 	struct map_lookup *map;
@@ -808,19 +819,39 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
 	/* stripe_offset is the offset of this block in its stripe*/
 	stripe_offset = offset - stripe_offset;
 
-	/*
-	 * after this do_div call, stripe_nr is the number of stripes
-	 * on this device we have to walk to find the data, and
-	 * stripe_index is the number of our device in the stripe array
-	 */
-	stripe_index = do_div(stripe_nr, map->num_stripes);
-
+	if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+		stripe_index = dev_nr;
+		if (rw & (1 << BIO_RW))
+			*total_devs = map->num_stripes;
+		else {
+			int i;
+			u64 least = (u64)-1;
+			struct btrfs_device *cur;
+
+			for (i = 0; i < map->num_stripes; i++) {
+				cur = map->stripes[i].dev;
+				spin_lock(&cur->io_lock);
+				if (cur->total_ios < least) {
+					least = cur->total_ios;
+					stripe_index = i;
+				}
+				spin_unlock(&cur->io_lock);
+			}
+			*total_devs = 1;
+		}
+	} else {
+		/*
+		 * after this do_div call, stripe_nr is the number of stripes
+		 * on this device we have to walk to find the data, and
+		 * stripe_index is the number of our device in the stripe array
+		 */
+		stripe_index = do_div(stripe_nr, map->num_stripes);
+	}
 	BUG_ON(stripe_index >= map->num_stripes);
-
 	*phys = map->stripes[stripe_index].physical + stripe_offset +
 		stripe_nr * map->stripe_len;
 
-	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+	if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) {
 		/* we limit the length of each bio to what fits in a stripe */
 		*length = min_t(u64, em->len - offset,
 			      map->stripe_len - stripe_offset);
@@ -833,33 +864,98 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
 	return 0;
 }
 
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_multi_stripe(struct bio *bio, int err)
+#else
+static int end_bio_multi_stripe(struct bio *bio,
+				   unsigned int bytes_done, int err)
+#endif
+{
+	struct multi_bio *multi = bio->bi_private;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+	if (bio->bi_size)
+		return 1;
+#endif
+	if (err)
+		multi->error = err;
+
+	if (atomic_dec_and_test(&multi->stripes)) {
+		bio->bi_private = multi->private;
+		bio->bi_end_io = multi->end_io;
+
+		if (!err && multi->error)
+			err = multi->error;
+		kfree(multi);
+
+		bio_endio(bio, err);
+	} else {
+		bio_put(bio);
+	}
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+	return 0;
+#endif
+}
+
 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio)
 {
 	struct btrfs_mapping_tree *map_tree;
 	struct btrfs_device *dev;
+	struct bio *first_bio = bio;
 	u64 logical = bio->bi_sector << 9;
 	u64 physical;
 	u64 length = 0;
 	u64 map_length;
 	struct bio_vec *bvec;
+	struct multi_bio *multi = NULL;
 	int i;
 	int ret;
+	int dev_nr = 0;
+	int total_devs = 1;
 
 	bio_for_each_segment(bvec, bio, i) {
 		length += bvec->bv_len;
 	}
+
 	map_tree = &root->fs_info->mapping_tree;
 	map_length = length;
-	ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev);
-	if (map_length < length) {
-		printk("mapping failed logical %Lu bio len %Lu physical %Lu "
-		       "len %Lu\n", logical, length, physical, map_length);
-		BUG();
+	while(dev_nr < total_devs) {
+		ret = btrfs_map_block(map_tree, rw, dev_nr, logical,
+				      &physical, &map_length, &dev,
+				      &total_devs);
+		if (map_length < length) {
+			printk("mapping failed logical %Lu bio len %Lu physical %Lu "
+			       "len %Lu\n", logical, length, physical, map_length);
+			BUG();
+		}
+		BUG_ON(map_length < length);
+		if (total_devs > 1) {
+			if (!multi) {
+				multi = kmalloc(sizeof(*multi), GFP_NOFS);
+				atomic_set(&multi->stripes, 1);
+				multi->end_io = bio->bi_end_io;
+				multi->private = first_bio->bi_private;
+				multi->error = 0;
+			} else {
+				atomic_inc(&multi->stripes);
+			}
+			if (dev_nr < total_devs - 1) {
+				bio = bio_clone(first_bio, GFP_NOFS);
+				BUG_ON(!bio);
+			} else {
+				bio = first_bio;
+			}
+			bio->bi_private = multi;
+			bio->bi_end_io = end_bio_multi_stripe;
+		}
+		bio->bi_sector = physical >> 9;
+		bio->bi_bdev = dev->bdev;
+		spin_lock(&dev->io_lock);
+		dev->total_ios++;
+		spin_unlock(&dev->io_lock);
+		submit_bio(rw, bio);
+		dev_nr++;
 	}
-	BUG_ON(map_length < length);
-	bio->bi_sector = physical >> 9;
-	bio->bi_bdev = dev->bdev;
-	submit_bio(rw, bio);
 	return 0;
 }
 }
 
 
@@ -982,6 +1078,8 @@ static int read_one_dev(struct btrfs_root *root,
 			return -ENOMEM;
 		list_add(&device->dev_list,
 			 &root->fs_info->fs_devices->devices);
+		device->total_ios = 0;
+		spin_lock_init(&device->io_lock);
 	}
 
 	fill_device_from_item(leaf, dev_item, device);

+ 6 - 5
fs/btrfs/volumes.h

@@ -18,12 +18,16 @@
 
 #ifndef __BTRFS_VOLUMES_
 #define __BTRFS_VOLUMES_
+
 struct btrfs_device {
 	struct list_head dev_list;
 	struct btrfs_root *dev_root;
+	spinlock_t io_lock;
 
 	struct block_device *bdev;
 
+	u64 total_ios;
+
 	char *name;
 
 	/* the internal btrfs device id */
 	/* the internal btrfs device id */
@@ -68,9 +72,9 @@ struct btrfs_fs_devices {
 int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
 			   struct btrfs_device *device,
 			   u64 owner, u64 num_bytes, u64 *start);
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int stripe_nr,
 		    u64 logical, u64 *phys, u64 *length,
-		    struct btrfs_device **dev);
+		    struct btrfs_device **dev, int *total_stripes);
 int btrfs_read_sys_array(struct btrfs_root *root);
 int btrfs_read_chunk_tree(struct btrfs_root *root);
 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
@@ -80,9 +84,6 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
 void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio);
 int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf);
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
-		    u64 logical, u64 *phys, u64 *length,
-		    struct btrfs_device **dev);
 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		       int flags, void *holder);
 int btrfs_scan_one_device(const char *path, int flags, void *holder,