@@ -17,6 +17,7 @@
 #include "compat.h"
 #include "ctree.h"
 #include "btrfs_inode.h"
+#include "volumes.h"
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
@@ -1599,6 +1600,368 @@ static int check_page_writeback(struct extent_io_tree *tree,
 	return 0;
 }
 
+/*
+ * When IO fails, either with EIO or because csum verification fails, we
+ * try other mirrors that might have a good copy of the data. This
+ * io_failure_record is used to record state as we go through all the
+ * mirrors. If another mirror has good data, the page is set up to date
+ * and things continue. If a good mirror can't be found, the original
+ * bio end_io callback is called to indicate things have failed.
+ */
+struct io_failure_record {
+	struct page *page;
+	u64 start;
+	u64 len;
+	u64 logical;
+	unsigned long bio_flags;
+	int this_mirror;
+	int failed_mirror;
+	int in_validation;
+};
+
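+/*
+ * drop the failure record for the given range: clear its bits from the
+ * failure tree and, if the damage was actually repaired, the EXTENT_DAMAGED
+ * bit from the inode's io tree, then free the record itself.
+ */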
+static int free_io_failure(struct inode *inode, struct io_failure_record *rec,
+			   int did_repair)
+{
+	int ret;
+	int err = 0;
+	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
+
+	set_state_private(failure_tree, rec->start, 0);
+	ret = clear_extent_bits(failure_tree, rec->start,
+				rec->start + rec->len - 1,
+				EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
+	if (ret)
+		err = ret;
+
+	if (did_repair) {
+		ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start,
+					rec->start + rec->len - 1,
+					EXTENT_DAMAGED, GFP_NOFS);
+		if (ret && !err)
+			err = ret;
+	}
+
+	kfree(rec);
+	return err;
+}
+
+static void repair_io_failure_callback(struct bio *bio, int err)
+{
+	complete(bio->bi_private);
+}
+
+/*
+ * this bypasses the standard btrfs submit functions deliberately, as
+ * the standard behavior is to write all copies in a raid setup. here we only
+ * want to write the one bad copy. so we do the mapping for ourselves and issue
+ * submit_bio directly.
+ * to avoid any synchronization issues, wait for the data after writing, which
+ * actually prevents the read that triggered the error from finishing.
+ * currently, there can be no more than two copies of every data bit. thus,
+ * exactly one rewrite is required.
+ */
+int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
+		      u64 length, u64 logical, struct page *page,
+		      int mirror_num)
+{
+	struct bio *bio;
+	struct btrfs_device *dev;
+	DECLARE_COMPLETION_ONSTACK(compl);
+	u64 map_length = 0;
+	u64 sector;
+	struct btrfs_bio *bbio = NULL;
+	int ret;
+
+	BUG_ON(!mirror_num);
+
+	bio = bio_alloc(GFP_NOFS, 1);
+	if (!bio)
+		return -EIO;
+	bio->bi_private = &compl;
+	bio->bi_end_io = repair_io_failure_callback;
+	bio->bi_size = 0;
+	map_length = length;
+
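+	/*
+	 * map the logical address to a physical location; we only want to
+	 * rewrite the single mirror that was reported bad, not all copies.
+	 */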
+	ret = btrfs_map_block(map_tree, WRITE, logical,
+			      &map_length, &bbio, mirror_num);
+	if (ret) {
+		bio_put(bio);
+		return -EIO;
+	}
+	BUG_ON(mirror_num != bbio->mirror_num);
+	sector = bbio->stripes[mirror_num-1].physical >> 9;
+	bio->bi_sector = sector;
+	dev = bbio->stripes[mirror_num-1].dev;
+	kfree(bbio);
+	if (!dev || !dev->bdev || !dev->writeable) {
+		bio_put(bio);
+		return -EIO;
+	}
+	bio->bi_bdev = dev->bdev;
+	bio_add_page(bio, page, length, start - page_offset(page));
+	submit_bio(WRITE_SYNC, bio);
+	wait_for_completion(&compl);
+
+	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+		/* try to remap that extent elsewhere? */
+		bio_put(bio);
+		return -EIO;
+	}
+
+	printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s "
+			 "sector %llu)\n", page->mapping->host->i_ino, start,
+			 dev->name, sector);
+
+	bio_put(bio);
+	return 0;
+}
+
+/*
+ * each time an IO finishes, we do a fast check in the IO failure tree
+ * to see if we need to process or clean up an io_failure_record
+ */
+static int clean_io_failure(u64 start, struct page *page)
+{
+	u64 private;
+	u64 private_failure;
+	struct io_failure_record *failrec;
+	struct btrfs_mapping_tree *map_tree;
+	struct extent_state *state;
+	int num_copies;
+	int did_repair = 0;
+	int ret;
+	struct inode *inode = page->mapping->host;
+
+	private = 0;
+	ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
+			       (u64)-1, 1, EXTENT_DIRTY, 0);
+	if (!ret)
+		return 0;
+
+	ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start,
+				&private_failure);
+	if (ret)
+		return 0;
+
+	failrec = (struct io_failure_record *)(unsigned long)private_failure;
+	BUG_ON(!failrec->this_mirror);
+
+	if (failrec->in_validation) {
+		/* there was no real error, just free the record */
+		pr_debug("clean_io_failure: freeing dummy error at %llu\n",
+			 failrec->start);
+		did_repair = 1;
+		goto out;
+	}
+
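+	/*
+	 * only attempt the rewrite while the failed range is still locked in
+	 * the io tree (i.e. the read that spotted the corruption has not
+	 * completed yet) and another copy exists; otherwise just drop the
+	 * record below.
+	 */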
+	spin_lock(&BTRFS_I(inode)->io_tree.lock);
+	state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
+					    failrec->start,
+					    EXTENT_LOCKED);
+	spin_unlock(&BTRFS_I(inode)->io_tree.lock);
+
+	if (state && state->start == failrec->start) {
+		map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree;
+		num_copies = btrfs_num_copies(map_tree, failrec->logical,
+					      failrec->len);
+		if (num_copies > 1) {
+			ret = repair_io_failure(map_tree, start, failrec->len,
+						failrec->logical, page,
+						failrec->failed_mirror);
+			did_repair = !ret;
+		}
+	}
+
+out:
+	if (!ret)
+		ret = free_io_failure(inode, failrec, did_repair);
+
+	return ret;
+}
+
+/*
+ * this is a generic handler for readpage errors (default
+ * readpage_io_failed_hook). if other copies exist, read those and write back
+ * good data to the failed position. it does not try to remap the failed
+ * extent elsewhere, hoping the device will be smart enough to do this as
+ * needed
+ */
+
+static int bio_readpage_error(struct bio *failed_bio, struct page *page,
+				u64 start, u64 end, int failed_mirror,
+				struct extent_state *state)
+{
+	struct io_failure_record *failrec = NULL;
+	u64 private;
+	struct extent_map *em;
+	struct inode *inode = page->mapping->host;
+	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
+	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct bio *bio;
+	int num_copies;
+	int ret;
+	int read_mode;
+	u64 logical;
+
+	BUG_ON(failed_bio->bi_rw & REQ_WRITE);
+
+	ret = get_state_private(failure_tree, start, &private);
+	if (ret) {
+		failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
+		if (!failrec)
+			return -ENOMEM;
+		failrec->start = start;
+		failrec->len = end - start + 1;
+		failrec->this_mirror = 0;
+		failrec->bio_flags = 0;
+		failrec->in_validation = 0;
+
+		read_lock(&em_tree->lock);
+		em = lookup_extent_mapping(em_tree, start, failrec->len);
+		if (!em) {
+			read_unlock(&em_tree->lock);
+			kfree(failrec);
+			return -EIO;
+		}
+
+		if (em->start > start || em->start + em->len < start) {
+			free_extent_map(em);
+			em = NULL;
+		}
+		read_unlock(&em_tree->lock);
+
+		if (!em || IS_ERR(em)) {
+			kfree(failrec);
+			return -EIO;
+		}
+		logical = start - em->start;
+		logical = em->block_start + logical;
+		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
+			logical = em->block_start;
+			failrec->bio_flags = EXTENT_BIO_COMPRESSED;
+			extent_set_compress_type(&failrec->bio_flags,
+						 em->compress_type);
+		}
+		pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, "
+			 "len=%llu\n", logical, start, failrec->len);
+		failrec->logical = logical;
+		free_extent_map(em);
+
+		/* set the bits in the private failure tree */
+		ret = set_extent_bits(failure_tree, start, end,
+				      EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
+		if (ret >= 0)
+			ret = set_state_private(failure_tree, start,
+						(u64)(unsigned long)failrec);
+		/* set the bits in the inode's tree */
+		if (ret >= 0)
+			ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED,
+					      GFP_NOFS);
+		if (ret < 0) {
+			kfree(failrec);
+			return ret;
+		}
+	} else {
+		failrec = (struct io_failure_record *)(unsigned long)private;
+		pr_debug("bio_readpage_error: (found) logical=%llu, "
+			 "start=%llu, len=%llu, validation=%d\n",
+			 failrec->logical, failrec->start, failrec->len,
+			 failrec->in_validation);
+		/*
+		 * when data can be on disk more than twice, add to failrec here
+		 * (e.g. with a list for failed_mirror) to make
+		 * clean_io_failure() clean all those errors at once.
+		 */
+	}
+	num_copies = btrfs_num_copies(
+			      &BTRFS_I(inode)->root->fs_info->mapping_tree,
+			      failrec->logical, failrec->len);
+	if (num_copies == 1) {
+		/*
+		 * we only have a single copy of the data, so don't bother with
+		 * all the retry and error correction code that follows. no
+		 * matter what the error is, it is very likely to persist.
+		 */
+		pr_debug("bio_readpage_error: cannot repair, num_copies == 1. "
+			 "state=%p, num_copies=%d, next_mirror %d, "
+			 "failed_mirror %d\n", state, num_copies,
+			 failrec->this_mirror, failed_mirror);
+		free_io_failure(inode, failrec, 0);
+		return -EIO;
+	}
+
+	if (!state) {
+		spin_lock(&tree->lock);
+		state = find_first_extent_bit_state(tree, failrec->start,
+						    EXTENT_LOCKED);
+		if (state && state->start != failrec->start)
+			state = NULL;
+		spin_unlock(&tree->lock);
+	}
+
+	/*
+	 * there are two goals here:
+	 * a) deliver good data to the caller
+	 * b) correct the bad sectors on disk
+	 */
+	if (failed_bio->bi_vcnt > 1) {
+		/*
+		 * to fulfill b), we need to know the exact failing sectors, as
+		 * we don't want to rewrite any more than the failed ones. thus,
+		 * we need separate read requests for the failed bio
+		 *
+		 * if the following BUG_ON triggers, our validation request got
+		 * merged. we need separate requests for our algorithm to work.
+		 */
+		BUG_ON(failrec->in_validation);
+		failrec->in_validation = 1;
+		failrec->this_mirror = failed_mirror;
+		read_mode = READ_SYNC | REQ_FAILFAST_DEV;
+	} else {
+		/*
+		 * we're ready to fulfill a) and b) alongside. get a good copy
+		 * of the failed sector and if we succeed, we have set up
+		 * everything for repair_io_failure to do the rest for us.
+		 */
+		if (failrec->in_validation) {
+			BUG_ON(failrec->this_mirror != failed_mirror);
+			failrec->in_validation = 0;
+			failrec->this_mirror = 0;
+		}
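+		/*
+		 * remember which mirror returned bad data and advance
+		 * this_mirror to the next copy, skipping the one that failed.
+		 */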
+		failrec->failed_mirror = failed_mirror;
+		failrec->this_mirror++;
+		if (failrec->this_mirror == failed_mirror)
+			failrec->this_mirror++;
+		read_mode = READ_SYNC;
+	}
+
+	if (!state || failrec->this_mirror > num_copies) {
+		pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, "
+			 "next_mirror %d, failed_mirror %d\n", state,
+			 num_copies, failrec->this_mirror, failed_mirror);
+		free_io_failure(inode, failrec, 0);
+		return -EIO;
+	}
+
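+	/*
+	 * resubmit the read for this page from the chosen mirror, reusing the
+	 * original bio's end_io callback so a successful retry completes the
+	 * page as usual and gets a chance to repair the bad copy.
+	 */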
+	bio = bio_alloc(GFP_NOFS, 1);
+	bio->bi_private = state;
+	bio->bi_end_io = failed_bio->bi_end_io;
+	bio->bi_sector = failrec->logical >> 9;
+	bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
+	bio->bi_size = 0;
+
+	bio_add_page(bio, page, failrec->len, start - page_offset(page));
+
+	pr_debug("bio_readpage_error: submitting new read[%#x] to "
+		 "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,
+		 failrec->this_mirror, num_copies, failrec->in_validation);
+
+	tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror,
+				   failrec->bio_flags, 0);
+	return 0;
+}
+
 /* lots and lots of room for performance fixes in the end_bio funcs */
 
 /*
@@ -1697,6 +2060,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		struct extent_state *cached = NULL;
 		struct extent_state *state;
 
+		pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, "
+			 "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err,
+			 (long int)bio->bi_bdev);
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1727,11 +2093,19 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 							      state);
 			if (ret)
 				uptodate = 0;
+			else
+				clean_io_failure(start, page);
 		}
-		if (!uptodate && tree->ops &&
-		    tree->ops->readpage_io_failed_hook) {
-			ret = tree->ops->readpage_io_failed_hook(bio, page,
-							 start, end, NULL);
+		if (!uptodate) {
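+			/*
+			 * the lower layer hands the mirror number that served
+			 * this read back via bi_bdev (see the pr_debug above);
+			 * pass it along so the retry can pick another copy.
+			 */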
+			u64 failed_mirror;
+			failed_mirror = (u64)bio->bi_bdev;
+			if (tree->ops && tree->ops->readpage_io_failed_hook)
+				ret = tree->ops->readpage_io_failed_hook(
+						bio, page, start, end,
+						failed_mirror, NULL);
+			else
+				ret = bio_readpage_error(bio, page, start, end,
+							 failed_mirror, NULL);
 			if (ret == 0) {
 				uptodate =
 					test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -1811,6 +2185,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
 					   mirror_num, bio_flags, start);
 	else
 		submit_bio(rw, bio);
+
 	if (bio_flagged(bio, BIO_EOPNOTSUPP))
 		ret = -EOPNOTSUPP;
 	bio_put(bio);
@@ -2926,7 +3301,7 @@ out:
 	return ret;
 }
 
-static inline struct page *extent_buffer_page(struct extent_buffer *eb,
+inline struct page *extent_buffer_page(struct extent_buffer *eb,
 					      unsigned long i)
 {
 	struct page *p;
@@ -2951,7 +3326,7 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb,
 	return p;
 }
 
-static inline unsigned long num_extent_pages(u64 start, u64 len)
+inline unsigned long num_extent_pages(u64 start, u64 len)
 {
 	return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
 		(start >> PAGE_CACHE_SHIFT);