|
@@ -503,8 +503,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
|
|
|
|
|
for (i = disks; i--; ) {
|
|
|
int rw;
|
|
|
- struct bio *bi;
|
|
|
- struct md_rdev *rdev;
|
|
|
+ struct bio *bi, *rbi;
|
|
|
+ struct md_rdev *rdev, *rrdev = NULL;
|
|
|
if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
|
|
|
if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
|
|
|
rw = WRITE_FUA;
|
|
@@ -516,27 +516,36 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
|
|
continue;
|
|
|
|
|
|
bi = &sh->dev[i].req;
|
|
|
+ rbi = &sh->dev[i].rreq; /* For writing to replacement */
|
|
|
|
|
|
bi->bi_rw = rw;
|
|
|
- if (rw & WRITE)
|
|
|
+ rbi->bi_rw = rw;
|
|
|
+ if (rw & WRITE) {
|
|
|
bi->bi_end_io = raid5_end_write_request;
|
|
|
- else
|
|
|
+ rbi->bi_end_io = raid5_end_write_request;
|
|
|
+ } else
|
|
|
bi->bi_end_io = raid5_end_read_request;
|
|
|
|
|
|
rcu_read_lock();
|
|
|
- if (rw == READ &&
|
|
|
- test_bit(R5_ReadRepl, &sh->dev[i].flags))
|
|
|
+ rdev = rcu_dereference(conf->disks[i].rdev);
|
|
|
+ if (rw & WRITE)
|
|
|
+ rrdev = rcu_dereference(conf->disks[i].replacement);
|
|
|
+ else if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
|
|
|
rdev = rcu_dereference(conf->disks[i].replacement);
|
|
|
- else
|
|
|
- rdev = rcu_dereference(conf->disks[i].rdev);
|
|
|
+
|
|
|
if (rdev && test_bit(Faulty, &rdev->flags))
|
|
|
rdev = NULL;
|
|
|
if (rdev)
|
|
|
atomic_inc(&rdev->nr_pending);
|
|
|
+ if (rrdev && test_bit(Faulty, &rrdev->flags))
|
|
|
+ rrdev = NULL;
|
|
|
+ if (rrdev)
|
|
|
+ atomic_inc(&rrdev->nr_pending);
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
/* We have already checked bad blocks for reads. Now
|
|
|
- * need to check for writes.
|
|
|
+ * need to check for writes. We never accept write errors
|
|
|
+ * on the replacement, so we don't to check rrdev.
|
|
|
*/
|
|
|
while ((rw & WRITE) && rdev &&
|
|
|
test_bit(WriteErrorSeen, &rdev->flags)) {
|
|
@@ -583,8 +592,32 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
|
|
bi->bi_io_vec[0].bv_offset = 0;
|
|
|
bi->bi_size = STRIPE_SIZE;
|
|
|
bi->bi_next = NULL;
|
|
|
+ if (rrdev)
|
|
|
+ set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
|
|
|
generic_make_request(bi);
|
|
|
- } else {
|
|
|
+ }
|
|
|
+ if (rrdev) {
|
|
|
+ if (s->syncing || s->expanding || s->expanded)
|
|
|
+ md_sync_acct(rrdev->bdev, STRIPE_SECTORS);
|
|
|
+
|
|
|
+ set_bit(STRIPE_IO_STARTED, &sh->state);
|
|
|
+
|
|
|
+ rbi->bi_bdev = rrdev->bdev;
|
|
|
+ pr_debug("%s: for %llu schedule op %ld on "
|
|
|
+ "replacement disc %d\n",
|
|
|
+ __func__, (unsigned long long)sh->sector,
|
|
|
+ rbi->bi_rw, i);
|
|
|
+ atomic_inc(&sh->count);
|
|
|
+ rbi->bi_sector = sh->sector + rrdev->data_offset;
|
|
|
+ rbi->bi_flags = 1 << BIO_UPTODATE;
|
|
|
+ rbi->bi_idx = 0;
|
|
|
+ rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
|
|
|
+ rbi->bi_io_vec[0].bv_offset = 0;
|
|
|
+ rbi->bi_size = STRIPE_SIZE;
|
|
|
+ rbi->bi_next = NULL;
|
|
|
+ generic_make_request(rbi);
|
|
|
+ }
|
|
|
+ if (!rdev && !rrdev) {
|
|
|
if (rw & WRITE)
|
|
|
set_bit(STRIPE_DEGRADED, &sh->state);
|
|
|
pr_debug("skip op %ld on disc %d for sector %llu\n",
|
|
@@ -1695,14 +1728,23 @@ static void raid5_end_write_request(struct bio *bi, int error)
|
|
|
struct stripe_head *sh = bi->bi_private;
|
|
|
struct r5conf *conf = sh->raid_conf;
|
|
|
int disks = sh->disks, i;
|
|
|
+ struct md_rdev *uninitialized_var(rdev);
|
|
|
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
|
|
|
sector_t first_bad;
|
|
|
int bad_sectors;
|
|
|
+ int replacement = 0;
|
|
|
|
|
|
- for (i=0 ; i<disks; i++)
|
|
|
- if (bi == &sh->dev[i].req)
|
|
|
+ for (i = 0 ; i < disks; i++) {
|
|
|
+ if (bi == &sh->dev[i].req) {
|
|
|
+ rdev = conf->disks[i].rdev;
|
|
|
break;
|
|
|
-
|
|
|
+ }
|
|
|
+ if (bi == &sh->dev[i].rreq) {
|
|
|
+ rdev = conf->disks[i].replacement;
|
|
|
+ replacement = 1;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n",
|
|
|
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
|
|
|
uptodate);
|
|
@@ -1711,21 +1753,30 @@ static void raid5_end_write_request(struct bio *bi, int error)
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- if (!uptodate) {
|
|
|
- set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags);
|
|
|
- set_bit(R5_WriteError, &sh->dev[i].flags);
|
|
|
- } else if (is_badblock(conf->disks[i].rdev, sh->sector, STRIPE_SECTORS,
|
|
|
- &first_bad, &bad_sectors))
|
|
|
- set_bit(R5_MadeGood, &sh->dev[i].flags);
|
|
|
+ if (replacement) {
|
|
|
+ if (!uptodate)
|
|
|
+ md_error(conf->mddev, rdev);
|
|
|
+ else if (is_badblock(rdev, sh->sector,
|
|
|
+ STRIPE_SECTORS,
|
|
|
+ &first_bad, &bad_sectors))
|
|
|
+ set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
|
|
|
+ } else {
|
|
|
+ if (!uptodate) {
|
|
|
+ set_bit(WriteErrorSeen, &rdev->flags);
|
|
|
+ set_bit(R5_WriteError, &sh->dev[i].flags);
|
|
|
+ } else if (is_badblock(rdev, sh->sector,
|
|
|
+ STRIPE_SECTORS,
|
|
|
+ &first_bad, &bad_sectors))
|
|
|
+ set_bit(R5_MadeGood, &sh->dev[i].flags);
|
|
|
+ }
|
|
|
+ rdev_dec_pending(rdev, conf->mddev);
|
|
|
|
|
|
- rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
|
|
|
-
|
|
|
- clear_bit(R5_LOCKED, &sh->dev[i].flags);
|
|
|
+ if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
|
|
|
+ clear_bit(R5_LOCKED, &sh->dev[i].flags);
|
|
|
set_bit(STRIPE_HANDLE, &sh->state);
|
|
|
release_stripe(sh);
|
|
|
}
|
|
|
|
|
|
-
|
|
|
static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
|
|
|
|
|
|
static void raid5_build_block(struct stripe_head *sh, int i, int previous)
|
|
@@ -1739,6 +1790,13 @@ static void raid5_build_block(struct stripe_head *sh, int i, int previous)
|
|
|
dev->req.bi_private = sh;
|
|
|
dev->vec.bv_page = dev->page;
|
|
|
|
|
|
+ bio_init(&dev->rreq);
|
|
|
+ dev->rreq.bi_io_vec = &dev->rvec;
|
|
|
+ dev->rreq.bi_vcnt++;
|
|
|
+ dev->rreq.bi_max_vecs++;
|
|
|
+ dev->rreq.bi_private = sh;
|
|
|
+ dev->rvec.bv_page = dev->page;
|
|
|
+
|
|
|
dev->flags = 0;
|
|
|
dev->sector = compute_blocknr(sh, i, previous);
|
|
|
}
|
|
@@ -3132,6 +3190,15 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
|
|
} else
|
|
|
clear_bit(R5_MadeGood, &dev->flags);
|
|
|
}
|
|
|
+ if (test_bit(R5_MadeGoodRepl, &dev->flags)) {
|
|
|
+ struct md_rdev *rdev2 = rcu_dereference(
|
|
|
+ conf->disks[i].replacement);
|
|
|
+ if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
|
|
|
+ s->handle_bad_blocks = 1;
|
|
|
+ atomic_inc(&rdev2->nr_pending);
|
|
|
+ } else
|
|
|
+ clear_bit(R5_MadeGoodRepl, &dev->flags);
|
|
|
+ }
|
|
|
if (!test_bit(R5_Insync, &dev->flags)) {
|
|
|
/* The ReadError flag will just be confusing now */
|
|
|
clear_bit(R5_ReadError, &dev->flags);
|
|
@@ -3404,6 +3471,12 @@ finish:
|
|
|
STRIPE_SECTORS);
|
|
|
rdev_dec_pending(rdev, conf->mddev);
|
|
|
}
|
|
|
+ if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
|
|
|
+ rdev = conf->disks[i].replacement;
|
|
|
+ rdev_clear_badblocks(rdev, sh->sector,
|
|
|
+ STRIPE_SECTORS);
|
|
|
+ rdev_dec_pending(rdev, conf->mddev);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
if (s.ops_request)
|