|
@@ -524,7 +524,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
|
|
bi->bi_end_io = raid5_end_read_request;
|
|
|
|
|
|
rcu_read_lock();
|
|
|
- rdev = rcu_dereference(conf->disks[i].rdev);
|
|
|
+ if (rw == READ &&
|
|
|
+ test_bit(R5_ReadRepl, &sh->dev[i].flags))
|
|
|
+ rdev = rcu_dereference(conf->disks[i].replacement);
|
|
|
+ else
|
|
|
+ rdev = rcu_dereference(conf->disks[i].rdev);
|
|
|
if (rdev && test_bit(Faulty, &rdev->flags))
|
|
|
rdev = NULL;
|
|
|
if (rdev)
|
|
@@ -1605,11 +1609,18 @@ static void raid5_end_read_request(struct bio * bi, int error)
|
|
|
BUG();
|
|
|
return;
|
|
|
}
|
|
|
+ if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
|
|
|
+ rdev = conf->disks[i].replacement;
|
|
|
+ else
|
|
|
+ rdev = conf->disks[i].rdev;
|
|
|
|
|
|
if (uptodate) {
|
|
|
set_bit(R5_UPTODATE, &sh->dev[i].flags);
|
|
|
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
|
|
|
- rdev = conf->disks[i].rdev;
|
|
|
+ /* Note that this cannot happen on a
|
|
|
+ * replacement device. We just fail those on
|
|
|
+ * any error
|
|
|
+ */
|
|
|
printk_ratelimited(
|
|
|
KERN_INFO
|
|
|
"md/raid:%s: read error corrected"
|
|
@@ -1622,16 +1633,24 @@ static void raid5_end_read_request(struct bio * bi, int error)
|
|
|
clear_bit(R5_ReadError, &sh->dev[i].flags);
|
|
|
clear_bit(R5_ReWrite, &sh->dev[i].flags);
|
|
|
}
|
|
|
- if (atomic_read(&conf->disks[i].rdev->read_errors))
|
|
|
- atomic_set(&conf->disks[i].rdev->read_errors, 0);
|
|
|
+ if (atomic_read(&rdev->read_errors))
|
|
|
+ atomic_set(&rdev->read_errors, 0);
|
|
|
} else {
|
|
|
- const char *bdn = bdevname(conf->disks[i].rdev->bdev, b);
|
|
|
+ const char *bdn = bdevname(rdev->bdev, b);
|
|
|
int retry = 0;
|
|
|
- rdev = conf->disks[i].rdev;
|
|
|
|
|
|
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
|
|
|
atomic_inc(&rdev->read_errors);
|
|
|
- if (conf->mddev->degraded >= conf->max_degraded)
|
|
|
+ if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
|
|
|
+ printk_ratelimited(
|
|
|
+ KERN_WARNING
|
|
|
+ "md/raid:%s: read error on replacement device "
|
|
|
+ "(sector %llu on %s).\n",
|
|
|
+ mdname(conf->mddev),
|
|
|
+ (unsigned long long)(sh->sector
|
|
|
+ + rdev->data_offset),
|
|
|
+ bdn);
|
|
|
+ else if (conf->mddev->degraded >= conf->max_degraded)
|
|
|
printk_ratelimited(
|
|
|
KERN_WARNING
|
|
|
"md/raid:%s: read error not correctable "
|
|
@@ -1665,7 +1684,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
|
|
|
md_error(conf->mddev, rdev);
|
|
|
}
|
|
|
}
|
|
|
- rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
|
|
|
+ rdev_dec_pending(rdev, conf->mddev);
|
|
|
clear_bit(R5_LOCKED, &sh->dev[i].flags);
|
|
|
set_bit(STRIPE_HANDLE, &sh->state);
|
|
|
release_stripe(sh);
|
|
@@ -3036,7 +3055,19 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
|
|
}
|
|
|
if (dev->written)
|
|
|
s->written++;
|
|
|
- rdev = rcu_dereference(conf->disks[i].rdev);
|
|
|
+ /* Prefer to use the replacement for reads, but only
|
|
|
+ * if it is recovered enough and has no bad blocks.
|
|
|
+ */
|
|
|
+ rdev = rcu_dereference(conf->disks[i].replacement);
|
|
|
+ if (rdev && !test_bit(Faulty, &rdev->flags) &&
|
|
|
+ rdev->recovery_offset >= sh->sector + STRIPE_SECTORS &&
|
|
|
+ !is_badblock(rdev, sh->sector, STRIPE_SECTORS,
|
|
|
+ &first_bad, &bad_sectors))
|
|
|
+ set_bit(R5_ReadRepl, &dev->flags);
|
|
|
+ else {
|
|
|
+ rdev = rcu_dereference(conf->disks[i].rdev);
|
|
|
+ clear_bit(R5_ReadRepl, &dev->flags);
|
|
|
+ }
|
|
|
if (rdev && test_bit(Faulty, &rdev->flags))
|
|
|
rdev = NULL;
|
|
|
if (rdev) {
|
|
@@ -3078,17 +3109,26 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
|
|
set_bit(R5_Insync, &dev->flags);
|
|
|
|
|
|
if (rdev && test_bit(R5_WriteError, &dev->flags)) {
|
|
|
- clear_bit(R5_Insync, &dev->flags);
|
|
|
- if (!test_bit(Faulty, &rdev->flags)) {
|
|
|
+ /* This flag does not apply to '.replacement',
|
|
|
+ * only to '.rdev', so make sure to check that. */
|
|
|
+ struct md_rdev *rdev2 = rcu_dereference(
|
|
|
+ conf->disks[i].rdev);
|
|
|
+ if (rdev2 == rdev)
|
|
|
+ clear_bit(R5_Insync, &dev->flags);
|
|
|
+ if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
|
|
|
s->handle_bad_blocks = 1;
|
|
|
- atomic_inc(&rdev->nr_pending);
|
|
|
+ atomic_inc(&rdev2->nr_pending);
|
|
|
} else
|
|
|
clear_bit(R5_WriteError, &dev->flags);
|
|
|
}
|
|
|
if (rdev && test_bit(R5_MadeGood, &dev->flags)) {
|
|
|
- if (!test_bit(Faulty, &rdev->flags)) {
|
|
|
+ /* This flag does not apply to '.replacement',
|
|
|
+ * only to '.rdev', so make sure to check that. */
|
|
|
+ struct md_rdev *rdev2 = rcu_dereference(
|
|
|
+ conf->disks[i].rdev);
|
|
|
+ if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
|
|
|
s->handle_bad_blocks = 1;
|
|
|
- atomic_inc(&rdev->nr_pending);
|
|
|
+ atomic_inc(&rdev2->nr_pending);
|
|
|
} else
|
|
|
clear_bit(R5_MadeGood, &dev->flags);
|
|
|
}
|
|
@@ -4220,7 +4260,6 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
|
|
|
return handled;
|
|
|
}
|
|
|
|
|
|
- set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
|
|
|
if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
|
|
|
release_stripe(sh);
|
|
|
raid5_set_bi_hw_segments(raid_bio, scnt);
|