|
@@ -532,13 +532,21 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
|
|
bi->bi_end_io = raid5_end_read_request;
|
|
|
|
|
|
rcu_read_lock();
|
|
|
- rdev = rcu_dereference(conf->disks[i].rdev);
|
|
|
rrdev = rcu_dereference(conf->disks[i].replacement);
|
|
|
+ smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */
|
|
|
+ rdev = rcu_dereference(conf->disks[i].rdev);
|
|
|
+ if (!rdev) {
|
|
|
+ rdev = rrdev;
|
|
|
+ rrdev = NULL;
|
|
|
+ }
|
|
|
if (rw & WRITE) {
|
|
|
if (replace_only)
|
|
|
rdev = NULL;
|
|
|
+ if (rdev == rrdev)
|
|
|
+ /* We raced and saw duplicates */
|
|
|
+ rrdev = NULL;
|
|
|
} else {
|
|
|
- if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
|
|
|
+ if (test_bit(R5_ReadRepl, &sh->dev[i].flags) && rrdev)
|
|
|
rdev = rrdev;
|
|
|
rrdev = NULL;
|
|
|
}
|
|
@@ -1640,7 +1648,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
|
|
|
int disks = sh->disks, i;
|
|
|
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
|
|
|
char b[BDEVNAME_SIZE];
|
|
|
- struct md_rdev *rdev;
|
|
|
+ struct md_rdev *rdev = NULL;
|
|
|
|
|
|
|
|
|
for (i=0 ; i<disks; i++)
|
|
@@ -1655,8 +1663,13 @@ static void raid5_end_read_request(struct bio * bi, int error)
|
|
|
return;
|
|
|
}
|
|
|
if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
|
|
|
+ /* If replacement finished while this request was outstanding,
|
|
|
+ * 'replacement' might be NULL already.
|
|
|
+ * In that case it moved down to 'rdev'.
|
|
|
+ * rdev is not removed until all requests are finished.
|
|
|
+ */
|
|
|
rdev = conf->disks[i].replacement;
|
|
|
- else
|
|
|
+ if (!rdev)
|
|
|
rdev = conf->disks[i].rdev;
|
|
|
|
|
|
if (uptodate) {
|
|
@@ -1753,7 +1766,14 @@ static void raid5_end_write_request(struct bio *bi, int error)
|
|
|
}
|
|
|
if (bi == &sh->dev[i].rreq) {
|
|
|
rdev = conf->disks[i].replacement;
|
|
|
- replacement = 1;
|
|
|
+ if (rdev)
|
|
|
+ replacement = 1;
|
|
|
+ else
|
|
|
+ /* rdev was removed and 'replacement'
|
|
|
+ * replaced it. rdev is not removed
|
|
|
+ * until all requests are finished.
|
|
|
+ */
|
|
|
+ rdev = conf->disks[i].rdev;
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
@@ -3539,6 +3559,9 @@ finish:
|
|
|
}
|
|
|
if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
|
|
|
rdev = conf->disks[i].replacement;
|
|
|
+ if (!rdev)
|
|
|
+ /* rdev have been moved down */
|
|
|
+ rdev = conf->disks[i].rdev;
|
|
|
rdev_clear_badblocks(rdev, sh->sector,
|
|
|
STRIPE_SECTORS);
|
|
|
rdev_dec_pending(rdev, conf->mddev);
|
|
@@ -5204,7 +5227,25 @@ static int raid5_spare_active(struct mddev *mddev)
|
|
|
|
|
|
for (i = 0; i < conf->raid_disks; i++) {
|
|
|
tmp = conf->disks + i;
|
|
|
- if (tmp->rdev
|
|
|
+ if (tmp->replacement
|
|
|
+ && tmp->replacement->recovery_offset == MaxSector
|
|
|
+ && !test_bit(Faulty, &tmp->replacement->flags)
|
|
|
+ && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
|
|
|
+ /* Replacement has just become active. */
|
|
|
+ if (!tmp->rdev
|
|
|
+ || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
|
|
|
+ count++;
|
|
|
+ if (tmp->rdev) {
|
|
|
+ /* Replaced device not technically faulty,
|
|
|
+ * but we need to be sure it gets removed
|
|
|
+ * and never re-added.
|
|
|
+ */
|
|
|
+ set_bit(Faulty, &tmp->rdev->flags);
|
|
|
+ sysfs_notify_dirent_safe(
|
|
|
+ tmp->rdev->sysfs_state);
|
|
|
+ }
|
|
|
+ sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
|
|
|
+ } else if (tmp->rdev
|
|
|
&& tmp->rdev->recovery_offset == MaxSector
|
|
|
&& !test_bit(Faulty, &tmp->rdev->flags)
|
|
|
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
|
|
@@ -5250,6 +5291,7 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
|
|
if (!test_bit(Faulty, &rdev->flags) &&
|
|
|
mddev->recovery_disabled != conf->recovery_disabled &&
|
|
|
!has_failed(conf) &&
|
|
|
+ (!p->replacement || p->replacement == rdev) &&
|
|
|
number < conf->raid_disks) {
|
|
|
err = -EBUSY;
|
|
|
goto abort;
|
|
@@ -5260,7 +5302,20 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
|
|
/* lost the race, try later */
|
|
|
err = -EBUSY;
|
|
|
*rdevp = rdev;
|
|
|
- }
|
|
|
+ } else if (p->replacement) {
|
|
|
+ /* We must have just cleared 'rdev' */
|
|
|
+ p->rdev = p->replacement;
|
|
|
+ clear_bit(Replacement, &p->replacement->flags);
|
|
|
+ smp_mb(); /* Make sure other CPUs may see both as identical
|
|
|
+ * but will never see neither - if they are careful
|
|
|
+ */
|
|
|
+ p->replacement = NULL;
|
|
|
+ clear_bit(WantReplacement, &rdev->flags);
|
|
|
+ } else
|
|
|
+ /* We might have just removed the Replacement as faulty-
|
|
|
+ * clear the bit just in case
|
|
|
+ */
|
|
|
+ clear_bit(WantReplacement, &rdev->flags);
|
|
|
abort:
|
|
|
|
|
|
print_raid5_conf(conf);
|