|
@@ -411,10 +411,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
|
|
|
{
|
|
|
const sector_t this_sector = r1_bio->sector;
|
|
|
const int sectors = r1_bio->sectors;
|
|
|
- int new_disk = -1;
|
|
|
int start_disk;
|
|
|
+ int best_disk;
|
|
|
int i;
|
|
|
- sector_t new_distance, current_distance;
|
|
|
+ sector_t best_dist;
|
|
|
mdk_rdev_t *rdev;
|
|
|
int choose_first;
|
|
|
|
|
@@ -425,6 +425,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
|
|
|
* We take the first readable disk when above the resync window.
|
|
|
*/
|
|
|
retry:
|
|
|
+ best_disk = -1;
|
|
|
+ best_dist = MaxSector;
|
|
|
if (conf->mddev->recovery_cp < MaxSector &&
|
|
|
(this_sector + sectors >= conf->next_resync)) {
|
|
|
choose_first = 1;
|
|
@@ -434,8 +436,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
|
|
|
start_disk = conf->last_used;
|
|
|
}
|
|
|
|
|
|
- /* make sure the disk is operational */
|
|
|
for (i = 0 ; i < conf->raid_disks ; i++) {
|
|
|
+ sector_t dist;
|
|
|
int disk = start_disk + i;
|
|
|
if (disk >= conf->raid_disks)
|
|
|
disk -= conf->raid_disks;
|
|
@@ -443,60 +445,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
|
|
|
rdev = rcu_dereference(conf->mirrors[disk].rdev);
|
|
|
if (r1_bio->bios[disk] == IO_BLOCKED
|
|
|
|| rdev == NULL
|
|
|
- || !test_bit(In_sync, &rdev->flags))
|
|
|
+ || test_bit(Faulty, &rdev->flags))
|
|
|
continue;
|
|
|
-
|
|
|
- new_disk = disk;
|
|
|
- if (!test_bit(WriteMostly, &rdev->flags))
|
|
|
- break;
|
|
|
- }
|
|
|
-
|
|
|
- if (new_disk < 0 || choose_first)
|
|
|
- goto rb_out;
|
|
|
-
|
|
|
- /*
|
|
|
- * Don't change to another disk for sequential reads:
|
|
|
- */
|
|
|
- if (conf->next_seq_sect == this_sector)
|
|
|
- goto rb_out;
|
|
|
- if (this_sector == conf->mirrors[new_disk].head_position)
|
|
|
- goto rb_out;
|
|
|
-
|
|
|
- current_distance = abs(this_sector
|
|
|
- - conf->mirrors[new_disk].head_position);
|
|
|
-
|
|
|
- /* look for a better disk - i.e. head is closer */
|
|
|
- start_disk = new_disk;
|
|
|
- for (i = 1; i < conf->raid_disks; i++) {
|
|
|
- int disk = start_disk + 1;
|
|
|
- if (disk >= conf->raid_disks)
|
|
|
- disk -= conf->raid_disks;
|
|
|
-
|
|
|
- rdev = rcu_dereference(conf->mirrors[disk].rdev);
|
|
|
- if (r1_bio->bios[disk] == IO_BLOCKED
|
|
|
- || rdev == NULL
|
|
|
- || !test_bit(In_sync, &rdev->flags)
|
|
|
- || test_bit(WriteMostly, &rdev->flags))
|
|
|
+ if (!test_bit(In_sync, &rdev->flags) &&
|
|
|
+ rdev->recovery_offset < this_sector + sectors)
|
|
|
continue;
|
|
|
-
|
|
|
- if (!atomic_read(&rdev->nr_pending)) {
|
|
|
- new_disk = disk;
|
|
|
+ if (test_bit(WriteMostly, &rdev->flags)) {
|
|
|
+ /* Don't balance among write-mostly, just
|
|
|
+ * use the first as a last resort */
|
|
|
+ if (best_disk < 0)
|
|
|
+ best_disk = disk;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ /* This is a reasonable device to use. It might
|
|
|
+ * even be best.
|
|
|
+ */
|
|
|
+ dist = abs(this_sector - conf->mirrors[disk].head_position);
|
|
|
+ if (choose_first
|
|
|
+ /* Don't change to another disk for sequential reads */
|
|
|
+ || conf->next_seq_sect == this_sector
|
|
|
+ || dist == 0
|
|
|
+ /* If device is idle, use it */
|
|
|
+ || atomic_read(&rdev->nr_pending) == 0) {
|
|
|
+ best_disk = disk;
|
|
|
break;
|
|
|
}
|
|
|
- new_distance = abs(this_sector - conf->mirrors[disk].head_position);
|
|
|
- if (new_distance < current_distance) {
|
|
|
- current_distance = new_distance;
|
|
|
- new_disk = disk;
|
|
|
+ if (dist < best_dist) {
|
|
|
+ best_dist = dist;
|
|
|
+ best_disk = disk;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- rb_out:
|
|
|
- if (new_disk >= 0) {
|
|
|
- rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
|
|
|
+ if (best_disk >= 0) {
|
|
|
+ rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
|
|
|
if (!rdev)
|
|
|
goto retry;
|
|
|
atomic_inc(&rdev->nr_pending);
|
|
|
- if (!test_bit(In_sync, &rdev->flags)) {
|
|
|
+ if (test_bit(Faulty, &rdev->flags)) {
|
|
|
/* cannot risk returning a device that failed
|
|
|
* before we inc'ed nr_pending
|
|
|
*/
|
|
@@ -504,11 +489,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
|
|
|
goto retry;
|
|
|
}
|
|
|
conf->next_seq_sect = this_sector + sectors;
|
|
|
- conf->last_used = new_disk;
|
|
|
+ conf->last_used = best_disk;
|
|
|
}
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
- return new_disk;
|
|
|
+ return best_disk;
|
|
|
}
|
|
|
|
|
|
static int raid1_congested(void *data, int bits)
|