@@ -66,7 +66,8 @@
  */
 static int max_queued_requests = 1024;
 
-static void allow_barrier(struct r1conf *conf);
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+			  sector_t bi_sector);
 static void lower_barrier(struct r1conf *conf);
 
 static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -227,6 +228,8 @@ static void call_bio_endio(struct r1bio *r1_bio)
 	struct bio *bio = r1_bio->master_bio;
 	int done;
 	struct r1conf *conf = r1_bio->mddev->private;
+	sector_t start_next_window = r1_bio->start_next_window;
+	sector_t bi_sector = bio->bi_sector;
 
 	if (bio->bi_phys_segments) {
 		unsigned long flags;
@@ -234,6 +237,11 @@ static void call_bio_endio(struct r1bio *r1_bio)
 		bio->bi_phys_segments--;
 		done = (bio->bi_phys_segments == 0);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
+		/*
+		 * make_request() might be waiting for
+		 * bi_phys_segments to decrease
+		 */
+		wake_up(&conf->wait_barrier);
 	} else
 		done = 1;
 
@@ -245,7 +253,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
 		 * Wake up any possible resync thread that waits for the device
 		 * to go idle.
 		 */
-		allow_barrier(conf);
+		allow_barrier(conf, start_next_window, bi_sector);
 	}
 }
 
@@ -827,10 +835,19 @@ static void raise_barrier(struct r1conf *conf)
 	/* block any new IO from starting */
 	conf->barrier++;
 
-	/* Now wait for all pending IO to complete */
+	/* For these conditions we must wait:
+	 * A: while the array is in the frozen state
+	 * B: while barrier >= RESYNC_DEPTH, meaning resync has reached
+	 *    the maximum count allowed.
+	 * C: while next_resync + RESYNC_SECTORS > start_next_window, meaning
+	 *    the next resync will reach the window which normal bios are
+	 *    handling.
+	 */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->array_frozen &&
-			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+			    conf->barrier < RESYNC_DEPTH &&
+			    (conf->start_next_window >=
+			     conf->next_resync + RESYNC_SECTORS),
 			    conf->resync_lock);
 
 	spin_unlock_irq(&conf->resync_lock);
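
Condition C above is easier to see with concrete numbers. The following is a minimal user-space sketch (not kernel code) of the write-side classification that need_to_wait_for_sync(), introduced further down, applies against the resync window. The constant values mirror what raid1.c defines, but the helper name and the exact numbers should be treated as assumptions of this sketch:

/*
 * User-space sketch of the window classification performed by
 * need_to_wait_for_sync().  write_must_wait() is a made-up name.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

#define RESYNC_WINDOW_SECTORS	((32 * 1024 * 1024) >> 9)	/* assumed */
#define NEXT_NORMALIO_DISTANCE	(3 * RESYNC_WINDOW_SECTORS)

/*
 * A write may proceed during resync only if it lies entirely behind
 * the already-resynced region or far enough ahead of the resync
 * point; anything overlapping the active window must wait.
 */
static bool write_must_wait(sector_t next_resync,
			    sector_t bi_sector, sector_t sectors)
{
	if (next_resync < RESYNC_WINDOW_SECTORS)
		return true;	/* resync just started: no safe region yet */
	if (next_resync - RESYNC_WINDOW_SECTORS >= bi_sector + sectors)
		return false;	/* whole write is behind the resync window */
	if (next_resync + NEXT_NORMALIO_DISTANCE <= bi_sector)
		return false;	/* whole write is ahead of the resync window */
	return true;		/* overlaps the active window: wait */
}

int main(void)
{
	sector_t next_resync = 10 * RESYNC_WINDOW_SECTORS;

	printf("%d\n", write_must_wait(next_resync, 0, 8));	  /* 0: behind */
	printf("%d\n", write_must_wait(next_resync,
		       next_resync + NEXT_NORMALIO_DISTANCE, 8)); /* 0: ahead */
	printf("%d\n", write_must_wait(next_resync, next_resync, 8)); /* 1 */
	return 0;
}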
@@ -846,10 +863,33 @@ static void lower_barrier(struct r1conf *conf)
 	wake_up(&conf->wait_barrier);
 }
 
-static void wait_barrier(struct r1conf *conf)
+static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio)
+{
+	bool wait = false;
+
+	if (conf->array_frozen || !bio)
+		wait = true;
+	else if (conf->barrier && bio_data_dir(bio) == WRITE) {
+		if (conf->next_resync < RESYNC_WINDOW_SECTORS)
+			wait = true;
+		else if ((conf->next_resync - RESYNC_WINDOW_SECTORS
+				>= bio_end_sector(bio)) ||
+			 (conf->next_resync + NEXT_NORMALIO_DISTANCE
+				<= bio->bi_sector))
+			wait = false;
+		else
+			wait = true;
+	}
+
+	return wait;
+}
+
+static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
 {
+	sector_t sector = 0;
+
 	spin_lock_irq(&conf->resync_lock);
-	if (conf->barrier) {
+	if (need_to_wait_for_sync(conf, bio)) {
 		conf->nr_waiting++;
 		/* Wait for the barrier to drop.
 		 * However if there are already pending
@@ -863,21 +903,65 @@ static void wait_barrier(struct r1conf *conf)
 		wait_event_lock_irq(conf->wait_barrier,
 				    !conf->array_frozen &&
 				    (!conf->barrier ||
-				    (conf->nr_pending &&
+				    ((conf->start_next_window <
+				      conf->next_resync + RESYNC_SECTORS) &&
 				     current->bio_list &&
 				     !bio_list_empty(current->bio_list))),
 				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
+
+	if (bio && bio_data_dir(bio) == WRITE) {
+		if (conf->next_resync + NEXT_NORMALIO_DISTANCE
+		    <= bio->bi_sector) {
+			if (conf->start_next_window == MaxSector)
+				conf->start_next_window =
+					conf->next_resync +
+					NEXT_NORMALIO_DISTANCE;
+
+			if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
+			    <= bio->bi_sector)
+				conf->next_window_requests++;
+			else
+				conf->current_window_requests++;
+		}
+		if (bio->bi_sector >= conf->start_next_window)
+			sector = conf->start_next_window;
+	}
+
 	conf->nr_pending++;
 	spin_unlock_irq(&conf->resync_lock);
+	return sector;
 }
 
-static void allow_barrier(struct r1conf *conf)
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+			  sector_t bi_sector)
 {
 	unsigned long flags;
+
 	spin_lock_irqsave(&conf->resync_lock, flags);
 	conf->nr_pending--;
+	if (start_next_window) {
+		if (start_next_window == conf->start_next_window) {
+			if (conf->start_next_window + NEXT_NORMALIO_DISTANCE
+			    <= bi_sector)
+				conf->next_window_requests--;
+			else
+				conf->current_window_requests--;
+		} else
+			conf->current_window_requests--;
+
+		if (!conf->current_window_requests) {
+			if (conf->next_window_requests) {
+				conf->current_window_requests =
+					conf->next_window_requests;
+				conf->next_window_requests = 0;
+				conf->start_next_window +=
+					NEXT_NORMALIO_DISTANCE;
+			} else
+				conf->start_next_window = MaxSector;
+		}
+	}
 	spin_unlock_irqrestore(&conf->resync_lock, flags);
 	wake_up(&conf->wait_barrier);
 }
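
Taken together, wait_barrier() now hands every admitted request a token (the window it was counted against, or 0), and allow_barrier() uses that token plus the request's start sector to credit the correct counter. Below is a single-threaded user-space model of the two-window rotation; window_get()/window_put() are hypothetical names standing in for the locked kernel paths:

/*
 * User-space model of the two-window accounting in
 * wait_barrier()/allow_barrier().  Single-threaded; no locking.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

#define MaxSector		((sector_t)~0ULL)
#define NEXT_NORMALIO_DISTANCE	(3 * ((32 * 1024 * 1024) >> 9))	/* assumed */

struct window_state {
	sector_t next_resync;
	sector_t start_next_window;
	int current_window_requests;
	int next_window_requests;
};

/* admit a write at bi_sector; returns the token for the later release */
static sector_t window_get(struct window_state *w, sector_t bi_sector)
{
	sector_t token = 0;

	if (w->next_resync + NEXT_NORMALIO_DISTANCE <= bi_sector) {
		if (w->start_next_window == MaxSector)
			w->start_next_window =
				w->next_resync + NEXT_NORMALIO_DISTANCE;
		if (w->start_next_window + NEXT_NORMALIO_DISTANCE <= bi_sector)
			w->next_window_requests++;
		else
			w->current_window_requests++;
	}
	if (bi_sector >= w->start_next_window)
		token = w->start_next_window;
	return token;
}

/* release a write previously admitted with the given token */
static void window_put(struct window_state *w, sector_t token,
		       sector_t bi_sector)
{
	if (!token)
		return;
	/* pick the counter the admit side incremented (condensed form) */
	if (token == w->start_next_window &&
	    w->start_next_window + NEXT_NORMALIO_DISTANCE <= bi_sector)
		w->next_window_requests--;
	else
		w->current_window_requests--;

	/* current window drained: rotate the next window in, or reset */
	if (!w->current_window_requests) {
		if (w->next_window_requests) {
			w->current_window_requests = w->next_window_requests;
			w->next_window_requests = 0;
			w->start_next_window += NEXT_NORMALIO_DISTANCE;
		} else
			w->start_next_window = MaxSector;
	}
}

int main(void)
{
	struct window_state w = { 0, MaxSector, 0, 0 };
	sector_t s1 = 1 * NEXT_NORMALIO_DISTANCE;	/* current window */
	sector_t s2 = 2 * NEXT_NORMALIO_DISTANCE;	/* next window */
	sector_t t1 = window_get(&w, s1);
	sector_t t2 = window_get(&w, s2);

	window_put(&w, t1, s1);	/* drains current: window rotates forward */
	printf("start_next_window=%llu\n",
	       (unsigned long long)w.start_next_window);
	window_put(&w, t2, s2);	/* drains all: window resets to MaxSector */
	printf("start_next_window=%llu\n",
	       (unsigned long long)w.start_next_window);
	return 0;
}

The rotation in window_put() mirrors the hunk above: when the current window drains, the next window's count is promoted and start_next_window advances by NEXT_NORMALIO_DISTANCE; when both counts reach zero, start_next_window falls back to the MaxSector sentinel.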
@@ -1012,6 +1096,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	int first_clone;
 	int sectors_handled;
 	int max_sectors;
+	sector_t start_next_window;
 
 	/*
 	 * Register the new request and wait if the reconstruction
@@ -1041,7 +1126,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 		finish_wait(&conf->wait_barrier, &w);
 	}
 
-	wait_barrier(conf);
+	start_next_window = wait_barrier(conf, bio);
 
 	bitmap = mddev->bitmap;
 
@@ -1162,6 +1247,7 @@ read_again:
 
 	disks = conf->raid_disks * 2;
  retry_write:
+	r1_bio->start_next_window = start_next_window;
 	blocked_rdev = NULL;
 	rcu_read_lock();
 	max_sectors = r1_bio->sectors;
@@ -1230,14 +1316,24 @@ read_again:
 	if (unlikely(blocked_rdev)) {
 		/* Wait for this device to become unblocked */
 		int j;
+		sector_t old = start_next_window;
 
 		for (j = 0; j < i; j++)
 			if (r1_bio->bios[j])
 				rdev_dec_pending(conf->mirrors[j].rdev, mddev);
 		r1_bio->state = 0;
-		allow_barrier(conf);
+		allow_barrier(conf, start_next_window, bio->bi_sector);
 		md_wait_for_blocked_rdev(blocked_rdev, mddev);
-		wait_barrier(conf);
+		start_next_window = wait_barrier(conf, bio);
+		/*
+		 * We must make sure that the multiple r1bios of one bio
+		 * have the same value of bi_phys_segments
+		 */
+		if (bio->bi_phys_segments && old &&
+		    old != start_next_window)
+			/* Wait for the former r1bio(s) to complete */
+			wait_event(conf->wait_barrier,
+				   bio->bi_phys_segments == 1);
 		goto retry_write;
 	}
 
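
The wait_event() above guards a subtle invariant: every r1bio split from one master bio must have been admitted under the same window, because allow_barrier() decides which counter to decrement from the recorded value. A toy illustration of the hazard (hypothetical structure, not driver code):

/*
 * Toy illustration of the invariant the retry path preserves: r1bios
 * carved from one master bio must all carry the same window token, so
 * a retry that observes a newer window must first drain its siblings.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

struct master_bio {
	int bi_phys_segments;	/* r1bios still outstanding */
	sector_t window_token;	/* token the outstanding r1bios used */
};

static void issue_r1bio(struct master_bio *m, sector_t token)
{
	if (m->bi_phys_segments && token != m->window_token) {
		/* the patch's wait_event(): drain the earlier r1bios first */
		printf("window moved (%llu -> %llu): wait until "
		       "bi_phys_segments == 1\n",
		       (unsigned long long)m->window_token,
		       (unsigned long long)token);
		return;
	}
	m->window_token = token;
	m->bi_phys_segments++;
}

int main(void)
{
	struct master_bio m = { 0, 0 };

	issue_r1bio(&m, 4096);	/* first r1bio admitted under window 4096 */
	issue_r1bio(&m, 8192);	/* blocked-rdev retry saw a newer window */
	return 0;
}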
@@ -1437,11 +1533,14 @@ static void print_conf(struct r1conf *conf)
 
 static void close_sync(struct r1conf *conf)
 {
-	wait_barrier(conf);
-	allow_barrier(conf);
+	wait_barrier(conf, NULL);
+	allow_barrier(conf, 0, 0);
 
 	mempool_destroy(conf->r1buf_pool);
 	conf->r1buf_pool = NULL;
+
+	conf->next_resync = 0;
+	conf->start_next_window = MaxSector;
 }
 
 static int raid1_spare_active(struct mddev *mddev)
@@ -2713,6 +2812,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	conf->pending_count = 0;
 	conf->recovery_disabled = mddev->recovery_disabled - 1;
 
+	conf->start_next_window = MaxSector;
+	conf->current_window_requests = conf->next_window_requests = 0;
+
 	err = -EIO;
 	for (i = 0; i < conf->raid_disks * 2; i++) {
 