@@ -51,6 +51,8 @@ static mdk_personality_t raid1_personality;
 
 static void unplug_slaves(mddev_t *mddev);
 
+static void allow_barrier(conf_t *conf);
+static void lower_barrier(conf_t *conf);
 
 static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
@@ -160,20 +162,13 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio)
 
 static inline void free_r1bio(r1bio_t *r1_bio)
 {
-        unsigned long flags;
-
         conf_t *conf = mddev_to_conf(r1_bio->mddev);
 
         /*
          * Wake up any possible resync thread that waits for the device
          * to go idle.
          */
-        spin_lock_irqsave(&conf->resync_lock, flags);
-        if (!--conf->nr_pending) {
-                wake_up(&conf->wait_idle);
-                wake_up(&conf->wait_resume);
-        }
-        spin_unlock_irqrestore(&conf->resync_lock, flags);
+        allow_barrier(conf);
 
         put_all_bios(conf, r1_bio);
         mempool_free(r1_bio, conf->r1bio_pool);
@@ -182,22 +177,10 @@ static inline void free_r1bio(r1bio_t *r1_bio)
 static inline void put_buf(r1bio_t *r1_bio)
 {
         conf_t *conf = mddev_to_conf(r1_bio->mddev);
-        unsigned long flags;
 
         mempool_free(r1_bio, conf->r1buf_pool);
 
-        spin_lock_irqsave(&conf->resync_lock, flags);
-        if (!conf->barrier)
-                BUG();
-        --conf->barrier;
-        wake_up(&conf->wait_resume);
-        wake_up(&conf->wait_idle);
-
-        if (!--conf->nr_pending) {
-                wake_up(&conf->wait_idle);
-                wake_up(&conf->wait_resume);
-        }
-        spin_unlock_irqrestore(&conf->resync_lock, flags);
+        lower_barrier(conf);
 }
 
 static void reschedule_retry(r1bio_t *r1_bio)
@@ -210,6 +193,7 @@ static void reschedule_retry(r1bio_t *r1_bio)
         list_add(&r1_bio->retry_list, &conf->retry_list);
         spin_unlock_irqrestore(&conf->device_lock, flags);
 
+        wake_up(&conf->wait_barrier);
         md_wakeup_thread(mddev->thread);
 }
 
@@ -593,30 +577,83 @@ static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
         return ret;
 }
 
-/*
- * Throttle resync depth, so that we can both get proper overlapping of
- * requests, but are still able to handle normal requests quickly.
+/* Barriers....
+ * Sometimes we need to suspend IO while we do something else,
+ * either some resync/recovery, or reconfigure the array.
+ * To do this we raise a 'barrier'.
+ * The 'barrier' is a counter that can be raised multiple times
+ * to count how many activities are happening which preclude
+ * normal IO.
+ * We can only raise the barrier if there is no pending IO,
+ * i.e. if nr_pending == 0.
+ * We choose only to raise the barrier if no-one is waiting for the
+ * barrier to go down.  This means that as soon as an IO request
+ * is ready, no other operations which require a barrier will start
+ * until the IO request has had a chance.
+ *
+ * So: regular IO calls 'wait_barrier'.  When that returns there
+ * is no background IO happening.  It must arrange to call
+ * allow_barrier when it has finished its IO.
+ * Background IO calls must call raise_barrier.  Once that returns
+ * there is no normal IO happening.  It must arrange to call
+ * lower_barrier when the particular background IO completes.
  */
 #define RESYNC_DEPTH 32
 
-static void device_barrier(conf_t *conf, sector_t sect)
+static void raise_barrier(conf_t *conf)
 {
         spin_lock_irq(&conf->resync_lock);
-        wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
-                            conf->resync_lock, raid1_unplug(conf->mddev->queue));
-
-        if (!conf->barrier++) {
-                wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-                                    conf->resync_lock, raid1_unplug(conf->mddev->queue));
-                if (conf->nr_pending)
-                        BUG();
+
+        /* Wait until no block IO is waiting */
+        wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
+                            conf->resync_lock,
+                            raid1_unplug(conf->mddev->queue));
+
+        /* block any new IO from starting */
+        conf->barrier++;
+
+        /* Now wait for all pending IO to complete */
+        wait_event_lock_irq(conf->wait_barrier,
+                            !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+                            conf->resync_lock,
+                            raid1_unplug(conf->mddev->queue));
+
+        spin_unlock_irq(&conf->resync_lock);
+}
+
+static void lower_barrier(conf_t *conf)
+{
+        unsigned long flags;
+        spin_lock_irqsave(&conf->resync_lock, flags);
+        conf->barrier--;
+        spin_unlock_irqrestore(&conf->resync_lock, flags);
+        wake_up(&conf->wait_barrier);
+}
+
+static void wait_barrier(conf_t *conf)
+{
+        spin_lock_irq(&conf->resync_lock);
+        if (conf->barrier) {
+                conf->nr_waiting++;
+                wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+                                    conf->resync_lock,
+                                    raid1_unplug(conf->mddev->queue));
+                conf->nr_waiting--;
         }
-        wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
-                            conf->resync_lock, raid1_unplug(conf->mddev->queue));
-        conf->next_resync = sect;
+        conf->nr_pending++;
         spin_unlock_irq(&conf->resync_lock);
 }
 
+static void allow_barrier(conf_t *conf)
+{
+        unsigned long flags;
+        spin_lock_irqsave(&conf->resync_lock, flags);
+        conf->nr_pending--;
+        spin_unlock_irqrestore(&conf->resync_lock, flags);
+        wake_up(&conf->wait_barrier);
+}
+
+
 /* duplicate the data pages for behind I/O */
 static struct page **alloc_behind_pages(struct bio *bio)
 {
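
A note on the wait_event_lock_irq() calls above: the third and fourth arguments do real work. The macro (md's private wait helper in this era, with the four-argument signature used throughout this patch) drops the given spinlock while sleeping and runs the cmd expression first, so the unplug pushes already-queued bios to the drives and the condition can actually become true. Roughly paraphrased under that assumption; this is not the literal macro text:

	while (!cond) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&lock);
		cmd;		/* here: raid1_unplug(), letting queued
				 * IO complete and drop nr_pending */
		schedule();
		spin_lock_irq(&lock);
	}
	/* the caller still holds the lock, and cond now holds */

Without the unplug step, raise_barrier() could sleep forever waiting for nr_pending to fall while the very bios it waits on sit unplugged in the queue.
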
@@ -678,10 +715,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
          */
         md_write_start(mddev, bio); /* wait on superblock update early */
 
-        spin_lock_irq(&conf->resync_lock);
-        wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
-        conf->nr_pending++;
-        spin_unlock_irq(&conf->resync_lock);
+        wait_barrier(conf);
 
         disk_stat_inc(mddev->gendisk, ios[rw]);
         disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
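
The fast path is now symmetric: make_request() takes a reference with wait_barrier() and the matching release happens in free_r1bio() when the request completes. A sketch of the lifetime, using only names from this patch (the wrapper function itself is hypothetical):

	static void r1_request_lifetime(conf_t *conf)
	{
		wait_barrier(conf);	/* sleep while resync holds the
					 * barrier, then nr_pending++ */
		/* ... allocate the r1_bio, submit bios to the mirrors ... */

		/* later, from the completion path via free_r1bio(): */
		allow_barrier(conf);	/* nr_pending--, wake_up() so a
					 * waiting raise_barrier() can go */
	}
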
@@ -909,13 +943,8 @@ static void print_conf(conf_t *conf)
 
 static void close_sync(conf_t *conf)
 {
-        spin_lock_irq(&conf->resync_lock);
-        wait_event_lock_irq(conf->wait_resume, !conf->barrier,
-                            conf->resync_lock, raid1_unplug(conf->mddev->queue));
-        spin_unlock_irq(&conf->resync_lock);
-
-        if (conf->barrier) BUG();
-        if (waitqueue_active(&conf->wait_idle)) BUG();
+        wait_barrier(conf);
+        allow_barrier(conf);
 
         mempool_destroy(conf->r1buf_pool);
         conf->r1buf_pool = NULL;
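
The wait_barrier()/allow_barrier() pair in close_sync() is a deliberate idiom: wait_barrier() cannot return until conf->barrier is zero, i.e. until every raise_barrier() has been matched by a lower_barrier(), and allow_barrier() immediately drops the nr_pending reference that was taken. The pair is a pure synchronisation point leaving no state behind, which is why the old BUG() assertions can go. Keeping the old check would amount to this (hypothetical, not part of the patch):

	wait_barrier(conf);
	allow_barrier(conf);
	BUG_ON(conf->barrier != 0);	/* already guaranteed by wait_barrier() */
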
@@ -1317,12 +1346,16 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                 return sync_blocks;
         }
         /*
-         * If there is non-resync activity waiting for us then
-         * put in a delay to throttle resync.
+         * If there is non-resync activity waiting for a turn,
+         * and resync is going fast enough,
+         * then let it through before starting on this new sync request.
          */
-        if (!go_faster && waitqueue_active(&conf->wait_resume))
+        if (!go_faster && conf->nr_waiting)
                 msleep_interruptible(1000);
-        device_barrier(conf, sector_nr + RESYNC_SECTORS);
+
+        raise_barrier(conf);
+
+        conf->next_resync = sector_nr;
 
         /*
          * If reconstructing, and >1 working disc,
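
Resync now takes exactly one barrier count per r1_bio, and the conf->barrier < RESYNC_DEPTH condition in raise_barrier() preserves the old throttle of at most 32 outstanding resync requests. The background pairing, sketched with names from this patch (the wrapper is hypothetical):

	static void r1_resync_window(conf_t *conf)
	{
		raise_barrier(conf);	/* waits out pending normal IO,
					 * capped at RESYNC_DEPTH in flight */
		/* ... sync_request() builds and submits the resync bios ... */

		/* later, when the resync r1_bio completes, put_buf() runs: */
		lower_barrier(conf);	/* barrier--, wake_up(wait_barrier) */
	}
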
@@ -1355,10 +1388,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
         r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
 
-        spin_lock_irq(&conf->resync_lock);
-        conf->nr_pending++;
-        spin_unlock_irq(&conf->resync_lock);
-
         r1_bio->mddev = mddev;
         r1_bio->sector = sector_nr;
         r1_bio->state = 0;
@@ -1542,8 +1571,7 @@ static int run(mddev_t *mddev)
                 mddev->recovery_cp = MaxSector;
 
         spin_lock_init(&conf->resync_lock);
-        init_waitqueue_head(&conf->wait_idle);
-        init_waitqueue_head(&conf->wait_resume);
+        init_waitqueue_head(&conf->wait_barrier);
 
         bio_list_init(&conf->pending_bio_list);
         bio_list_init(&conf->flushing_bio_list);
@@ -1714,11 +1742,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
         }
         memset(newmirrors, 0, sizeof(struct mirror_info)*raid_disks);
 
-        spin_lock_irq(&conf->resync_lock);
-        conf->barrier++;
-        wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-                            conf->resync_lock, raid1_unplug(mddev->queue));
-        spin_unlock_irq(&conf->resync_lock);
+        raise_barrier(conf);
 
         /* ok, everything is stopped */
         oldpool = conf->r1bio_pool;
@@ -1738,12 +1762,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
         conf->raid_disks = mddev->raid_disks = raid_disks;
 
         conf->last_used = 0; /* just make sure it is in-range */
-        spin_lock_irq(&conf->resync_lock);
-        conf->barrier--;
-        spin_unlock_irq(&conf->resync_lock);
-        wake_up(&conf->wait_resume);
-        wake_up(&conf->wait_idle);
-
+        lower_barrier(conf);
 
         set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
         md_wakeup_thread(mddev->thread);
@@ -1758,18 +1777,10 @@ static void raid1_quiesce(mddev_t *mddev, int state)
 
         switch(state) {
         case 1:
-                spin_lock_irq(&conf->resync_lock);
-                conf->barrier++;
-                wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-                                    conf->resync_lock, raid1_unplug(mddev->queue));
-                spin_unlock_irq(&conf->resync_lock);
+                raise_barrier(conf);
                 break;
         case 0:
-                spin_lock_irq(&conf->resync_lock);
-                conf->barrier--;
-                spin_unlock_irq(&conf->resync_lock);
-                wake_up(&conf->wait_resume);
-                wake_up(&conf->wait_idle);
+                lower_barrier(conf);
                 break;
         }
         if (mddev->thread) {
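
With reshape and quiesce converted too, every suspend/resume path in the file now funnels through the same two primitives. raid1_quiesce() is the personality's ->quiesce() hook, so a caller in md core looks roughly like this (illustrative only; the exact call sites are outside this patch):

	mddev->pers->quiesce(mddev, 1);	/* raise_barrier(): IO fully drained */
	/* ... reconfigure the array, e.g. bitmap changes ... */
	mddev->pers->quiesce(mddev, 0);	/* lower_barrier(): normal IO resumes */
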