|
@@ -146,6 +146,7 @@ struct mirror_set {
|
|
|
region_t nr_regions;
|
|
|
int in_sync;
|
|
|
int log_failure;
|
|
|
+ atomic_t suspend;
|
|
|
|
|
|
atomic_t default_mirror; /* Default mirror */
|
|
|
|
|
@@ -372,6 +373,16 @@ static void complete_resync_work(struct region *reg, int success)
|
|
|
struct region_hash *rh = reg->rh;
|
|
|
|
|
|
rh->log->type->set_region_sync(rh->log, reg->key, success);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Dispatch the bios before we call 'wake_up_all'.
|
|
|
+ * This is important because if we are suspending,
|
|
|
+ * we want to know that recovery is complete and
|
|
|
+ * the work queue is flushed. If we wake_up_all
|
|
|
+ * before we dispatch_bios (queue bios and call wake()),
|
|
|
+ * then we risk suspending before the work queue
|
|
|
+ * has been properly flushed.
|
|
|
+ */
|
|
|
dispatch_bios(rh->ms, ®->delayed_bios);
|
|
|
if (atomic_dec_and_test(&rh->recovery_in_flight))
|
|
|
wake_up_all(&_kmirrord_recovery_stopped);
|
|
@@ -1069,11 +1080,13 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
|
|
|
/*
|
|
|
* Dispatch io.
|
|
|
*/
|
|
|
- if (unlikely(ms->log_failure))
|
|
|
+ if (unlikely(ms->log_failure)) {
|
|
|
+ spin_lock_irq(&ms->lock);
|
|
|
+ bio_list_merge(&ms->failures, &sync);
|
|
|
+ spin_unlock_irq(&ms->lock);
|
|
|
+ } else
|
|
|
while ((bio = bio_list_pop(&sync)))
|
|
|
- bio_endio(bio, -EIO);
|
|
|
- else while ((bio = bio_list_pop(&sync)))
|
|
|
- do_write(ms, bio);
|
|
|
+ do_write(ms, bio);
|
|
|
|
|
|
while ((bio = bio_list_pop(&recover)))
|
|
|
rh_delay(&ms->rh, bio);
|
|
@@ -1091,8 +1104,46 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
|
|
|
if (!failures->head)
|
|
|
return;
|
|
|
|
|
|
- while ((bio = bio_list_pop(failures)))
|
|
|
- __bio_mark_nosync(ms, bio, bio->bi_size, 0);
|
|
|
+ if (!ms->log_failure) {
|
|
|
+ while ((bio = bio_list_pop(failures)))
|
|
|
+ __bio_mark_nosync(ms, bio, bio->bi_size, 0);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If the log has failed, unattempted writes are being
|
|
|
+ * put on the failures list. We can't issue those writes
|
|
|
+ * until a log has been marked, so we must store them.
|
|
|
+ *
|
|
|
+ * If a 'noflush' suspend is in progress, we can requeue
|
|
|
+ * the I/Os to the core. This gives userspace a chance
|
|
|
+ * to reconfigure the mirror, at which point the core
|
|
|
+ * will reissue the writes. If the 'noflush' flag is
|
|
|
+ * not set, we have no choice but to return errors.
|
|
|
+ *
|
|
|
+ * Some writes on the failures list may have been
|
|
|
+ * submitted before the log failure and represent a
|
|
|
+ * failure to write to one of the devices. It is ok
|
|
|
+ * for us to treat them the same and requeue them
|
|
|
+ * as well.
|
|
|
+ */
|
|
|
+ if (dm_noflush_suspending(ms->ti)) {
|
|
|
+ while ((bio = bio_list_pop(failures)))
|
|
|
+ bio_endio(bio, DM_ENDIO_REQUEUE);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (atomic_read(&ms->suspend)) {
|
|
|
+ while ((bio = bio_list_pop(failures)))
|
|
|
+ bio_endio(bio, -EIO);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ spin_lock_irq(&ms->lock);
|
|
|
+ bio_list_merge(&ms->failures, failures);
|
|
|
+ spin_unlock_irq(&ms->lock);
|
|
|
+
|
|
|
+ wake(ms);
|
|
|
}
|
|
|
|
|
|
static void trigger_event(struct work_struct *work)
|
|
@@ -1176,6 +1227,8 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
|
|
|
ms->nr_mirrors = nr_mirrors;
|
|
|
ms->nr_regions = dm_sector_div_up(ti->len, region_size);
|
|
|
ms->in_sync = 0;
|
|
|
+ ms->log_failure = 0;
|
|
|
+ atomic_set(&ms->suspend, 0);
|
|
|
atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
|
|
|
|
|
|
ms->io_client = dm_io_client_create(DM_IO_PAGES);
|
|
@@ -1511,26 +1564,51 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-static void mirror_postsuspend(struct dm_target *ti)
|
|
|
+static void mirror_presuspend(struct dm_target *ti)
|
|
|
{
|
|
|
struct mirror_set *ms = (struct mirror_set *) ti->private;
|
|
|
struct dirty_log *log = ms->rh.log;
|
|
|
|
|
|
+ atomic_set(&ms->suspend, 1);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * We must finish up all the work that we've
|
|
|
+ * generated (i.e. recovery work).
|
|
|
+ */
|
|
|
rh_stop_recovery(&ms->rh);
|
|
|
|
|
|
- /* Wait for all I/O we generated to complete */
|
|
|
wait_event(_kmirrord_recovery_stopped,
|
|
|
!atomic_read(&ms->rh.recovery_in_flight));
|
|
|
|
|
|
+ if (log->type->presuspend && log->type->presuspend(log))
|
|
|
+ /* FIXME: need better error handling */
|
|
|
+ DMWARN("log presuspend failed");
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Now that recovery is complete/stopped and the
|
|
|
+ * delayed bios are queued, we need to wait for
|
|
|
+ * the worker thread to complete. This way,
|
|
|
+ * we know that all of our I/O has been pushed.
|
|
|
+ */
|
|
|
+ flush_workqueue(ms->kmirrord_wq);
|
|
|
+}
|
|
|
+
|
|
|
+static void mirror_postsuspend(struct dm_target *ti)
|
|
|
+{
|
|
|
+ struct mirror_set *ms = ti->private;
|
|
|
+ struct dirty_log *log = ms->rh.log;
|
|
|
+
|
|
|
if (log->type->postsuspend && log->type->postsuspend(log))
|
|
|
/* FIXME: need better error handling */
|
|
|
- DMWARN("log suspend failed");
|
|
|
+ DMWARN("log postsuspend failed");
|
|
|
}
|
|
|
|
|
|
static void mirror_resume(struct dm_target *ti)
|
|
|
{
|
|
|
- struct mirror_set *ms = (struct mirror_set *) ti->private;
|
|
|
+ struct mirror_set *ms = ti->private;
|
|
|
struct dirty_log *log = ms->rh.log;
|
|
|
+
|
|
|
+ atomic_set(&ms->suspend, 0);
|
|
|
if (log->type->resume && log->type->resume(log))
|
|
|
/* FIXME: need better error handling */
|
|
|
DMWARN("log resume failed");
|
|
@@ -1564,7 +1642,7 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
|
|
|
DMEMIT("%d", ms->nr_mirrors);
|
|
|
for (m = 0; m < ms->nr_mirrors; m++)
|
|
|
DMEMIT(" %s %llu", ms->mirror[m].dev->name,
|
|
|
- (unsigned long long)ms->mirror[m].offset);
|
|
|
+ (unsigned long long)ms->mirror[m].offset);
|
|
|
|
|
|
if (ms->features & DM_RAID1_HANDLE_ERRORS)
|
|
|
DMEMIT(" 1 handle_errors");
|
|
@@ -1581,6 +1659,7 @@ static struct target_type mirror_target = {
|
|
|
.dtr = mirror_dtr,
|
|
|
.map = mirror_map,
|
|
|
.end_io = mirror_end_io,
|
|
|
+ .presuspend = mirror_presuspend,
|
|
|
.postsuspend = mirror_postsuspend,
|
|
|
.resume = mirror_resume,
|
|
|
.status = mirror_status,
|