@@ -110,7 +110,6 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_FREEING 3
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
-#define DMF_QUEUE_IO_TO_THREAD 6
 
 /*
  * Work processed by per-device workqueue.
@@ -143,11 +142,6 @@ struct mapped_device {
 	struct bio_list deferred;
 	spinlock_t deferred_lock;
 
-	/*
-	 * An error from the flush request currently being processed.
-	 */
-	int flush_error;
-
 	/*
 	 * Processing queue (flush)
 	 */
@@ -518,16 +512,10 @@ static void end_io_acct(struct dm_io *io)
  */
 static void queue_io(struct mapped_device *md, struct bio *bio)
 {
-	down_write(&md->io_lock);
-
 	spin_lock_irq(&md->deferred_lock);
 	bio_list_add(&md->deferred, bio);
 	spin_unlock_irq(&md->deferred_lock);
-
-	if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))
-		queue_work(md->wq, &md->work);
-
-	up_write(&md->io_lock);
+	queue_work(md->wq, &md->work);
 }
 
 /*
@@ -615,11 +603,9 @@ static void dec_pending(struct dm_io *io, int error)
 			 * Target requested pushing back the I/O.
 			 */
 			spin_lock_irqsave(&md->deferred_lock, flags);
-			if (__noflush_suspending(md)) {
-				if (!(io->bio->bi_rw & REQ_FLUSH))
-					bio_list_add_head(&md->deferred,
-							  io->bio);
-			} else
+			if (__noflush_suspending(md))
+				bio_list_add_head(&md->deferred, io->bio);
+			else
 				/* noflush suspend was interrupted. */
 				io->error = -EIO;
 			spin_unlock_irqrestore(&md->deferred_lock, flags);
@@ -627,26 +613,22 @@ static void dec_pending(struct dm_io *io, int error)
 
 		io_error = io->error;
 		bio = io->bio;
+		end_io_acct(io);
+		free_io(md, io);
+
+		if (io_error == DM_ENDIO_REQUEUE)
+			return;
 
-		if (bio->bi_rw & REQ_FLUSH) {
+		if (!(bio->bi_rw & REQ_FLUSH) || !bio->bi_size) {
+			trace_block_bio_complete(md->queue, bio);
+			bio_endio(bio, io_error);
+		} else {
 			/*
-			 * There can be just one flush request so we use
-			 * a per-device variable for error reporting.
-			 * Note that you can't touch the bio after end_io_acct
+			 * Preflush done for flush with data, reissue
+			 * without REQ_FLUSH.
 			 */
-			if (!md->flush_error)
-				md->flush_error = io_error;
-			end_io_acct(io);
-			free_io(md, io);
-		} else {
-			end_io_acct(io);
-			free_io(md, io);
-
-			if (io_error != DM_ENDIO_REQUEUE) {
-				trace_block_bio_complete(md->queue, bio);
-
-				bio_endio(bio, io_error);
-			}
+			bio->bi_rw &= ~REQ_FLUSH;
+			queue_io(md, bio);
 		}
 	}
 }
@@ -1298,21 +1280,17 @@ static int __clone_and_map(struct clone_info *ci)
  */
 static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 {
+	bool is_flush = bio->bi_rw & REQ_FLUSH;
 	struct clone_info ci;
 	int error = 0;
 
 	ci.map = dm_get_live_table(md);
 	if (unlikely(!ci.map)) {
-		if (!(bio->bi_rw & REQ_FLUSH))
-			bio_io_error(bio);
-		else
-			if (!md->flush_error)
-				md->flush_error = -EIO;
+		bio_io_error(bio);
 		return;
 	}
 
 	ci.md = md;
-	ci.bio = bio;
 	ci.io = alloc_io(md);
 	ci.io->error = 0;
 	atomic_set(&ci.io->io_count, 1);
@@ -1320,18 +1298,19 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 	ci.io->md = md;
 	spin_lock_init(&ci.io->endio_lock);
 	ci.sector = bio->bi_sector;
-	if (!(bio->bi_rw & REQ_FLUSH))
+	ci.idx = bio->bi_idx;
+
+	if (!is_flush) {
+		ci.bio = bio;
 		ci.sector_count = bio_sectors(bio);
-	else {
-		/* all FLUSH bio's reaching here should be empty */
-		WARN_ON_ONCE(bio_has_data(bio));
+	} else {
+		ci.bio = &ci.md->flush_bio;
 		ci.sector_count = 1;
 	}
-	ci.idx = bio->bi_idx;
 
 	start_io_acct(ci.io);
 	while (ci.sector_count && !error) {
-		if (!(bio->bi_rw & REQ_FLUSH))
+		if (!is_flush)
 			error = __clone_and_map(&ci);
 		else
 			error = __clone_and_map_flush(&ci);
@@ -1419,22 +1398,14 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
 	part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
 	part_stat_unlock();
 
-	/*
-	 * If we're suspended or the thread is processing flushes
-	 * we have to queue this io for later.
-	 */
-	if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
-	    (bio->bi_rw & REQ_FLUSH)) {
+	/* if we're suspended, we have to queue this io for later */
+	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
 		up_read(&md->io_lock);
 
-		if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
-		    bio_rw(bio) == READA) {
+		if (bio_rw(bio) != READA)
+			queue_io(md, bio);
+		else
 			bio_io_error(bio);
-			return 0;
-		}
-
-		queue_io(md, bio);
-
 		return 0;
 	}
 
@@ -1923,6 +1894,10 @@ static struct mapped_device *alloc_dev(int minor)
 	if (!md->bdev)
 		goto bad_bdev;
 
+	bio_init(&md->flush_bio);
+	md->flush_bio.bi_bdev = md->bdev;
+	md->flush_bio.bi_rw = WRITE_FLUSH;
+
 	/* Populate the mapping, nobody knows we exist yet */
 	spin_lock(&_minor_lock);
 	old_md = idr_replace(&_minor_idr, md, minor);
@@ -2313,37 +2288,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 	return r;
 }
 
-static void process_flush(struct mapped_device *md, struct bio *bio)
-{
-	md->flush_error = 0;
-
-	/* handle REQ_FLUSH */
-	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-
-	bio_init(&md->flush_bio);
-	md->flush_bio.bi_bdev = md->bdev;
-	md->flush_bio.bi_rw = WRITE_FLUSH;
-	__split_and_process_bio(md, &md->flush_bio);
-
-	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-
-	/* if it's an empty flush or the preflush failed, we're done */
-	if (!bio_has_data(bio) || md->flush_error) {
-		if (md->flush_error != DM_ENDIO_REQUEUE)
-			bio_endio(bio, md->flush_error);
-		else {
-			spin_lock_irq(&md->deferred_lock);
-			bio_list_add_head(&md->deferred, bio);
-			spin_unlock_irq(&md->deferred_lock);
-		}
-		return;
-	}
-
-	/* issue data + REQ_FUA */
-	bio->bi_rw &= ~REQ_FLUSH;
-	__split_and_process_bio(md, bio);
-}
-
 /*
  * Process the deferred bios
  */
@@ -2353,33 +2297,27 @@ static void dm_wq_work(struct work_struct *work)
 						work);
 	struct bio *c;
 
-	down_write(&md->io_lock);
+	down_read(&md->io_lock);
 
 	while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
 		spin_lock_irq(&md->deferred_lock);
 		c = bio_list_pop(&md->deferred);
 		spin_unlock_irq(&md->deferred_lock);
 
-		if (!c) {
-			clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
+		if (!c)
 			break;
-		}
 
-		up_write(&md->io_lock);
+		up_read(&md->io_lock);
 
 		if (dm_request_based(md))
 			generic_make_request(c);
-		else {
-			if (c->bi_rw & REQ_FLUSH)
-				process_flush(md, c);
-			else
-				__split_and_process_bio(md, c);
-		}
+		else
+			__split_and_process_bio(md, c);
 
-		down_write(&md->io_lock);
+		down_read(&md->io_lock);
 	}
 
-	up_write(&md->io_lock);
+	up_read(&md->io_lock);
 }
 
 static void dm_queue_flush(struct mapped_device *md)
@@ -2511,17 +2449,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	 *
 	 * To get all processes out of __split_and_process_bio in dm_request,
 	 * we take the write lock. To prevent any process from reentering
-	 * __split_and_process_bio from dm_request, we set
-	 * DMF_QUEUE_IO_TO_THREAD.
-	 *
-	 * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND
-	 * and call flush_workqueue(md->wq). flush_workqueue will wait until
-	 * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any
-	 * further calls to __split_and_process_bio from dm_wq_work.
+	 * __split_and_process_bio from dm_request and quiesce the thread
+	 * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call
+	 * flush_workqueue(md->wq).
 	 */
 	down_write(&md->io_lock);
 	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
-	set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
 	up_write(&md->io_lock);
 
 	/*