15 years ago · d0bcb87865
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -142,10 +142,20 @@ struct mapped_device {
 
															 	 */
														
 
															 	int barrier_error;
														
 
															+	/*
														
 
															+	 * Protect barrier_error from concurrent endio processing
														
 
															+	 * in request-based dm.
														
 
															+	 */
														
 
															+	spinlock_t barrier_error_lock;
														
 
															+
														
 
															 	/*
														
 
															 	 * Processing queue (flush/barriers)
														
 
															 	 */
														
 
															 	struct workqueue_struct *wq;
														
 
															+	struct work_struct barrier_work;
														
 
															+
														
 
															+	/* Pointer to the pre/post flush request currently being processed */
														
 
															+	struct request *flush_request;
														
 
															 	/*
														
 
															 	 * The current mapping.
														
@@ -722,6 +732,23 @@ static void end_clone_bio(struct bio *clone, int error)
 
															 	blk_update_request(tio->orig, 0, nr_bytes);
														
 
															 }
														
 
															+/*
															+ * Record the result of one barrier clone in md->barrier_error.
															+ *
															+ * The first value stored normally sticks, with two overrides:
															+ *   -EOPNOTSUPP replaces whatever is currently stored;
															+ *   DM_ENDIO_REQUEUE replaces anything except -EOPNOTSUPP.
															+ *
															+ * The update is made under md->barrier_error_lock with the interrupt
															+ * state saved and restored around it.
															+ */
															+static void store_barrier_error(struct mapped_device *md, int error)
															+{
															+	unsigned long irq_flags;
															+	bool replace;
															+
															+	spin_lock_irqsave(&md->barrier_error_lock, irq_flags);
															+
															+	if (!md->barrier_error)
															+		replace = true;		/* nothing recorded yet */
															+	else if (error == -EOPNOTSUPP)
															+		replace = true;		/* always wins */
															+	else
															+		replace = error == DM_ENDIO_REQUEUE &&
															+			  md->barrier_error != -EOPNOTSUPP;
															+
															+	if (replace)
															+		md->barrier_error = error;
															+
															+	spin_unlock_irqrestore(&md->barrier_error_lock, irq_flags);
															+}
														
 
															+
														
 
															 /*
														
 
															  * Don't touch any member of the md after calling this function because
														
 
															  * the md may be freed in dm_put() at the end of this function.
														
@@ -759,11 +786,13 @@ static void free_rq_clone(struct request *clone)
 
															 static void dm_end_request(struct request *clone, int error)
														
 
															 {
														
 
															 	int rw = rq_data_dir(clone);
														
 
															+	int run_queue = 1;
														
 
															+	bool is_barrier = blk_barrier_rq(clone);
														
 
															 	struct dm_rq_target_io *tio = clone->end_io_data;
														
 
															 	struct mapped_device *md = tio->md;
														
 
															 	struct request *rq = tio->orig;
														
 
															-	if (blk_pc_request(rq)) {
														
 
															+	if (blk_pc_request(rq) && !is_barrier) {
														
 
															 		rq->errors = clone->errors;
														
 
															 		rq->resid_len = clone->resid_len;
														
@@ -778,9 +807,14 @@ static void dm_end_request(struct request *clone, int error)
 
															 	free_rq_clone(clone);
														
 
															-	blk_end_request_all(rq, error);
														
 
															+	if (unlikely(is_barrier)) {
														
 
															+		if (unlikely(error))
														
 
															+			store_barrier_error(md, error);
														
 
															+		run_queue = 0;
														
 
															+	} else
														
 
															+		blk_end_request_all(rq, error);
														
 
															-	rq_completed(md, rw, 1);
														
 
															+	rq_completed(md, rw, run_queue);
														
 
															 }
														
 
															 static void dm_unprep_request(struct request *rq)
														
@@ -805,6 +839,16 @@ void dm_requeue_unmapped_request(struct request *clone)
 
															 	struct request_queue *q = rq->q;
														
 
															 	unsigned long flags;
														
 
															+	if (unlikely(blk_barrier_rq(clone))) {
														
 
															+		/*
														
 
															+		 * Barrier clones share an original request.
														
 
															+		 * Leave it to dm_end_request(), which handles this special
														
 
															+		 * case.
														
 
															+		 */
														
 
															+		dm_end_request(clone, DM_ENDIO_REQUEUE);
														
 
															+		return;
														
 
															+	}
														
 
															+
														
 
															 	dm_unprep_request(rq);
														
 
															 	spin_lock_irqsave(q->queue_lock, flags);
														
@@ -894,6 +938,19 @@ static void dm_complete_request(struct request *clone, int error)
 
															 	struct dm_rq_target_io *tio = clone->end_io_data;
														
 
															 	struct request *rq = tio->orig;
														
 
															+	if (unlikely(blk_barrier_rq(clone))) {
														
 
															+		/*
														
 
															+		 * Barrier clones share an original request.  So can't use
														
 
															+		 * softirq_done with the original.
														
 
															+		 * Pass the clone to dm_done() directly in this special case.
														
 
															+		 * It is safe (even if clone->q->queue_lock is held here)
														
 
															+		 * because there is no I/O dispatching during the completion
														
 
															+		 * of barrier clone.
														
 
															+		 */
														
 
															+		dm_done(clone, error, true);
														
 
															+		return;
														
 
															+	}
														
 
															+
														
 
															 	tio->error = error;
														
 
															 	rq->completion_data = clone;
														
 
															 	blk_complete_request(rq);
														
@@ -910,6 +967,17 @@ void dm_kill_unmapped_request(struct request *clone, int error)
 
															 	struct dm_rq_target_io *tio = clone->end_io_data;
														
 
															 	struct request *rq = tio->orig;
														
 
															+	if (unlikely(blk_barrier_rq(clone))) {
														
 
															+		/*
														
 
															+		 * Barrier clones share an original request.
														
 
															+		 * Leave it to dm_end_request(), which handles this special
														
 
															+		 * case.
														
 
															+		 */
														
 
															+		BUG_ON(error > 0);
														
 
															+		dm_end_request(clone, error);
														
 
															+		return;
														
 
															+	}
														
 
															+
														
 
															 	rq->cmd_flags |= REQ_FAILED;
														
 
															 	dm_complete_request(clone, error);
														
 
															 }
														
@@ -1364,11 +1432,6 @@ static int dm_make_request(struct request_queue *q, struct bio *bio)
 
															 {
														
 
															 	struct mapped_device *md = q->queuedata;
														
 
															-	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
														
 
															-		bio_endio(bio, -EOPNOTSUPP);
														
 
															-		return 0;
														
 
															-	}
														
 
															-
														
 
															 	return md->saved_make_request_fn(q, bio); /* call __make_request() */
														
 
															 }
														
@@ -1387,6 +1450,25 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 
															 	return _dm_request(q, bio);
														
 
															 }
														
 
															+/*
															+ * Tag @rq as a flush request by setting its command type to
															+ * REQ_TYPE_LINUX_BLOCK and cmd[0] to REQ_LB_OP_FLUSH, so that
															+ * dm_request_fn() can recognize it later.  @q is not used here.
															+ */
															+static void dm_rq_prepare_flush(struct request_queue *q, struct request *rq)
															+{
															+	rq->cmd[0] = REQ_LB_OP_FLUSH;
															+	rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
															+}
														
 
															+
														
 
															+/*
															+ * Return true when @rq carries the flush marker, i.e. its command type is
															+ * REQ_TYPE_LINUX_BLOCK and cmd[0] is REQ_LB_OP_FLUSH; false otherwise.
															+ */
															+static bool dm_rq_is_flush_request(struct request *rq)
															+{
															+	return rq->cmd_type == REQ_TYPE_LINUX_BLOCK &&
															+	       rq->cmd[0] == REQ_LB_OP_FLUSH;
															+}
														
 
															+
														
 
															 void dm_dispatch_request(struct request *rq)
														
 
															 {
														
 
															 	int r;
														
@@ -1432,16 +1514,24 @@ static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
 
															 static int setup_clone(struct request *clone, struct request *rq,
														
 
															 		       struct dm_rq_target_io *tio)
														
 
															 {
														
 
															-	int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
														
 
															-				  dm_rq_bio_constructor, tio);
														
 
															+	int r;
														
 
															-	if (r)
														
 
															-		return r;
														
 
															+	if (dm_rq_is_flush_request(rq)) {
														
 
															+		blk_rq_init(NULL, clone);
														
 
															+		clone->cmd_type = REQ_TYPE_FS;
														
 
															+		clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
														
 
															+	} else {
														
 
															+		r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
														
 
															+				      dm_rq_bio_constructor, tio);
														
 
															+		if (r)
														
 
															+			return r;
														
 
															+
														
 
															+		clone->cmd = rq->cmd;
														
 
															+		clone->cmd_len = rq->cmd_len;
														
 
															+		clone->sense = rq->sense;
														
 
															+		clone->buffer = rq->buffer;
														
 
															+	}
														
 
															-	clone->cmd = rq->cmd;
														
 
															-	clone->cmd_len = rq->cmd_len;
														
 
															-	clone->sense = rq->sense;
														
 
															-	clone->buffer = rq->buffer;
														
 
															 	clone->end_io = end_clone_request;
														
 
															 	clone->end_io_data = tio;
														
@@ -1482,6 +1572,9 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
 
															 	struct mapped_device *md = q->queuedata;
														
 
															 	struct request *clone;
														
 
															+	if (unlikely(dm_rq_is_flush_request(rq)))
														
 
															+		return BLKPREP_OK;
														
 
															+
														
 
															 	if (unlikely(rq->special)) {
														
 
															 		DMWARN("Already has something in rq->special.");
														
 
															 		return BLKPREP_KILL;
														
@@ -1560,6 +1653,14 @@ static void dm_request_fn(struct request_queue *q)
 
															 		if (!rq)
														
 
															 			goto plug_and_out;
														
 
															+		if (unlikely(dm_rq_is_flush_request(rq))) {
														
 
															+			BUG_ON(md->flush_request);
														
 
															+			md->flush_request = rq;
														
 
															+			blk_start_request(rq);
														
 
															+			queue_work(md->wq, &md->barrier_work);
														
 
															+			goto out;
														
 
															+		}
														
 
															+
														
 
															 		ti = dm_table_find_target(map, blk_rq_pos(rq));
														
 
															 		if (ti->type->busy && ti->type->busy(ti))
														
 
															 			goto plug_and_out;
														
@@ -1726,6 +1827,7 @@ out:
 
															 static const struct block_device_operations dm_blk_dops;
														
 
															 static void dm_wq_work(struct work_struct *work);
														
 
															+static void dm_rq_barrier_work(struct work_struct *work);
														
 
															 /*
														
 
															  * Allocate and initialise a blank device with a given minor.
														
@@ -1755,6 +1857,7 @@ static struct mapped_device *alloc_dev(int minor)
 
															 	init_rwsem(&md->io_lock);
														
 
															 	mutex_init(&md->suspend_lock);
														
 
															 	spin_lock_init(&md->deferred_lock);
														
 
															+	spin_lock_init(&md->barrier_error_lock);
														
 
															 	rwlock_init(&md->map_lock);
														
 
															 	atomic_set(&md->holders, 1);
														
 
															 	atomic_set(&md->open_count, 0);
														
@@ -1789,6 +1892,8 @@ static struct mapped_device *alloc_dev(int minor)
 
															 	blk_queue_softirq_done(md->queue, dm_softirq_done);
														
 
															 	blk_queue_prep_rq(md->queue, dm_prep_fn);
														
 
															 	blk_queue_lld_busy(md->queue, dm_lld_busy);
														
 
															+	blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH,
														
 
															+			  dm_rq_prepare_flush);
														
 
															 	md->disk = alloc_disk(1);
														
 
															 	if (!md->disk)
														
@@ -1798,6 +1903,7 @@ static struct mapped_device *alloc_dev(int minor)
 
															 	atomic_set(&md->pending[1], 0);
														
 
															 	init_waitqueue_head(&md->wait);
														
 
															 	INIT_WORK(&md->work, dm_wq_work);
														
 
															+	INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
														
 
															 	init_waitqueue_head(&md->eventq);
														
 
															 	md->disk->major = _major;
														
@@ -2185,6 +2291,73 @@ static void dm_queue_flush(struct mapped_device *md)
 
															 	queue_work(md->wq, &md->work);
														
 
															 }
														
 
															+/*
															+ * Store @flush_nr in the info.flush_request field of the target-io
															+ * structure that @clone->end_io_data points to.
															+ */
															+static void dm_rq_set_flush_nr(struct request *clone, unsigned flush_nr)
															+{
															+	struct dm_rq_target_io *clone_tio = clone->end_io_data;
															+
															+	clone_tio->info.flush_request = flush_nr;
															+}
														
 
															+
														
 
															+/*
															+ * Issue barrier requests to targets and wait for their completion.
															+ *
															+ * For each target of the current table, one clone of md->flush_request is
															+ * created and mapped per flush request the target declares
															+ * (num_flush_requests).  md->barrier_error is cleared before any clone is
															+ * issued, and its value after dm_wait_for_completion() returns is the
															+ * return value of this function.
															+ */
															+static int dm_rq_barrier(struct mapped_device *md)
															+{
															+	int target_idx, flush_idx;
															+	struct dm_table *table = dm_get_table(md);
															+	unsigned nr_targets = dm_table_get_num_targets(table);
															+	struct dm_target *target;
															+	struct request *flush_clone;
															+
															+	md->barrier_error = 0;
															+
															+	for (target_idx = 0; target_idx < nr_targets; target_idx++) {
															+		target = dm_table_get_target(table, target_idx);
															+
															+		for (flush_idx = 0;
															+		     flush_idx < target->num_flush_requests;
															+		     flush_idx++) {
															+			flush_clone = clone_rq(md->flush_request, md,
															+					       GFP_NOIO);
															+			dm_rq_set_flush_nr(flush_clone, flush_idx);
															+			/* Count the clone as pending before mapping it. */
															+			atomic_inc(&md->pending[rq_data_dir(flush_clone)]);
															+			map_request(target, flush_clone, md);
															+		}
															+	}
															+
															+	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
															+	dm_table_put(table);
															+
															+	return md->barrier_error;
															+}
														
 
															+
														
 
															+/*
															+ * Work function for md->barrier_work, which dm_request_fn() queues on
															+ * md->wq after it has stored a flush request in md->flush_request.
															+ *
															+ * Runs dm_rq_barrier(), detaches md->flush_request, then either requeues
															+ * the request (result DM_ENDIO_REQUEUE) or completes it with the result,
															+ * and finally runs the queue.
															+ */
															+static void dm_rq_barrier_work(struct work_struct *work)
															+{
															+	struct mapped_device *md = container_of(work, struct mapped_device,
															+						barrier_work);
															+	struct request_queue *queue = md->queue;
															+	struct request *flush_rq;
															+	unsigned long irq_flags;
															+	int result;
															+
															+	/*
															+	 * Take a reference on the md now and drop it only at the very end,
															+	 * so that the md can't be deleted by the device opener when the
															+	 * barrier request completes.
															+	 */
															+	dm_get(md);
															+
															+	result = dm_rq_barrier(md);
															+
															+	flush_rq = md->flush_request;
															+	md->flush_request = NULL;
															+
															+	if (result != DM_ENDIO_REQUEUE) {
															+		blk_end_request_all(flush_rq, result);
															+	} else {
															+		spin_lock_irqsave(queue->queue_lock, irq_flags);
															+		blk_requeue_request(queue, flush_rq);
															+		spin_unlock_irqrestore(queue->queue_lock, irq_flags);
															+	}
															+
															+	blk_run_queue(queue);
															+
															+	dm_put(md);
															+}
														
 
															+
														
 
															 /*
														
 
															  * Swap in a new table (destroying old one).
														
 
															  */
														
@@ -2325,11 +2498,16 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
															 	set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
														
 
															 	up_write(&md->io_lock);
														
 
															-	flush_workqueue(md->wq);
														
 
															-
														
 
															+	/*
														
 
															+	 * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which
														
 
															+	 * can be kicked until md->queue is stopped.  So stop md->queue before
														
 
															+	 * flushing md->wq.
														
 
															+	 */
														
 
															 	if (dm_request_based(md))
														
 
															 		stop_queue(md->queue);
														
 
															+	flush_workqueue(md->wq);
														
 
															+
														
 
															 	/*
														
 
															 	 * At this point no more requests are entering target request routines.
														
 
															 	 * We call dm_wait_for_completion to wait for all existing requests