|
@@ -142,10 +142,20 @@ struct mapped_device {
|
|
*/
|
|
*/
|
|
int barrier_error;
|
|
int barrier_error;
|
|
|
|
|
|
|
|
+ /*
|
|
|
|
+ * Protect barrier_error from concurrent endio processing
|
|
|
|
+ * in request-based dm.
|
|
|
|
+ */
|
|
|
|
+ spinlock_t barrier_error_lock;
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* Processing queue (flush/barriers)
|
|
* Processing queue (flush/barriers)
|
|
*/
|
|
*/
|
|
struct workqueue_struct *wq;
|
|
struct workqueue_struct *wq;
|
|
|
|
+ struct work_struct barrier_work;
|
|
|
|
+
|
|
|
|
+ /* A pointer to the currently processing pre/post flush request */
|
|
|
|
+ struct request *flush_request;
|
|
|
|
|
|
/*
|
|
/*
|
|
* The current mapping.
|
|
* The current mapping.
|
|
@@ -722,6 +732,23 @@ static void end_clone_bio(struct bio *clone, int error)
|
|
blk_update_request(tio->orig, 0, nr_bytes);
|
|
blk_update_request(tio->orig, 0, nr_bytes);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/*
+ * Record the completion status of one barrier clone in md->barrier_error.
+ *
+ * Precedence, highest first: -EOPNOTSUPP, then DM_ENDIO_REQUEUE, then
+ * whichever other error was stored first.  The update is performed under
+ * md->barrier_error_lock (taken with spin_lock_irqsave).
+ */
+static void store_barrier_error(struct mapped_device *md, int error)
+{
+	unsigned long irq_flags;
+	bool replace;
+
+	spin_lock_irqsave(&md->barrier_error_lock, irq_flags);
+
+	/* Nothing stored yet, or the new value is -EOPNOTSUPP: always take it. */
+	replace = !md->barrier_error || error == -EOPNOTSUPP;
+
+	/* A requeue overrides anything already stored except -EOPNOTSUPP. */
+	if (!replace && error == DM_ENDIO_REQUEUE)
+		replace = md->barrier_error != -EOPNOTSUPP;
+
+	if (replace)
+		md->barrier_error = error;
+
+	spin_unlock_irqrestore(&md->barrier_error_lock, irq_flags);
+}
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* Don't touch any member of the md after calling this function because
|
|
* Don't touch any member of the md after calling this function because
|
|
* the md may be freed in dm_put() at the end of this function.
|
|
* the md may be freed in dm_put() at the end of this function.
|
|
@@ -759,11 +786,13 @@ static void free_rq_clone(struct request *clone)
|
|
static void dm_end_request(struct request *clone, int error)
|
|
static void dm_end_request(struct request *clone, int error)
|
|
{
|
|
{
|
|
int rw = rq_data_dir(clone);
|
|
int rw = rq_data_dir(clone);
|
|
|
|
+ int run_queue = 1;
|
|
|
|
+ bool is_barrier = blk_barrier_rq(clone);
|
|
struct dm_rq_target_io *tio = clone->end_io_data;
|
|
struct dm_rq_target_io *tio = clone->end_io_data;
|
|
struct mapped_device *md = tio->md;
|
|
struct mapped_device *md = tio->md;
|
|
struct request *rq = tio->orig;
|
|
struct request *rq = tio->orig;
|
|
|
|
|
|
- if (blk_pc_request(rq)) {
|
|
|
|
|
|
+ if (blk_pc_request(rq) && !is_barrier) {
|
|
rq->errors = clone->errors;
|
|
rq->errors = clone->errors;
|
|
rq->resid_len = clone->resid_len;
|
|
rq->resid_len = clone->resid_len;
|
|
|
|
|
|
@@ -778,9 +807,14 @@ static void dm_end_request(struct request *clone, int error)
|
|
|
|
|
|
free_rq_clone(clone);
|
|
free_rq_clone(clone);
|
|
|
|
|
|
- blk_end_request_all(rq, error);
|
|
|
|
|
|
+ if (unlikely(is_barrier)) {
|
|
|
|
+ if (unlikely(error))
|
|
|
|
+ store_barrier_error(md, error);
|
|
|
|
+ run_queue = 0;
|
|
|
|
+ } else
|
|
|
|
+ blk_end_request_all(rq, error);
|
|
|
|
|
|
- rq_completed(md, rw, 1);
|
|
|
|
|
|
+ rq_completed(md, rw, run_queue);
|
|
}
|
|
}
|
|
|
|
|
|
static void dm_unprep_request(struct request *rq)
|
|
static void dm_unprep_request(struct request *rq)
|
|
@@ -805,6 +839,16 @@ void dm_requeue_unmapped_request(struct request *clone)
|
|
struct request_queue *q = rq->q;
|
|
struct request_queue *q = rq->q;
|
|
unsigned long flags;
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
+ if (unlikely(blk_barrier_rq(clone))) {
|
|
|
|
+ /*
|
|
|
|
+ * Barrier clones share an original request.
|
|
|
|
+ * Leave it to dm_end_request(), which handles this special
|
|
|
|
+ * case.
|
|
|
|
+ */
|
|
|
|
+ dm_end_request(clone, DM_ENDIO_REQUEUE);
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+
|
|
dm_unprep_request(rq);
|
|
dm_unprep_request(rq);
|
|
|
|
|
|
spin_lock_irqsave(q->queue_lock, flags);
|
|
spin_lock_irqsave(q->queue_lock, flags);
|
|
@@ -894,6 +938,19 @@ static void dm_complete_request(struct request *clone, int error)
|
|
struct dm_rq_target_io *tio = clone->end_io_data;
|
|
struct dm_rq_target_io *tio = clone->end_io_data;
|
|
struct request *rq = tio->orig;
|
|
struct request *rq = tio->orig;
|
|
|
|
|
|
|
|
+ if (unlikely(blk_barrier_rq(clone))) {
|
|
|
|
+ /*
|
|
|
|
+ * Barrier clones share an original request. So can't use
|
|
|
|
+ * softirq_done with the original.
|
|
|
|
+ * Pass the clone to dm_done() directly in this special case.
|
|
|
|
+ * It is safe (even if clone->q->queue_lock is held here)
|
|
|
|
+ * because there is no I/O dispatching during the completion
|
|
|
|
+ * of barrier clone.
|
|
|
|
+ */
|
|
|
|
+ dm_done(clone, error, true);
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+
|
|
tio->error = error;
|
|
tio->error = error;
|
|
rq->completion_data = clone;
|
|
rq->completion_data = clone;
|
|
blk_complete_request(rq);
|
|
blk_complete_request(rq);
|
|
@@ -910,6 +967,17 @@ void dm_kill_unmapped_request(struct request *clone, int error)
|
|
struct dm_rq_target_io *tio = clone->end_io_data;
|
|
struct dm_rq_target_io *tio = clone->end_io_data;
|
|
struct request *rq = tio->orig;
|
|
struct request *rq = tio->orig;
|
|
|
|
|
|
|
|
+ if (unlikely(blk_barrier_rq(clone))) {
|
|
|
|
+ /*
|
|
|
|
+ * Barrier clones share an original request.
|
|
|
|
+ * Leave it to dm_end_request(), which handles this special
|
|
|
|
+ * case.
|
|
|
|
+ */
|
|
|
|
+ BUG_ON(error > 0);
|
|
|
|
+ dm_end_request(clone, error);
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+
|
|
rq->cmd_flags |= REQ_FAILED;
|
|
rq->cmd_flags |= REQ_FAILED;
|
|
dm_complete_request(clone, error);
|
|
dm_complete_request(clone, error);
|
|
}
|
|
}
|
|
@@ -1364,11 +1432,6 @@ static int dm_make_request(struct request_queue *q, struct bio *bio)
|
|
{
|
|
{
|
|
struct mapped_device *md = q->queuedata;
|
|
struct mapped_device *md = q->queuedata;
|
|
|
|
|
|
- if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
|
|
|
|
- bio_endio(bio, -EOPNOTSUPP);
|
|
|
|
- return 0;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
return md->saved_make_request_fn(q, bio); /* call __make_request() */
|
|
return md->saved_make_request_fn(q, bio); /* call __make_request() */
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1387,6 +1450,25 @@ static int dm_request(struct request_queue *q, struct bio *bio)
|
|
return _dm_request(q, bio);
|
|
return _dm_request(q, bio);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/*
+ * Tag @rq as a flush request: a REQ_TYPE_LINUX_BLOCK request carrying
+ * REQ_LB_OP_FLUSH in cmd[0].  dm_request_fn() recognizes the request by
+ * this marker.  @q is not used.
+ */
+static void dm_rq_prepare_flush(struct request_queue *q, struct request *rq)
+{
+	rq->cmd[0] = REQ_LB_OP_FLUSH;
+	rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Return true when @rq is a REQ_TYPE_LINUX_BLOCK request whose cmd[0] is
+ * REQ_LB_OP_FLUSH, false otherwise.
+ */
+static bool dm_rq_is_flush_request(struct request *rq)
+{
+	return rq->cmd_type == REQ_TYPE_LINUX_BLOCK &&
+	       rq->cmd[0] == REQ_LB_OP_FLUSH;
+}
|
|
|
|
+
|
|
void dm_dispatch_request(struct request *rq)
|
|
void dm_dispatch_request(struct request *rq)
|
|
{
|
|
{
|
|
int r;
|
|
int r;
|
|
@@ -1432,16 +1514,24 @@ static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
|
|
static int setup_clone(struct request *clone, struct request *rq,
|
|
static int setup_clone(struct request *clone, struct request *rq,
|
|
struct dm_rq_target_io *tio)
|
|
struct dm_rq_target_io *tio)
|
|
{
|
|
{
|
|
- int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
|
|
|
|
- dm_rq_bio_constructor, tio);
|
|
|
|
|
|
+ int r;
|
|
|
|
|
|
- if (r)
|
|
|
|
- return r;
|
|
|
|
|
|
+ if (dm_rq_is_flush_request(rq)) {
|
|
|
|
+ blk_rq_init(NULL, clone);
|
|
|
|
+ clone->cmd_type = REQ_TYPE_FS;
|
|
|
|
+ clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
|
|
|
|
+ } else {
|
|
|
|
+ r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
|
|
|
|
+ dm_rq_bio_constructor, tio);
|
|
|
|
+ if (r)
|
|
|
|
+ return r;
|
|
|
|
+
|
|
|
|
+ clone->cmd = rq->cmd;
|
|
|
|
+ clone->cmd_len = rq->cmd_len;
|
|
|
|
+ clone->sense = rq->sense;
|
|
|
|
+ clone->buffer = rq->buffer;
|
|
|
|
+ }
|
|
|
|
|
|
- clone->cmd = rq->cmd;
|
|
|
|
- clone->cmd_len = rq->cmd_len;
|
|
|
|
- clone->sense = rq->sense;
|
|
|
|
- clone->buffer = rq->buffer;
|
|
|
|
clone->end_io = end_clone_request;
|
|
clone->end_io = end_clone_request;
|
|
clone->end_io_data = tio;
|
|
clone->end_io_data = tio;
|
|
|
|
|
|
@@ -1482,6 +1572,9 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
|
|
struct mapped_device *md = q->queuedata;
|
|
struct mapped_device *md = q->queuedata;
|
|
struct request *clone;
|
|
struct request *clone;
|
|
|
|
|
|
|
|
+ if (unlikely(dm_rq_is_flush_request(rq)))
|
|
|
|
+ return BLKPREP_OK;
|
|
|
|
+
|
|
if (unlikely(rq->special)) {
|
|
if (unlikely(rq->special)) {
|
|
DMWARN("Already has something in rq->special.");
|
|
DMWARN("Already has something in rq->special.");
|
|
return BLKPREP_KILL;
|
|
return BLKPREP_KILL;
|
|
@@ -1560,6 +1653,14 @@ static void dm_request_fn(struct request_queue *q)
|
|
if (!rq)
|
|
if (!rq)
|
|
goto plug_and_out;
|
|
goto plug_and_out;
|
|
|
|
|
|
|
|
+ if (unlikely(dm_rq_is_flush_request(rq))) {
|
|
|
|
+ BUG_ON(md->flush_request);
|
|
|
|
+ md->flush_request = rq;
|
|
|
|
+ blk_start_request(rq);
|
|
|
|
+ queue_work(md->wq, &md->barrier_work);
|
|
|
|
+ goto out;
|
|
|
|
+ }
|
|
|
|
+
|
|
ti = dm_table_find_target(map, blk_rq_pos(rq));
|
|
ti = dm_table_find_target(map, blk_rq_pos(rq));
|
|
if (ti->type->busy && ti->type->busy(ti))
|
|
if (ti->type->busy && ti->type->busy(ti))
|
|
goto plug_and_out;
|
|
goto plug_and_out;
|
|
@@ -1726,6 +1827,7 @@ out:
|
|
static const struct block_device_operations dm_blk_dops;
|
|
static const struct block_device_operations dm_blk_dops;
|
|
|
|
|
|
static void dm_wq_work(struct work_struct *work);
|
|
static void dm_wq_work(struct work_struct *work);
|
|
|
|
+static void dm_rq_barrier_work(struct work_struct *work);
|
|
|
|
|
|
/*
|
|
/*
|
|
* Allocate and initialise a blank device with a given minor.
|
|
* Allocate and initialise a blank device with a given minor.
|
|
@@ -1755,6 +1857,7 @@ static struct mapped_device *alloc_dev(int minor)
|
|
init_rwsem(&md->io_lock);
|
|
init_rwsem(&md->io_lock);
|
|
mutex_init(&md->suspend_lock);
|
|
mutex_init(&md->suspend_lock);
|
|
spin_lock_init(&md->deferred_lock);
|
|
spin_lock_init(&md->deferred_lock);
|
|
|
|
+ spin_lock_init(&md->barrier_error_lock);
|
|
rwlock_init(&md->map_lock);
|
|
rwlock_init(&md->map_lock);
|
|
atomic_set(&md->holders, 1);
|
|
atomic_set(&md->holders, 1);
|
|
atomic_set(&md->open_count, 0);
|
|
atomic_set(&md->open_count, 0);
|
|
@@ -1789,6 +1892,8 @@ static struct mapped_device *alloc_dev(int minor)
|
|
blk_queue_softirq_done(md->queue, dm_softirq_done);
|
|
blk_queue_softirq_done(md->queue, dm_softirq_done);
|
|
blk_queue_prep_rq(md->queue, dm_prep_fn);
|
|
blk_queue_prep_rq(md->queue, dm_prep_fn);
|
|
blk_queue_lld_busy(md->queue, dm_lld_busy);
|
|
blk_queue_lld_busy(md->queue, dm_lld_busy);
|
|
|
|
+ blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH,
|
|
|
|
+ dm_rq_prepare_flush);
|
|
|
|
|
|
md->disk = alloc_disk(1);
|
|
md->disk = alloc_disk(1);
|
|
if (!md->disk)
|
|
if (!md->disk)
|
|
@@ -1798,6 +1903,7 @@ static struct mapped_device *alloc_dev(int minor)
|
|
atomic_set(&md->pending[1], 0);
|
|
atomic_set(&md->pending[1], 0);
|
|
init_waitqueue_head(&md->wait);
|
|
init_waitqueue_head(&md->wait);
|
|
INIT_WORK(&md->work, dm_wq_work);
|
|
INIT_WORK(&md->work, dm_wq_work);
|
|
|
|
+ INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
|
|
init_waitqueue_head(&md->eventq);
|
|
init_waitqueue_head(&md->eventq);
|
|
|
|
|
|
md->disk->major = _major;
|
|
md->disk->major = _major;
|
|
@@ -2185,6 +2291,73 @@ static void dm_queue_flush(struct mapped_device *md)
|
|
queue_work(md->wq, &md->work);
|
|
queue_work(md->wq, &md->work);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/*
+ * Store @flush_nr in info.flush_request of the dm_rq_target_io that
+ * @clone points to through end_io_data.
+ */
+static void dm_rq_set_flush_nr(struct request *clone, unsigned flush_nr)
+{
+	struct dm_rq_target_io *clone_tio = clone->end_io_data;
+
+	clone_tio->info.flush_request = flush_nr;
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Issue barrier requests to targets and wait for their completion.
+ *
+ * md->barrier_error is reset to 0 first.  Then, for every target of the
+ * current table, one clone of md->flush_request is mapped for each of the
+ * target's num_flush_requests.  After dm_wait_for_completion() returns and
+ * the table reference is dropped, md->barrier_error is returned.
+ */
+static int dm_rq_barrier(struct mapped_device *md)
+{
+	struct dm_table *table = dm_get_table(md);
+	unsigned target_count = dm_table_get_num_targets(table);
+	struct dm_target *target;
+	struct request *flush_clone;
+	int t, f;
+
+	md->barrier_error = 0;
+
+	for (t = 0; t < target_count; t++) {
+		target = dm_table_get_target(table, t);
+
+		f = 0;
+		while (f < target->num_flush_requests) {
+			flush_clone = clone_rq(md->flush_request, md, GFP_NOIO);
+			dm_rq_set_flush_nr(flush_clone, f);
+			/* Count the clone as pending before mapping it. */
+			atomic_inc(&md->pending[rq_data_dir(flush_clone)]);
+			map_request(target, flush_clone, md);
+			f++;
+		}
+	}
+
+	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
+	dm_table_put(table);
+
+	return md->barrier_error;
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Work item for md->barrier_work: runs dm_rq_barrier() for the request held
+ * in md->flush_request, then either requeues that request (when the result
+ * is DM_ENDIO_REQUEUE) or ends it with the result, and finally reruns the
+ * queue.
+ */
+static void dm_rq_barrier_work(struct work_struct *work)
+{
+	struct mapped_device *md = container_of(work, struct mapped_device,
+						barrier_work);
+	struct request_queue *queue = md->queue;
+	struct request *flush_rq;
+	unsigned long irq_flags;
+	int result;
+
+	/*
+	 * Take an md reference for the whole work item and drop it last, so
+	 * that the md can't be deleted by the device opener when the barrier
+	 * request completes.
+	 */
+	dm_get(md);
+
+	result = dm_rq_barrier(md);
+
+	/* Detach the flush request from the md before completing it. */
+	flush_rq = md->flush_request;
+	md->flush_request = NULL;
+
+	if (result != DM_ENDIO_REQUEUE) {
+		blk_end_request_all(flush_rq, result);
+	} else {
+		spin_lock_irqsave(queue->queue_lock, irq_flags);
+		blk_requeue_request(queue, flush_rq);
+		spin_unlock_irqrestore(queue->queue_lock, irq_flags);
+	}
+
+	blk_run_queue(queue);
+
+	dm_put(md);
+}
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* Swap in a new table (destroying old one).
|
|
* Swap in a new table (destroying old one).
|
|
*/
|
|
*/
|
|
@@ -2325,11 +2498,16 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
|
|
set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
|
|
set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
|
|
up_write(&md->io_lock);
|
|
up_write(&md->io_lock);
|
|
|
|
|
|
- flush_workqueue(md->wq);
|
|
|
|
-
|
|
|
|
|
|
+ /*
|
|
|
|
+ * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which
|
|
|
|
+ * can be kicked until md->queue is stopped. So stop md->queue before
|
|
|
|
+ * flushing md->wq.
|
|
|
|
+ */
|
|
if (dm_request_based(md))
|
|
if (dm_request_based(md))
|
|
stop_queue(md->queue);
|
|
stop_queue(md->queue);
|
|
|
|
|
|
|
|
+ flush_workqueue(md->wq);
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* At this point no more requests are entering target request routines.
|
|
* At this point no more requests are entering target request routines.
|
|
* We call dm_wait_for_completion to wait for all existing requests
|
|
* We call dm_wait_for_completion to wait for all existing requests
|