@@ -89,12 +89,13 @@ union map_info *dm_get_mapinfo(struct bio *bio)
 /*
  * Bits for the md->flags field.
  */
-#define DMF_BLOCK_IO 0
+#define DMF_BLOCK_IO_FOR_SUSPEND 0
 #define DMF_SUSPENDED 1
 #define DMF_FROZEN 2
 #define DMF_FREEING 3
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
+#define DMF_QUEUE_IO_TO_THREAD 6
 
 /*
  * Work processed by per-device workqueue.
@@ -123,6 +124,11 @@ struct mapped_device {
 	struct bio_list deferred;
 	spinlock_t deferred_lock;
 
+	/*
+	 * An error from the barrier request currently being processed.
+	 */
+	int barrier_error;
+
 	/*
 	 * Processing queue (flush/barriers)
 	 */
@@ -424,6 +430,10 @@ static void end_io_acct(struct dm_io *io)
 	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
 	part_stat_unlock();
 
+	/*
+	 * After this is decremented the bio must not be touched if it is
+	 * a barrier.
+	 */
 	dm_disk(md)->part0.in_flight = pending =
 		atomic_dec_return(&md->pending);
 
@@ -435,21 +445,18 @@ static void end_io_acct(struct dm_io *io)
 /*
  * Add the bio to the list of deferred io.
  */
-static int queue_io(struct mapped_device *md, struct bio *bio)
+static void queue_io(struct mapped_device *md, struct bio *bio)
 {
 	down_write(&md->io_lock);
 
-	if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
-		up_write(&md->io_lock);
-		return 1;
-	}
-
 	spin_lock_irq(&md->deferred_lock);
 	bio_list_add(&md->deferred, bio);
 	spin_unlock_irq(&md->deferred_lock);
 
+	if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))
+		queue_work(md->wq, &md->work);
+
 	up_write(&md->io_lock);
-	return 0; /* deferred successfully */
 }
 
 /*
@@ -533,25 +540,35 @@ static void dec_pending(struct dm_io *io, int error)
 			 */
 			spin_lock_irqsave(&md->deferred_lock, flags);
 			if (__noflush_suspending(md))
-				bio_list_add(&md->deferred, io->bio);
+				bio_list_add_head(&md->deferred, io->bio);
 			else
 				/* noflush suspend was interrupted. */
 				io->error = -EIO;
 			spin_unlock_irqrestore(&md->deferred_lock, flags);
 		}
 
-		end_io_acct(io);
-
 		io_error = io->error;
 		bio = io->bio;
 
-		free_io(md, io);
+		if (bio_barrier(bio)) {
+			/*
+			 * There can be just one barrier request so we use
+			 * a per-device variable for error reporting.
+			 * Note that you can't touch the bio after end_io_acct
+			 */
+			md->barrier_error = io_error;
+			end_io_acct(io);
+		} else {
+			end_io_acct(io);
 
-		if (io_error != DM_ENDIO_REQUEUE) {
-			trace_block_bio_complete(md->queue, bio);
+			if (io_error != DM_ENDIO_REQUEUE) {
+				trace_block_bio_complete(md->queue, bio);
 
-			bio_endio(bio, io_error);
+				bio_endio(bio, io_error);
+			}
 		}
 
+		free_io(md, io);
 	}
 }
@@ -693,13 +710,19 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
 
 	clone->bi_sector = sector;
 	clone->bi_bdev = bio->bi_bdev;
-	clone->bi_rw = bio->bi_rw;
+	clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
 	clone->bi_vcnt = 1;
 	clone->bi_size = to_bytes(len);
 	clone->bi_io_vec->bv_offset = offset;
 	clone->bi_io_vec->bv_len = clone->bi_size;
 	clone->bi_flags |= 1 << BIO_CLONED;
 
+	if (bio_integrity(bio)) {
+		bio_integrity_clone(clone, bio, GFP_NOIO);
+		bio_integrity_trim(clone,
+				   bio_sector_offset(bio, idx, offset), len);
+	}
+
 	return clone;
 }
|
|
|
|
|
@@ -714,6 +737,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
|
|
|
|
|
|
clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
|
|
clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
|
|
__bio_clone(clone, bio);
|
|
__bio_clone(clone, bio);
|
|
|
|
+ clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
|
|
clone->bi_destructor = dm_bio_destructor;
|
|
clone->bi_destructor = dm_bio_destructor;
|
|
clone->bi_sector = sector;
|
|
clone->bi_sector = sector;
|
|
clone->bi_idx = idx;
|
|
clone->bi_idx = idx;
|
|
@@ -721,6 +745,14 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
 	clone->bi_size = to_bytes(len);
 	clone->bi_flags &= ~(1 << BIO_SEG_VALID);
 
+	if (bio_integrity(bio)) {
+		bio_integrity_clone(clone, bio, GFP_NOIO);
+
+		if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
+			bio_integrity_trim(clone,
+					   bio_sector_offset(bio, idx, 0), len);
+	}
+
 	return clone;
 }
@@ -834,14 +866,13 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 
 	ci.map = dm_get_table(md);
 	if (unlikely(!ci.map)) {
-		bio_io_error(bio);
-		return;
-	}
-	if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
-		dm_table_put(ci.map);
-		bio_endio(bio, -EOPNOTSUPP);
+		if (!bio_barrier(bio))
+			bio_io_error(bio);
+		else
+			md->barrier_error = -EIO;
 		return;
 	}
+
 	ci.md = md;
 	ci.bio = bio;
 	ci.io = alloc_io(md);
@@ -918,7 +949,6 @@ out:
  */
 static int dm_request(struct request_queue *q, struct bio *bio)
 {
-	int r = -EIO;
 	int rw = bio_data_dir(bio);
 	struct mapped_device *md = q->queuedata;
 	int cpu;
@@ -931,34 +961,27 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	part_stat_unlock();
 
 	/*
-	 * If we're suspended we have to queue
-	 * this io for later.
+	 * If we're suspended or the thread is processing barriers
+	 * we have to queue this io for later.
 	 */
-	while (test_bit(DMF_BLOCK_IO, &md->flags)) {
+	if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
+	    unlikely(bio_barrier(bio))) {
 		up_read(&md->io_lock);
 
-		if (bio_rw(bio) != READA)
-			r = queue_io(md, bio);
+		if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
+		    bio_rw(bio) == READA) {
+			bio_io_error(bio);
+			return 0;
+		}
 
-		if (r <= 0)
-			goto out_req;
+		queue_io(md, bio);
 
-		/*
-		 * We're in a while loop, because someone could suspend
-		 * before we get to the following read lock.
-		 */
-		down_read(&md->io_lock);
+		return 0;
 	}
 
 	__split_and_process_bio(md, bio);
 	up_read(&md->io_lock);
 	return 0;
-
-out_req:
-	if (r < 0)
-		bio_io_error(bio);
-
-	return 0;
 }
 
 static void dm_unplug_all(struct request_queue *q)
@@ -978,7 +1001,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 	struct mapped_device *md = congested_data;
 	struct dm_table *map;
 
-	if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
+	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
 		map = dm_get_table(md);
 		if (map) {
 			r = dm_table_any_congested(map, bdi_bits);
@@ -1193,6 +1216,7 @@ static void free_dev(struct mapped_device *md)
 	mempool_destroy(md->tio_pool);
 	mempool_destroy(md->io_pool);
 	bioset_free(md->bs);
+	blk_integrity_unregister(md->disk);
 	del_gendisk(md->disk);
 	free_minor(minor);
 
@@ -1406,6 +1430,36 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 	return r;
 }
 
+static int dm_flush(struct mapped_device *md)
+{
+	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
+	return 0;
+}
+
+static void process_barrier(struct mapped_device *md, struct bio *bio)
+{
+	int error = dm_flush(md);
+
+	if (unlikely(error)) {
+		bio_endio(bio, error);
+		return;
+	}
+	if (bio_empty_barrier(bio)) {
+		bio_endio(bio, 0);
+		return;
+	}
+
+	__split_and_process_bio(md, bio);
+
+	error = dm_flush(md);
+
+	if (!error && md->barrier_error)
+		error = md->barrier_error;
+
+	if (md->barrier_error != DM_ENDIO_REQUEUE)
+		bio_endio(bio, error);
+}
+
 /*
  * Process the deferred bios
  */
@@ -1417,25 +1471,34 @@ static void dm_wq_work(struct work_struct *work)
 
 	down_write(&md->io_lock);
 
-next_bio:
-	spin_lock_irq(&md->deferred_lock);
-	c = bio_list_pop(&md->deferred);
-	spin_unlock_irq(&md->deferred_lock);
+	while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
+		spin_lock_irq(&md->deferred_lock);
+		c = bio_list_pop(&md->deferred);
+		spin_unlock_irq(&md->deferred_lock);
 
-	if (c) {
-		__split_and_process_bio(md, c);
-		goto next_bio;
-	}
+		if (!c) {
+			clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
+			break;
+		}
 
-	clear_bit(DMF_BLOCK_IO, &md->flags);
+		up_write(&md->io_lock);
+
+		if (bio_barrier(c))
+			process_barrier(md, c);
+		else
+			__split_and_process_bio(md, c);
+
+		down_write(&md->io_lock);
+	}
 
 	up_write(&md->io_lock);
 }
 
 static void dm_queue_flush(struct mapped_device *md)
 {
+	clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
+	smp_mb__after_clear_bit();
 	queue_work(md->wq, &md->work);
-	flush_workqueue(md->wq);
 }
 
 /*
@@ -1553,20 +1616,36 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	}
 
 	/*
-	 * First we set the BLOCK_IO flag so no more ios will be mapped.
+	 * Here we must make sure that no processes are submitting requests
+	 * to target drivers i.e. no one may be executing
+	 * __split_and_process_bio. This is called from dm_request and
+	 * dm_wq_work.
+	 *
+	 * To get all processes out of __split_and_process_bio in dm_request,
+	 * we take the write lock. To prevent any process from reentering
+	 * __split_and_process_bio from dm_request, we set
+	 * DMF_QUEUE_IO_TO_THREAD.
+	 *
+	 * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND
+	 * and call flush_workqueue(md->wq). flush_workqueue will wait until
+	 * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any
+	 * further calls to __split_and_process_bio from dm_wq_work.
 	 */
 	down_write(&md->io_lock);
-	set_bit(DMF_BLOCK_IO, &md->flags);
-
+	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
+	set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
 	up_write(&md->io_lock);
 
+	flush_workqueue(md->wq);
+
 	/*
-	 * Wait for the already-mapped ios to complete.
+	 * At this point no more requests are entering target request routines.
+	 * We call dm_wait_for_completion to wait for all existing requests
+	 * to finish.
 	 */
 	r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
 
 	down_write(&md->io_lock);
-
 	if (noflush)
 		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
 	up_write(&md->io_lock);
@@ -1579,6 +1658,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 		goto out; /* pushback list is already flushed, so skip flush */
 	}
 
+	/*
+	 * If dm_wait_for_completion returned 0, the device is completely
+	 * quiescent now. There is no request-processing activity. All new
+	 * requests are being added to md->deferred list.
+	 */
+
 	dm_table_postsuspend_targets(map);
 
 	set_bit(DMF_SUSPENDED, &md->flags);