@@ -297,6 +297,54 @@ void bio_reset(struct bio *bio)
 }
 EXPORT_SYMBOL(bio_reset);
 
+static void bio_alloc_rescue(struct work_struct *work)
+{
+	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
+	struct bio *bio;
+
+	while (1) {
+		spin_lock(&bs->rescue_lock);
+		bio = bio_list_pop(&bs->rescue_list);
+		spin_unlock(&bs->rescue_lock);
+
+		if (!bio)
+			break;
+
+		generic_make_request(bio);
+	}
+}
+
+static void punt_bios_to_rescuer(struct bio_set *bs)
+{
+	struct bio_list punt, nopunt;
+	struct bio *bio;
+
+	/*
+	 * In order to guarantee forward progress we must punt only bios that
+	 * were allocated from this bio_set; otherwise, if there was a bio on
+	 * there for a stacking driver higher up in the stack, processing it
+	 * could require allocating bios from this bio_set, and doing that from
+	 * our own rescuer would be bad.
+	 *
+	 * Since bio lists are singly linked, pop them all instead of trying to
+	 * remove from the middle of the list:
+	 */
+
+	bio_list_init(&punt);
+	bio_list_init(&nopunt);
+
+	while ((bio = bio_list_pop(current->bio_list)))
+		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+
+	*current->bio_list = nopunt;
+
+	spin_lock(&bs->rescue_lock);
+	bio_list_merge(&bs->rescue_list, &punt);
+	spin_unlock(&bs->rescue_lock);
+
+	queue_work(bs->rescue_workqueue, &bs->rescue_work);
+}
+
 /**
  * bio_alloc_bioset - allocate a bio for I/O
  * @gfp_mask:   the GFP_ mask given to the slab allocator
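
As an aside to the patch itself: the comment in punt_bios_to_rescuer() is the heart of the forward-progress argument. bio_list is singly linked, so plucking only this bio_set's bios out of the middle of current->bio_list would be awkward; the whole list is drained and repartitioned instead. Below is a minimal userspace sketch of that drain-and-repartition idiom; all names (node, list_pop, list_push, owner) are invented stand-ins for the bio_list API, and list_push() prepends where bio_list_add() appends, but the shape of the loop is the same.

/* Illustrative sketch only - not part of the patch. */
#include <stdio.h>

struct node {
	int owner;			/* stands in for bio->bi_pool */
	struct node *next;
};

static struct node *list_pop(struct node **head)
{
	struct node *n = *head;

	if (n)
		*head = n->next;
	return n;
}

static void list_push(struct node **head, struct node *n)
{
	n->next = *head;
	*head = n;
}

int main(void)
{
	struct node nodes[4] = {
		{ .owner = 1 }, { .owner = 2 }, { .owner = 1 }, { .owner = 2 }
	};
	struct node *pending = NULL, *punt = NULL, *nopunt = NULL, *n;
	int i;

	for (i = 0; i < 4; i++)
		list_push(&pending, &nodes[i]);

	/* Drain everything, partitioning by owner - cf. the
	 * while ((bio = bio_list_pop(current->bio_list))) loop above. */
	while ((n = list_pop(&pending)))
		list_push(n->owner == 1 ? &punt : &nopunt, n);

	for (n = punt; n; n = n->next)
		printf("punted a node owned by %d\n", n->owner);
	return 0;
}
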
@@ -314,11 +362,27 @@ EXPORT_SYMBOL(bio_reset);
  *   previously allocated bio for IO before attempting to allocate a new one.
  *   Failure to do so can cause deadlocks under memory pressure.
  *
+ *   Note that when running under generic_make_request() (i.e. any block
+ *   driver), bios are not submitted until after you return - see the code in
+ *   generic_make_request() that converts recursion into iteration, to prevent
+ *   stack overflows.
+ *
+ *   This would normally mean allocating multiple bios under
+ *   generic_make_request() would be susceptible to deadlocks, but we have
+ *   deadlock avoidance code that resubmits any blocked bios from a rescuer
+ *   thread.
+ *
+ *   However, we do not guarantee forward progress for allocations from other
+ *   mempools. Doing multiple allocations from the same mempool under
+ *   generic_make_request() should be avoided - instead, use bio_set's front_pad
+ *   for per bio allocations.
+ *
  *   RETURNS:
  *   Pointer to new bio on success, NULL on failure.
  */
 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
+	gfp_t saved_gfp = gfp_mask;
 	unsigned front_pad;
 	unsigned inline_vecs;
 	unsigned long idx = BIO_POOL_NONE;
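
The front_pad advice in the new kernel-doc is how a driver avoids the second mempool allocation entirely: per-bio state lives in the padding the bio_set reserves in front of every bio, so one mempool allocation yields both. A hedged sketch of the pattern follows; struct my_io, my_bs, my_init() and my_io_alloc() are hypothetical names invented here, while bioset_create(), bio_alloc_bioset() and container_of() are the real API.

/* Hypothetical driver code, for illustration only. */
#include <linux/bio.h>
#include <linux/errno.h>
#include <linux/stddef.h>

struct my_io {
	void		*private;	/* per-bio driver state */
	struct bio	bio;		/* must be last: inline bvecs follow it */
};

static struct bio_set *my_bs;

static int my_init(void)
{
	/* front_pad reserves room for my_io ahead of each bio: */
	my_bs = bioset_create(64, offsetof(struct my_io, bio));
	return my_bs ? 0 : -ENOMEM;
}

static struct my_io *my_io_alloc(gfp_t gfp)
{
	struct bio *bio = bio_alloc_bioset(gfp, 1, my_bs);

	/* One mempool allocation yielded both the bio and my_io: */
	return bio ? container_of(bio, struct my_io, bio) : NULL;
}
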
@@ -336,7 +400,37 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 		front_pad = 0;
 		inline_vecs = nr_iovecs;
 	} else {
+		/*
+		 * generic_make_request() converts recursion to iteration; this
+		 * means if we're running beneath it, any bios we allocate and
+		 * submit will not be submitted (and thus freed) until after we
+		 * return.
+		 *
+		 * This exposes us to a potential deadlock if we allocate
+		 * multiple bios from the same bio_set() while running
+		 * underneath generic_make_request(). If we were to allocate
+		 * multiple bios (say a stacking block driver that was splitting
+		 * bios), we would deadlock if we exhausted the mempool's
+		 * reserve.
+		 *
+		 * We solve this, and guarantee forward progress, with a rescuer
+		 * workqueue per bio_set. If we go to allocate and there are
+		 * bios on current->bio_list, we first try the allocation
+		 * without __GFP_WAIT; if that fails, we punt those bios we
+		 * would be blocking to the rescuer workqueue before we retry
+		 * with the original gfp_flags.
+		 */
+
+		if (current->bio_list && !bio_list_empty(current->bio_list))
+			gfp_mask &= ~__GFP_WAIT;
+
 		p = mempool_alloc(bs->bio_pool, gfp_mask);
+		if (!p && gfp_mask != saved_gfp) {
+			punt_bios_to_rescuer(bs);
+			gfp_mask = saved_gfp;
+			p = mempool_alloc(bs->bio_pool, gfp_mask);
+		}
+
 		front_pad = bs->front_pad;
 		inline_vecs = BIO_INLINE_VECS;
 	}
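
The allocate/punt/retry dance above can be modeled in a few lines of userspace C. This is an analogy under stated assumptions, not kernel code: a POSIX semaphore stands in for the mempool reserve, sem_trywait() for the allocation without __GFP_WAIT, and the invented release_blocked_items() for punt_bios_to_rescuer(), whose job is to get reserves freed up before we let ourselves block on them.

/* Userspace model of the retry pattern - not part of the patch. */
#include <semaphore.h>
#include <stdio.h>

static sem_t pool;			/* the "mempool reserve" */

static void release_blocked_items(void)
{
	/* In the patch this hands pending bios to the rescuer thread;
	 * here we simply return one reserve so the retry can succeed. */
	sem_post(&pool);
}

static void alloc_from_pool(void)
{
	if (sem_trywait(&pool) == 0)
		return;			/* fast path: no blocking needed */

	release_blocked_items();	/* cf. punt_bios_to_rescuer() */
	sem_wait(&pool);		/* retry with blocking allowed */
}

int main(void)
{
	sem_init(&pool, 0, 0);		/* reserve already exhausted */
	alloc_from_pool();		/* would deadlock without the punt */
	printf("allocation succeeded after punting\n");
	return 0;
}
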
@@ -349,6 +443,12 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 
 	if (nr_iovecs > inline_vecs) {
 		bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
+		if (!bvl && gfp_mask != saved_gfp) {
+			punt_bios_to_rescuer(bs);
+			gfp_mask = saved_gfp;
+			bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
+		}
+
 		if (unlikely(!bvl))
 			goto err_free;
 	} else if (nr_iovecs) {
@@ -1579,6 +1679,9 @@ static void biovec_free_pools(struct bio_set *bs)
 
 void bioset_free(struct bio_set *bs)
 {
+	if (bs->rescue_workqueue)
+		destroy_workqueue(bs->rescue_workqueue);
+
 	if (bs->bio_pool)
 		mempool_destroy(bs->bio_pool);
 
@@ -1614,6 +1717,10 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 
 	bs->front_pad = front_pad;
 
+	spin_lock_init(&bs->rescue_lock);
+	bio_list_init(&bs->rescue_list);
+	INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
+
 	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
 	if (!bs->bio_slab) {
 		kfree(bs);
@@ -1624,9 +1731,14 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 	if (!bs->bio_pool)
 		goto bad;
 
-	if (!biovec_create_pools(bs, pool_size))
-		return bs;
+	if (biovec_create_pools(bs, pool_size))
+		goto bad;
+
+	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
+	if (!bs->rescue_workqueue)
+		goto bad;
 
+	return bs;
 bad:
 	bioset_free(bs);
 	return NULL;
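
From a driver's point of view the rescuer added by this patch is invisible: bio_set creation and teardown look exactly as before. A hedged lifecycle sketch closes the picture; my_driver_init() and my_driver_exit() are hypothetical, the bioset calls are real. Note that bioset_free() now destroys the workqueue first, so any still-queued rescued bios are flushed before the mempools they came from are torn down.

/* Hypothetical driver code, for illustration only. */
#include <linux/bio.h>
#include <linux/errno.h>

static struct bio_set *bs;

static int my_driver_init(void)
{
	/* bioset_create() now also spins up the "bioset" rescuer
	 * workqueue; failure of any step funnels into the bad: path. */
	bs = bioset_create(256, 0);
	if (!bs)
		return -ENOMEM;
	return 0;
}

static void my_driver_exit(void)
{
	/* Tears down the rescuer workqueue before the pools. */
	bioset_free(bs);
}
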