@@ -387,7 +387,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 		if (!list_empty(&q->queue_head) && q->request_fn)
 			__blk_run_queue(q);
 
-		drain |= q->rq.elvpriv;
+		drain |= q->nr_rqs_elvpriv;
 
 		/*
 		 * Unfortunately, requests are queued at and tracked from
@@ -397,7 +397,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 		if (drain_all) {
 			drain |= !list_empty(&q->queue_head);
 			for (i = 0; i < 2; i++) {
-				drain |= q->rq.count[i];
+				drain |= q->nr_rqs[i];
 				drain |= q->in_flight[i];
 				drain |= !list_empty(&q->flush_queue[i]);
 			}
@@ -416,9 +416,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 	 * left with hung waiters. We need to wake up those waiters.
 	 */
 	if (q->request_fn) {
+		struct request_list *rl;
+
 		spin_lock_irq(q->queue_lock);
-		for (i = 0; i < ARRAY_SIZE(q->rq.wait); i++)
-			wake_up_all(&q->rq.wait[i]);
+
+		blk_queue_for_each_rl(rl, q)
+			for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
+				wake_up_all(&rl->wait[i]);
+
 		spin_unlock_irq(q->queue_lock);
 	}
 }
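The wakeup now has to cover every request_list attached to the queue, not just the one embedded in the request_queue. For readers unfamiliar with the new iterator, below is a minimal sketch of what blk_queue_for_each_rl() is assumed to expand to on the blkcg side of this series; the __blk_queue_next_rl() helper and the walk over per-blkg lists are assumptions, not something this hunk defines:

/*
 * Sketch only: visit the queue's own root_rl first, then each
 * request_list embedded in a blkg.  __blk_queue_next_rl() is an assumed
 * helper that returns the next per-blkg list, or NULL when done.
 */
#define blk_queue_for_each_rl(rl, q) \
	for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))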
@@ -517,28 +522,33 @@ void blk_cleanup_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_cleanup_queue);
 
-static int blk_init_free_list(struct request_queue *q)
+int blk_init_rl(struct request_list *rl, struct request_queue *q,
+		gfp_t gfp_mask)
 {
-	struct request_list *rl = &q->rq;
-
 	if (unlikely(rl->rq_pool))
 		return 0;
 
+	rl->q = q;
 	rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
 	rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
-	rl->elvpriv = 0;
 	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
 	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
 
 	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
-				mempool_free_slab, request_cachep, q->node);
-
+					  mempool_free_slab, request_cachep,
+					  gfp_mask, q->node);
 	if (!rl->rq_pool)
 		return -ENOMEM;
 
 	return 0;
 }
 
+void blk_exit_rl(struct request_list *rl)
+{
+	if (rl->rq_pool)
+		mempool_destroy(rl->rq_pool);
+}
+
 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 {
 	return blk_alloc_queue_node(gfp_mask, -1);
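Because blk_init_rl() now takes the owning queue and a gfp_mask explicitly, a request_list no longer has to live inside the request_queue; any per-group structure can embed one. A hedged sketch of how such a consumer might pair the new init/exit helpers (struct my_group, my_group_alloc() and my_group_free() are hypothetical names used only for illustration):

/* Hypothetical per-group container embedding its own request_list. */
struct my_group {
	struct request_list	rl;
	/* ... other per-group state ... */
};

static struct my_group *my_group_alloc(struct request_queue *q, gfp_t gfp_mask)
{
	struct my_group *grp = kzalloc_node(sizeof(*grp), gfp_mask, q->node);

	if (!grp)
		return NULL;

	/* allocation context is the caller's, hence the explicit gfp_mask */
	if (blk_init_rl(&grp->rl, q, gfp_mask)) {
		kfree(grp);
		return NULL;
	}
	return grp;
}

static void my_group_free(struct my_group *grp)
{
	blk_exit_rl(&grp->rl);	/* tears down grp->rl.rq_pool if it was created */
	kfree(grp);
}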
@@ -680,7 +690,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 	if (!q)
 		return NULL;
 
-	if (blk_init_free_list(q))
+	if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
 		return NULL;
 
 	q->request_fn		= rfn;
@@ -722,15 +732,15 @@ bool blk_get_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_get_queue);
 
-static inline void blk_free_request(struct request_queue *q, struct request *rq)
+static inline void blk_free_request(struct request_list *rl, struct request *rq)
 {
 	if (rq->cmd_flags & REQ_ELVPRIV) {
-		elv_put_request(q, rq);
+		elv_put_request(rl->q, rq);
 		if (rq->elv.icq)
 			put_io_context(rq->elv.icq->ioc);
 	}
 
-	mempool_free(rq, q->rq.rq_pool);
+	mempool_free(rq, rl->rq_pool);
 }
 
 /*
@@ -767,18 +777,23 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
 	ioc->last_waited = jiffies;
 }
 
-static void __freed_request(struct request_queue *q, int sync)
+static void __freed_request(struct request_list *rl, int sync)
 {
-	struct request_list *rl = &q->rq;
+	struct request_queue *q = rl->q;
 
-	if (rl->count[sync] < queue_congestion_off_threshold(q))
+	/*
+	 * bdi isn't aware of blkcg yet. As all async IOs end up root
+	 * blkcg anyway, just use root blkcg state.
+	 */
+	if (rl == &q->root_rl &&
+	    rl->count[sync] < queue_congestion_off_threshold(q))
 		blk_clear_queue_congested(q, sync);
 
 	if (rl->count[sync] + 1 <= q->nr_requests) {
 		if (waitqueue_active(&rl->wait[sync]))
 			wake_up(&rl->wait[sync]);
 
-		blk_clear_queue_full(q, sync);
+		blk_clear_rl_full(rl, sync);
 	}
 }
 
@@ -786,19 +801,20 @@ static void __freed_request(struct request_queue *q, int sync)
  * A request has just been released. Account for it, update the full and
  * congestion status, wake up any waiters. Called under q->queue_lock.
  */
-static void freed_request(struct request_queue *q, unsigned int flags)
+static void freed_request(struct request_list *rl, unsigned int flags)
 {
-	struct request_list *rl = &q->rq;
+	struct request_queue *q = rl->q;
 	int sync = rw_is_sync(flags);
 
+	q->nr_rqs[sync]--;
 	rl->count[sync]--;
 	if (flags & REQ_ELVPRIV)
-		rl->elvpriv--;
+		q->nr_rqs_elvpriv--;
 
-	__freed_request(q, sync);
+	__freed_request(rl, sync);
 
 	if (unlikely(rl->starved[sync ^ 1]))
-		__freed_request(q, sync ^ 1);
+		__freed_request(rl, sync ^ 1);
 }
 
 /*
@@ -837,8 +853,8 @@ static struct io_context *rq_ioc(struct bio *bio)
 }
 
 /**
- * get_request - get a free request
- * @q: request_queue to allocate request from
+ * __get_request - get a free request
+ * @rl: request list to allocate from
  * @rw_flags: RW and SYNC flags
  * @bio: bio to allocate request for (can be %NULL)
 * @gfp_mask: allocation mask
@@ -850,20 +866,16 @@ static struct io_context *rq_ioc(struct bio *bio)
 * Returns %NULL on failure, with @q->queue_lock held.
 * Returns !%NULL on success, with @q->queue_lock *not held*.
 */
-static struct request *get_request(struct request_queue *q, int rw_flags,
-				   struct bio *bio, gfp_t gfp_mask)
+static struct request *__get_request(struct request_list *rl, int rw_flags,
+				     struct bio *bio, gfp_t gfp_mask)
 {
+	struct request_queue *q = rl->q;
 	struct request *rq;
-	struct request_list *rl = &q->rq;
-	struct elevator_type *et;
-	struct io_context *ioc;
+	struct elevator_type *et = q->elevator->type;
+	struct io_context *ioc = rq_ioc(bio);
 	struct io_cq *icq = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
-	bool retried = false;
 	int may_queue;
-retry:
-	et = q->elevator->type;
-	ioc = rq_ioc(bio);
 
 	if (unlikely(blk_queue_dead(q)))
 		return NULL;
@@ -874,29 +886,15 @@ retry:
 
 	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
 		if (rl->count[is_sync]+1 >= q->nr_requests) {
-			/*
-			 * We want ioc to record batching state. If it's
-			 * not already there, creating a new one requires
-			 * dropping queue_lock, which in turn requires
-			 * retesting conditions to avoid queue hang.
-			 */
-			if (!ioc && !retried) {
-				spin_unlock_irq(q->queue_lock);
-				create_io_context(gfp_mask, q->node);
-				spin_lock_irq(q->queue_lock);
-				retried = true;
-				goto retry;
-			}
-
 			/*
 			 * The queue will fill after this allocation, so set
 			 * it as full, and mark this process as "batching".
 			 * This process will be allowed to complete a batch of
 			 * requests, others will be blocked.
 			 */
-			if (!blk_queue_full(q, is_sync)) {
+			if (!blk_rl_full(rl, is_sync)) {
 				ioc_set_batching(q, ioc);
-				blk_set_queue_full(q, is_sync);
+				blk_set_rl_full(rl, is_sync);
 			} else {
 				if (may_queue != ELV_MQUEUE_MUST
 						&& !ioc_batching(q, ioc)) {
@@ -909,7 +907,12 @@ retry:
 				}
 			}
 		}
-		blk_set_queue_congested(q, is_sync);
+		/*
+		 * bdi isn't aware of blkcg yet. As all async IOs end up
+		 * root blkcg anyway, just use root blkcg state.
+		 */
+		if (rl == &q->root_rl)
+			blk_set_queue_congested(q, is_sync);
 	}
 
 	/*
@@ -920,6 +923,7 @@ retry:
 	if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
 		return NULL;
 
+	q->nr_rqs[is_sync]++;
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
@@ -935,7 +939,7 @@ retry:
 	 */
 	if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
 		rw_flags |= REQ_ELVPRIV;
-		rl->elvpriv++;
+		q->nr_rqs_elvpriv++;
 		if (et->icq_cache && ioc)
 			icq = ioc_lookup_icq(ioc, q);
 	}
@@ -945,22 +949,19 @@ retry:
 	spin_unlock_irq(q->queue_lock);
 
 	/* allocate and init request */
-	rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
+	rq = mempool_alloc(rl->rq_pool, gfp_mask);
 	if (!rq)
 		goto fail_alloc;
 
 	blk_rq_init(q, rq);
+	blk_rq_set_rl(rq, rl);
 	rq->cmd_flags = rw_flags | REQ_ALLOCED;
 
 	/* init elvpriv */
 	if (rw_flags & REQ_ELVPRIV) {
 		if (unlikely(et->icq_cache && !icq)) {
-			create_io_context(gfp_mask, q->node);
-			ioc = rq_ioc(bio);
-			if (!ioc)
-				goto fail_elvpriv;
-
-			icq = ioc_create_icq(ioc, q, gfp_mask);
+			if (ioc)
+				icq = ioc_create_icq(ioc, q, gfp_mask);
 			if (!icq)
 				goto fail_elvpriv;
 		}
@@ -1000,7 +1001,7 @@ fail_elvpriv:
 	rq->elv.icq = NULL;
 
 	spin_lock_irq(q->queue_lock);
-	rl->elvpriv--;
+	q->nr_rqs_elvpriv--;
 	spin_unlock_irq(q->queue_lock);
 	goto out;
 
@@ -1013,7 +1014,7 @@ fail_alloc:
 	 * queue, but this is pretty rare.
 	 */
 	spin_lock_irq(q->queue_lock);
-	freed_request(q, rw_flags);
+	freed_request(rl, rw_flags);
 
 	/*
 	 * in the very unlikely event that allocation failed and no
@@ -1029,56 +1030,58 @@ rq_starved:
 }
 
 /**
- * get_request_wait - get a free request with retry
+ * get_request - get a free request
  * @q: request_queue to allocate request from
  * @rw_flags: RW and SYNC flags
  * @bio: bio to allocate request for (can be %NULL)
+ * @gfp_mask: allocation mask
  *
- * Get a free request from @q. This function keeps retrying under memory
- * pressure and fails iff @q is dead.
+ * Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this
+ * function keeps retrying under memory pressure and fails iff @q is dead.
 *
 * Must be callled with @q->queue_lock held and,
 * Returns %NULL on failure, with @q->queue_lock held.
 * Returns !%NULL on success, with @q->queue_lock *not held*.
 */
-static struct request *get_request_wait(struct request_queue *q, int rw_flags,
-					struct bio *bio)
+static struct request *get_request(struct request_queue *q, int rw_flags,
+				   struct bio *bio, gfp_t gfp_mask)
 {
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
+	DEFINE_WAIT(wait);
+	struct request_list *rl;
 	struct request *rq;
 
-	rq = get_request(q, rw_flags, bio, GFP_NOIO);
-	while (!rq) {
-		DEFINE_WAIT(wait);
-		struct request_list *rl = &q->rq;
-
-		if (unlikely(blk_queue_dead(q)))
-			return NULL;
+	rl = blk_get_rl(q, bio);	/* transferred to @rq on success */
+retry:
+	rq = __get_request(rl, rw_flags, bio, gfp_mask);
+	if (rq)
+		return rq;
 
-		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
-					  TASK_UNINTERRUPTIBLE);
+	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
+		blk_put_rl(rl);
+		return NULL;
+	}
 
-		trace_block_sleeprq(q, bio, rw_flags & 1);
+	/* wait on @rl and retry */
+	prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
+				  TASK_UNINTERRUPTIBLE);
 
-		spin_unlock_irq(q->queue_lock);
-		io_schedule();
+	trace_block_sleeprq(q, bio, rw_flags & 1);
 
-		/*
-		 * After sleeping, we become a "batching" process and
-		 * will be able to allocate at least one request, and
-		 * up to a big batch of them for a small period time.
-		 * See ioc_batching, ioc_set_batching
-		 */
-		create_io_context(GFP_NOIO, q->node);
-		ioc_set_batching(q, current->io_context);
+	spin_unlock_irq(q->queue_lock);
+	io_schedule();
 
-		spin_lock_irq(q->queue_lock);
-		finish_wait(&rl->wait[is_sync], &wait);
+	/*
+	 * After sleeping, we become a "batching" process and will be able
+	 * to allocate at least one request, and up to a big batch of them
+	 * for a small period time. See ioc_batching, ioc_set_batching
+	 */
+	ioc_set_batching(q, current->io_context);
 
-		rq = get_request(q, rw_flags, bio, GFP_NOIO);
-	};
+	spin_lock_irq(q->queue_lock);
+	finish_wait(&rl->wait[is_sync], &wait);
 
-	return rq;
+	goto retry;
 }
 
 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
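The retry behaviour of the merged get_request() is now selected purely by @gfp_mask: a mask containing __GFP_WAIT (GFP_NOIO in blk_queue_bio() below) sleeps on the chosen @rl and loops, while a non-waiting mask gets exactly one __get_request() attempt. A hedged sketch of the resulting calling convention, mirroring what blk_get_request() does; my_alloc_request() and its can_sleep flag are illustrative only:

/* Illustrative wrapper only; not part of the patch. */
static struct request *my_alloc_request(struct request_queue *q, int rw_flags,
					struct bio *bio, bool can_sleep)
{
	struct request *rq;

	spin_lock_irq(q->queue_lock);

	/* __GFP_WAIT selects the sleep-and-retry path inside get_request() */
	rq = get_request(q, rw_flags, bio, can_sleep ? GFP_NOIO : GFP_ATOMIC);
	if (!rq) {
		/* failure returns with q->queue_lock still held */
		spin_unlock_irq(q->queue_lock);
		return NULL;
	}

	/* success returns with q->queue_lock already dropped */
	return rq;
}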
@@ -1087,11 +1090,11 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 
 	BUG_ON(rw != READ && rw != WRITE);
 
+	/* create ioc upfront */
+	create_io_context(gfp_mask, q->node);
+
 	spin_lock_irq(q->queue_lock);
-	if (gfp_mask & __GFP_WAIT)
-		rq = get_request_wait(q, rw, NULL);
-	else
-		rq = get_request(q, rw, NULL, gfp_mask);
+	rq = get_request(q, rw, NULL, gfp_mask);
 	if (!rq)
 		spin_unlock_irq(q->queue_lock);
 	/* q->queue_lock is unlocked at this point */
@@ -1248,12 +1251,14 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	 */
 	if (req->cmd_flags & REQ_ALLOCED) {
 		unsigned int flags = req->cmd_flags;
+		struct request_list *rl = blk_rq_rl(req);
 
 		BUG_ON(!list_empty(&req->queuelist));
 		BUG_ON(!hlist_unhashed(&req->hash));
 
-		blk_free_request(q, req);
-		freed_request(q, flags);
+		blk_free_request(rl, req);
+		freed_request(rl, flags);
+		blk_put_rl(rl);
 	}
 }
 EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1481,7 +1486,7 @@ get_rq:
 	 * Grab a free request. This is might sleep but can not fail.
 	 * Returns with the queue unlocked.
 	 */
-	req = get_request_wait(q, rw_flags, bio);
+	req = get_request(q, rw_flags, bio, GFP_NOIO);
 	if (unlikely(!req)) {
 		bio_endio(bio, -ENODEV);	/* @q is dead */
 		goto out_unlock;
@@ -1702,6 +1707,14 @@ generic_make_request_checks(struct bio *bio)
 		goto end_io;
 	}
 
+	/*
+	 * Various block parts want %current->io_context and lazy ioc
+	 * allocation ends up trading a lot of pain for a small amount of
+	 * memory. Just allocate it upfront. This may fail and block
+	 * layer knows how to live with it.
+	 */
+	create_io_context(GFP_ATOMIC, q->node);
+
 	if (blk_throtl_bio(q, bio))
 		return false;	/* throttled, will be resubmitted later */
 