@@ -66,6 +66,7 @@ static DEFINE_SPINLOCK(ioc_gone_lock);
 #define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
 
 #define sample_valid(samples)	((samples) > 80)
+#define rb_entry_cfqg(node)	rb_entry((node), struct cfq_group, rb_node)
 
 /*
  * Most of our rbtree usage is for sorting with min extraction, so
@@ -77,8 +78,9 @@ struct cfq_rb_root {
 	struct rb_root rb;
 	struct rb_node *left;
 	unsigned count;
+	u64 min_vdisktime;
 };
-#define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, 0, }
+#define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
 
 /*
  * Per process-grouping structure
@@ -156,6 +158,16 @@ enum wl_type_t {
 
 /* This is per cgroup per device grouping structure */
 struct cfq_group {
+	/* group service_tree member */
+	struct rb_node rb_node;
+
+	/* group service_tree key */
+	u64 vdisktime;
+	bool on_st;
+
+	/* number of cfqqs currently on this group */
+	int nr_cfqq;
+
 	/*
 	 * rr lists of queues with requests, one rr for each priority class.
 	 * Counts are embedded in the cfq_rb_root
@@ -169,6 +181,8 @@ struct cfq_group {
  */
 struct cfq_data {
 	struct request_queue *queue;
+	/* Root service tree for cfq_groups */
+	struct cfq_rb_root grp_service_tree;
 	struct cfq_group root_group;
 
 	/*
@@ -251,6 +265,9 @@ static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg,
 					    enum wl_type_t type,
 					    struct cfq_data *cfqd)
 {
+	if (!cfqg)
+		return NULL;
+
 	if (prio == IDLE_WORKLOAD)
 		return &cfqg->service_tree_idle;
 
@@ -589,6 +606,17 @@ static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
 	return NULL;
 }
 
+static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root)
+{
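+	/* Like cfq_rb_first(): cache the leftmost node so min lookups are O(1) */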
+	if (!root->left)
+		root->left = rb_first(&root->rb);
+
+	if (root->left)
+		return rb_entry_cfqg(root->left);
+
+	return NULL;
+}
+
 static void rb_erase_init(struct rb_node *n, struct rb_root *root)
 {
 	rb_erase(n, root);
@@ -640,6 +668,83 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
 		      cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
 }
 
+static inline s64
+cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
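+	/* Sort key is vdisktime relative to the tree's minimum, returned signed */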
+	return cfqg->vdisktime - st->min_vdisktime;
+}
+
+static void
+__cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
+	struct rb_node **node = &st->rb.rb_node;
+	struct rb_node *parent = NULL;
+	struct cfq_group *__cfqg;
+	s64 key = cfqg_key(st, cfqg);
+	int left = 1;
+
+	while (*node != NULL) {
+		parent = *node;
+		__cfqg = rb_entry_cfqg(parent);
+
+		if (key < cfqg_key(st, __cfqg))
+			node = &parent->rb_left;
+		else {
+			node = &parent->rb_right;
+			left = 0;
+		}
+	}
+
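+	/* 'left' survived only if we never descended right: cache the new leftmost */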
+	if (left)
+		st->left = &cfqg->rb_node;
+
+	rb_link_node(&cfqg->rb_node, parent, node);
+	rb_insert_color(&cfqg->rb_node, &st->rb);
+}
+
+static void
+cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
+{
+	struct cfq_rb_root *st = &cfqd->grp_service_tree;
+	struct cfq_group *__cfqg;
+	struct rb_node *n;
+
+	cfqg->nr_cfqq++;
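+	/* Nothing else to do if the group is already on the service tree */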
+	if (cfqg->on_st)
+		return;
+
+	/*
+	 * Currently put the group at the end. Later implement something
+	 * so that groups get lesser vtime based on their weights, so that
+	 * a group does not lose everything if it was not continuously
+	 * backlogged.
+	 */
+	n = rb_last(&st->rb);
+	if (n) {
+		__cfqg = rb_entry_cfqg(n);
+		cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
+	} else
+		cfqg->vdisktime = st->min_vdisktime;
+
+	__cfq_group_service_tree_add(st, cfqg);
+	cfqg->on_st = true;
+}
+
+static void
+cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
+{
+	struct cfq_rb_root *st = &cfqd->grp_service_tree;
+
+	BUG_ON(cfqg->nr_cfqq < 1);
+	cfqg->nr_cfqq--;
+	/* If there are other cfq queues under this group, don't delete it */
+	if (cfqg->nr_cfqq)
+		return;
+
+	cfqg->on_st = false;
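+	/* The node may never have been linked (RB_CLEAR_NODE at init), so check */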
+	if (!RB_EMPTY_NODE(&cfqg->rb_node))
+		cfq_rb_erase(&cfqg->rb_node, st);
+}
+
 /*
  * The cfqd->service_trees holds all pending cfq_queue's that have
  * requests waiting to be processed. It is sorted in the order that
@@ -722,6 +827,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	rb_link_node(&cfqq->rb_node, parent, p);
 	rb_insert_color(&cfqq->rb_node, &service_tree->rb);
 	service_tree->count++;
+	cfq_group_service_tree_add(cfqd, cfqq->cfqg);
 }
 
 static struct cfq_queue *
@@ -832,6 +938,7 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 		cfqq->p_root = NULL;
 	}
 
+	cfq_group_service_tree_del(cfqd, cfqq->cfqg);
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
 }
@@ -1108,6 +1215,9 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
 	if (!cfqd->rq_queued)
 		return NULL;
 
+	/* There is nothing to dispatch */
+	if (!service_tree)
+		return NULL;
 	if (RB_EMPTY_ROOT(&service_tree->rb))
 		return NULL;
 	return cfq_rb_first(service_tree);
@@ -1477,6 +1587,12 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	unsigned count;
 	struct cfq_rb_root *st;
 
+	if (!cfqg) {
+		cfqd->serving_prio = IDLE_WORKLOAD;
+		cfqd->workload_expires = jiffies + 1;
+		return;
+	}
+
 	/* Choose next priority. RT > BE > IDLE */
 	if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))
 		cfqd->serving_prio = RT_WORKLOAD;
@@ -1535,10 +1651,21 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	cfqd->noidle_tree_requires_idle = false;
 }
 
+static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
+{
+	struct cfq_rb_root *st = &cfqd->grp_service_tree;
+
+	if (RB_EMPTY_ROOT(&st->rb))
+		return NULL;
+	return cfq_rb_first_group(st);
+}
+
 static void cfq_choose_cfqg(struct cfq_data *cfqd)
 {
-	cfqd->serving_group = &cfqd->root_group;
-	choose_service_tree(cfqd, &cfqd->root_group);
+	struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
+
+	cfqd->serving_group = cfqg;
+	choose_service_tree(cfqd, cfqg);
 }
 
 /*
@@ -3014,10 +3141,14 @@ static void *cfq_init_queue(struct request_queue *q)
 	if (!cfqd)
 		return NULL;
 
+	/* Init root service tree */
+	cfqd->grp_service_tree = CFQ_RB_ROOT;
+
 	/* Init root group */
 	cfqg = &cfqd->root_group;
 	for_each_cfqg_st(cfqg, i, j, st)
 		*st = CFQ_RB_ROOT;
+	RB_CLEAR_NODE(&cfqg->rb_node);
 
 	/*
 	 * Not strictly needed (since RB_ROOT just clears the node and we