@@ -706,6 +706,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+/* we need this in update_cfs_load and load-balance functions below */
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
 # ifdef CONFIG_SMP
 static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
 					    int global_update)
@@ -728,7 +730,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
 	u64 now, delta;
 	unsigned long load = cfs_rq->load.weight;
 
-	if (cfs_rq->tg == &root_task_group)
+	if (cfs_rq->tg == &root_task_group || throttled_hierarchy(cfs_rq))
 		return;
 
 	now = rq_of(cfs_rq)->clock_task;
@@ -837,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq)
 
 	tg = cfs_rq->tg;
 	se = tg->se[cpu_of(rq_of(cfs_rq))];
-	if (!se)
+	if (!se || throttled_hierarchy(cfs_rq))
 		return;
 #ifndef CONFIG_SMP
 	if (likely(se->load.weight == tg->shares))
@@ -1403,6 +1405,65 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
 	return cfs_rq->throttled;
 }
 
+/* check whether cfs_rq, or any parent, is throttled */
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
+{
+	return cfs_rq->throttle_count;
+}
+
+/*
+ * Ensure that neither of the group entities corresponding to src_cpu or
+ * dest_cpu are members of a throttled hierarchy when performing group
+ * load-balance operations.
+ */
+static inline int throttled_lb_pair(struct task_group *tg,
+				    int src_cpu, int dest_cpu)
+{
+	struct cfs_rq *src_cfs_rq, *dest_cfs_rq;
+
+	src_cfs_rq = tg->cfs_rq[src_cpu];
+	dest_cfs_rq = tg->cfs_rq[dest_cpu];
+
+	return throttled_hierarchy(src_cfs_rq) ||
+	       throttled_hierarchy(dest_cfs_rq);
+}
+
+/* updated child weight may affect parent so we have to do this bottom up */
+static int tg_unthrottle_up(struct task_group *tg, void *data)
+{
+	struct rq *rq = data;
+	struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+
+	cfs_rq->throttle_count--;
+#ifdef CONFIG_SMP
+	if (!cfs_rq->throttle_count) {
+		u64 delta = rq->clock_task - cfs_rq->load_stamp;
+
+		/* leaving throttled state, advance shares averaging windows */
+		cfs_rq->load_stamp += delta;
+		cfs_rq->load_last += delta;
+
+		/* update entity weight now that we are on_rq again */
+		update_cfs_shares(cfs_rq);
+	}
+#endif
+
+	return 0;
+}
+
+static int tg_throttle_down(struct task_group *tg, void *data)
+{
+	struct rq *rq = data;
+	struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+
+	/* group is entering throttled state, record last load */
+	if (!cfs_rq->throttle_count)
+		update_cfs_load(cfs_rq, 0);
+	cfs_rq->throttle_count++;
+
+	return 0;
+}
+
 static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
@@ -1413,7 +1474,9 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
 
 	/* account load preceding throttle */
-	update_cfs_load(cfs_rq, 0);
+	rcu_read_lock();
+	walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
+	rcu_read_unlock();
 
 	task_delta = cfs_rq->h_nr_running;
 	for_each_sched_entity(se) {
@@ -1454,6 +1517,10 @@ static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	list_del_rcu(&cfs_rq->throttled_list);
 	raw_spin_unlock(&cfs_b->lock);
 
+	update_rq_clock(rq);
+	/* update hierarchical throttle state */
+	walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
+
 	if (!cfs_rq->load.weight)
 		return;
 
@@ -1598,6 +1665,17 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
 {
 	return 0;
 }
+
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
+{
+	return 0;
+}
+
+static inline int throttled_lb_pair(struct task_group *tg,
+				    int src_cpu, int dest_cpu)
+{
+	return 0;
+}
 #endif
 
 /**************************************************
@@ -2493,6 +2571,9 @@ move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
 
 	for_each_leaf_cfs_rq(busiest, cfs_rq) {
 		list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) {
+			if (throttled_lb_pair(task_group(p),
+					      busiest->cpu, this_cpu))
+				break;
 
 			if (!can_migrate_task(p, busiest, this_cpu,
 						sd, idle, &pinned))
@@ -2608,8 +2689,13 @@ static void update_shares(int cpu)
 	 * Iterates the task_group tree in a bottom up fashion, see
 	 * list_add_leaf_cfs_rq() for details.
 	 */
-	for_each_leaf_cfs_rq(rq, cfs_rq)
+	for_each_leaf_cfs_rq(rq, cfs_rq) {
+		/* throttled entities do not contribute to load */
+		if (throttled_hierarchy(cfs_rq))
+			continue;
+
 		update_shares_cpu(cfs_rq->tg, cpu);
+	}
 	rcu_read_unlock();
 }
 
@@ -2659,9 +2745,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		u64 rem_load, moved_load;
 
 		/*
-		 * empty group
+		 * empty group or part of a throttled hierarchy
 		 */
-		if (!busiest_cfs_rq->task_weight)
+		if (!busiest_cfs_rq->task_weight ||
+		    throttled_lb_pair(busiest_cfs_rq->tg, cpu_of(busiest), this_cpu))
 			continue;
 
 		rem_load = (u64)rem_load_move * busiest_weight;
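
For reference, the standalone sketch below (not part of the patch) models the
bookkeeping the patch adds: the throttle-down walk runs top-down so every
descendant learns it sits under a throttled ancestor, and the unthrottle walk
runs bottom-up so children are fixed up before their parent, matching how
tg_throttle_down() and tg_unthrottle_up() are driven by walk_tg_tree_from().
It is a toy user-space model only; the struct group tree, the recursive
walkers, and main() are invented for illustration and stand in for the
kernel's task_group hierarchy and its RCU-protected tree walk.

#include <stdio.h>

struct group {
	const char *name;
	int throttle_count;		/* non-zero: self or an ancestor is throttled */
	struct group *children[4];	/* toy NULL-terminated child list */
};

/* mirrors throttled_hierarchy(): is this group inside a throttled subtree? */
static int group_throttled_hierarchy(const struct group *g)
{
	return g->throttle_count;
}

/* top-down walk, like tg_throttle_down(): parent updated before its children */
static void group_throttle_down(struct group *g)
{
	g->throttle_count++;
	for (int i = 0; g->children[i]; i++)
		group_throttle_down(g->children[i]);
}

/* bottom-up walk, like tg_unthrottle_up(): children updated before their parent */
static void group_unthrottle_up(struct group *g)
{
	for (int i = 0; g->children[i]; i++)
		group_unthrottle_up(g->children[i]);
	g->throttle_count--;
}

int main(void)
{
	struct group leaf  = { "leaf",  0, { NULL } };
	struct group child = { "child", 0, { &leaf, NULL } };
	struct group root  = { "root",  0, { &child, NULL } };

	group_throttle_down(&child);	/* throttle the middle group */
	printf("leaf:%d child:%d root:%d\n",	/* prints 1 1 0 */
	       group_throttled_hierarchy(&leaf),
	       group_throttled_hierarchy(&child),
	       group_throttled_hierarchy(&root));

	group_unthrottle_up(&child);	/* undo; every count returns to 0 */
	printf("leaf:%d child:%d root:%d\n",	/* prints 0 0 0 */
	       group_throttled_hierarchy(&leaf),
	       group_throttled_hierarchy(&child),
	       group_throttled_hierarchy(&root));
	return 0;
}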