@@ -2721,7 +2721,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 		 * If power savings logic is enabled for a domain, see if we
 		 * are not overloaded, if so, don't balance wider.
 		 */
-		if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) {
+		if (tmp->flags & (SD_PREFER_LOCAL)) {
 			unsigned long power = 0;
 			unsigned long nr_running = 0;
 			unsigned long capacity;
@@ -2734,9 +2734,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)

 			capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);

-			if (tmp->flags & SD_POWERSAVINGS_BALANCE)
-				nr_running /= 2;
-
 			if (nr_running < capacity)
 				want_sd = 0;
 		}
@@ -3082,7 +3079,7 @@ struct lb_env {
 	struct rq		*dst_rq;

 	enum cpu_idle_type	idle;
-	long			load_move;
+	long			imbalance;
 	unsigned int		flags;

 	unsigned int		loop;
@@ -3218,7 +3215,7 @@ static unsigned long task_h_load(struct task_struct *p);
 static const unsigned int sched_nr_migrate_break = 32;

 /*
- * move_tasks tries to move up to load_move weighted load from busiest to
+ * move_tasks tries to move up to imbalance weighted load from busiest to
  * this_rq, as part of a balancing operation within domain "sd".
  * Returns 1 if successful and 0 otherwise.
  *
@@ -3231,7 +3228,7 @@ static int move_tasks(struct lb_env *env)
 	unsigned long load;
 	int pulled = 0;

-	if (env->load_move <= 0)
+	if (env->imbalance <= 0)
 		return 0;

 	while (!list_empty(tasks)) {
@@ -3257,7 +3254,7 @@ static int move_tasks(struct lb_env *env)
 		if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed)
 			goto next;

-		if ((load / 2) > env->load_move)
+		if ((load / 2) > env->imbalance)
 			goto next;

 		if (!can_migrate_task(p, env))
@@ -3265,7 +3262,7 @@ static int move_tasks(struct lb_env *env)

 		move_task(p, env);
 		pulled++;
-		env->load_move -= load;
+		env->imbalance -= load;

 #ifdef CONFIG_PREEMPT
 		/*
@@ -3281,7 +3278,7 @@ static int move_tasks(struct lb_env *env)
 		 * We only want to steal up to the prescribed amount of
 		 * weighted load.
 		 */
-		if (env->load_move <= 0)
+		if (env->imbalance <= 0)
 			break;

 		continue;
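The hunks above fold the old load_move amount into struct lb_env as env->imbalance, so move_tasks() only consumes the environment it is handed. A minimal userspace sketch of that "one environment struct instead of loose parameters" shape (toy types and hypothetical names, not the kernel's) could look like:

/*
 * Illustrative only: a userspace toy showing the struct-lb_env idea --
 * helpers take one environment pointer and the remaining imbalance is
 * tracked in env->imbalance.  All names here are made up.
 */
#include <stdio.h>

struct toy_env {
	int	src_cpu;
	int	dst_cpu;
	long	imbalance;	/* weighted load still to move */
};

/* Pretend to move one task of weight 'load'; shrink the imbalance. */
static int toy_move_one(struct toy_env *env, long load)
{
	if (env->imbalance <= 0)
		return 0;
	env->imbalance -= load;
	return 1;
}

int main(void)
{
	struct toy_env env = { .src_cpu = 1, .dst_cpu = 0, .imbalance = 300 };
	long weights[] = { 120, 90, 150 };
	int moved = 0;

	for (int i = 0; i < 3; i++)
		moved += toy_move_one(&env, weights[i]);

	printf("moved %d tasks, residual imbalance %ld\n", moved, env.imbalance);
	return 0;
}

Bundling the knobs this way is what lets find_busiest_group() and its helpers drop their long argument lists in the later hunks.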
@@ -3435,14 +3432,6 @@ struct sd_lb_stats {
 	unsigned int  busiest_group_weight;

 	int group_imb; /* Is there imbalance in this sd */
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-	int power_savings_balance; /* Is powersave balance needed for this sd */
-	struct sched_group *group_min; /* Least loaded group in sd */
-	struct sched_group *group_leader; /* Group which relieves group_min */
-	unsigned long min_load_per_task; /* load_per_task in group_min */
-	unsigned long leader_nr_running; /* Nr running of group_leader */
-	unsigned long min_nr_running; /* Nr running of group_min */
-#endif
 };

 /*
@@ -3486,148 +3475,6 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
 	return load_idx;
 }

-
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-/**
- * init_sd_power_savings_stats - Initialize power savings statistics for
- * the given sched_domain, during load balancing.
- *
- * @sd: Sched domain whose power-savings statistics are to be initialized.
- * @sds: Variable containing the statistics for sd.
- * @idle: Idle status of the CPU at which we're performing load-balancing.
- */
-static inline void init_sd_power_savings_stats(struct sched_domain *sd,
-	struct sd_lb_stats *sds, enum cpu_idle_type idle)
-{
-	/*
-	 * Busy processors will not participate in power savings
-	 * balance.
-	 */
-	if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
-		sds->power_savings_balance = 0;
-	else {
-		sds->power_savings_balance = 1;
-		sds->min_nr_running = ULONG_MAX;
-		sds->leader_nr_running = 0;
-	}
-}
-
-/**
- * update_sd_power_savings_stats - Update the power saving stats for a
- * sched_domain while performing load balancing.
- *
- * @group: sched_group belonging to the sched_domain under consideration.
- * @sds: Variable containing the statistics of the sched_domain
- * @local_group: Does group contain the CPU for which we're performing
- *		load balancing ?
- * @sgs: Variable containing the statistics of the group.
- */
-static inline void update_sd_power_savings_stats(struct sched_group *group,
-	struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
-{
-
-	if (!sds->power_savings_balance)
-		return;
-
-	/*
-	 * If the local group is idle or completely loaded
-	 * no need to do power savings balance at this domain
-	 */
-	if (local_group && (sds->this_nr_running >= sgs->group_capacity ||
-				!sds->this_nr_running))
-		sds->power_savings_balance = 0;
-
-	/*
-	 * If a group is already running at full capacity or idle,
-	 * don't include that group in power savings calculations
-	 */
-	if (!sds->power_savings_balance ||
-		sgs->sum_nr_running >= sgs->group_capacity ||
-		!sgs->sum_nr_running)
-		return;
-
-	/*
-	 * Calculate the group which has the least non-idle load.
-	 * This is the group from where we need to pick up the load
-	 * for saving power
-	 */
-	if ((sgs->sum_nr_running < sds->min_nr_running) ||
-	    (sgs->sum_nr_running == sds->min_nr_running &&
-	     group_first_cpu(group) > group_first_cpu(sds->group_min))) {
-		sds->group_min = group;
-		sds->min_nr_running = sgs->sum_nr_running;
-		sds->min_load_per_task = sgs->sum_weighted_load /
-					sgs->sum_nr_running;
-	}
-
-	/*
-	 * Calculate the group which is almost near its
-	 * capacity but still has some space to pick up some load
-	 * from other group and save more power
-	 */
-	if (sgs->sum_nr_running + 1 > sgs->group_capacity)
-		return;
-
-	if (sgs->sum_nr_running > sds->leader_nr_running ||
-	    (sgs->sum_nr_running == sds->leader_nr_running &&
-	     group_first_cpu(group) < group_first_cpu(sds->group_leader))) {
-		sds->group_leader = group;
-		sds->leader_nr_running = sgs->sum_nr_running;
-	}
-}
-
-/**
- * check_power_save_busiest_group - see if there is potential for some power-savings balance
- * @sds: Variable containing the statistics of the sched_domain
- *	under consideration.
- * @this_cpu: Cpu at which we're currently performing load-balancing.
- * @imbalance: Variable to store the imbalance.
- *
- * Description:
- * Check if we have potential to perform some power-savings balance.
- * If yes, set the busiest group to be the least loaded group in the
- * sched_domain, so that it's CPUs can be put to idle.
- *
- * Returns 1 if there is potential to perform power-savings balance.
- * Else returns 0.
- */
-static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
-					int this_cpu, unsigned long *imbalance)
-{
-	if (!sds->power_savings_balance)
-		return 0;
-
-	if (sds->this != sds->group_leader ||
-			sds->group_leader == sds->group_min)
-		return 0;
-
-	*imbalance = sds->min_load_per_task;
-	sds->busiest = sds->group_min;
-
-	return 1;
-
-}
-#else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
-static inline void init_sd_power_savings_stats(struct sched_domain *sd,
-	struct sd_lb_stats *sds, enum cpu_idle_type idle)
-{
-	return;
-}
-
-static inline void update_sd_power_savings_stats(struct sched_group *group,
-	struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
-{
-	return;
-}
-
-static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
-					int this_cpu, unsigned long *imbalance)
-{
-	return 0;
-}
-#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
-
-
 unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
 {
 	return SCHED_POWER_SCALE;
@@ -3765,24 +3612,22 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
  * @sd: The sched_domain whose statistics are to be updated.
  * @group: sched_group whose statistics are to be updated.
- * @this_cpu: Cpu for which load balance is currently performed.
- * @idle: Idle status of this_cpu
  * @load_idx: Load index of sched_domain of this_cpu for load calc.
  * @local_group: Does group contain this_cpu.
  * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sgs: variable to hold the statistics for this group.
  */
-static inline void update_sg_lb_stats(struct sched_domain *sd,
-			struct sched_group *group, int this_cpu,
-			enum cpu_idle_type idle, int load_idx,
+static inline void update_sg_lb_stats(struct lb_env *env,
+			struct sched_group *group, int load_idx,
 			int local_group, const struct cpumask *cpus,
 			int *balance, struct sg_lb_stats *sgs)
 {
-	unsigned long load, max_cpu_load, min_cpu_load, max_nr_running;
-	int i;
+	unsigned long nr_running, max_nr_running, min_nr_running;
+	unsigned long load, max_cpu_load, min_cpu_load;
 	unsigned int balance_cpu = -1, first_idle_cpu = 0;
 	unsigned long avg_load_per_task = 0;
+	int i;

 	if (local_group)
 		balance_cpu = group_first_cpu(group);
@@ -3791,10 +3636,13 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	max_cpu_load = 0;
 	min_cpu_load = ~0UL;
 	max_nr_running = 0;
+	min_nr_running = ~0UL;

 	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
 		struct rq *rq = cpu_rq(i);

+		nr_running = rq->nr_running;
+
 		/* Bias balancing toward cpus of our domain */
 		if (local_group) {
 			if (idle_cpu(i) && !first_idle_cpu) {
@@ -3805,16 +3653,19 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 				load = target_load(i, load_idx);
 		} else {
 			load = source_load(i, load_idx);
-			if (load > max_cpu_load) {
+			if (load > max_cpu_load)
 				max_cpu_load = load;
-				max_nr_running = rq->nr_running;
-			}
 			if (min_cpu_load > load)
 				min_cpu_load = load;
+
+			if (nr_running > max_nr_running)
+				max_nr_running = nr_running;
+			if (min_nr_running > nr_running)
+				min_nr_running = nr_running;
 		}

 		sgs->group_load += load;
-		sgs->sum_nr_running += rq->nr_running;
+		sgs->sum_nr_running += nr_running;
 		sgs->sum_weighted_load += weighted_cpuload(i);
 		if (idle_cpu(i))
 			sgs->idle_cpus++;
@@ -3827,14 +3678,14 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	 * to do the newly idle load balance.
 	 */
 	if (local_group) {
-		if (idle != CPU_NEWLY_IDLE) {
-			if (balance_cpu != this_cpu) {
+		if (env->idle != CPU_NEWLY_IDLE) {
+			if (balance_cpu != env->dst_cpu) {
 				*balance = 0;
 				return;
 			}
-			update_group_power(sd, this_cpu);
+			update_group_power(env->sd, env->dst_cpu);
 		} else if (time_after_eq(jiffies, group->sgp->next_update))
-			update_group_power(sd, this_cpu);
+			update_group_power(env->sd, env->dst_cpu);
 	}

 	/* Adjust by relative CPU power of the group */
@@ -3852,13 +3703,14 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	if (sgs->sum_nr_running)
 		avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;

-	if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
+	if ((max_cpu_load - min_cpu_load) >= avg_load_per_task &&
+	    (max_nr_running - min_nr_running) > 1)
 		sgs->group_imb = 1;

 	sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power,
 						SCHED_POWER_SCALE);
 	if (!sgs->group_capacity)
-		sgs->group_capacity = fix_small_capacity(sd, group);
+		sgs->group_capacity = fix_small_capacity(env->sd, group);
 	sgs->group_weight = group->group_weight;

 	if (sgs->group_capacity > sgs->sum_nr_running)
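With min_nr_running tracked alongside max_nr_running, the hunk above only marks a group imbalanced when the per-cpu load spread is at least one average task and the run-queue lengths differ by more than one. A standalone toy (hypothetical names, unweighted loads) that mirrors that check:

/*
 * Illustrative only: userspace sketch of the reworked group-imbalance
 * test.  The kernel uses weighted loads per cpu; here plain numbers
 * stand in for them.
 */
#include <stdio.h>

static int toy_group_imb(const unsigned long *load,
			 const unsigned int *nr_running, int cpus)
{
	unsigned long max_load = 0, min_load = ~0UL, sum_load = 0;
	unsigned int max_nr = 0, min_nr = ~0U, sum_nr = 0;
	unsigned long avg_load_per_task = 0;

	for (int i = 0; i < cpus; i++) {
		if (load[i] > max_load)
			max_load = load[i];
		if (load[i] < min_load)
			min_load = load[i];
		if (nr_running[i] > max_nr)
			max_nr = nr_running[i];
		if (nr_running[i] < min_nr)
			min_nr = nr_running[i];
		sum_load += load[i];
		sum_nr += nr_running[i];
	}
	if (sum_nr)
		avg_load_per_task = sum_load / sum_nr;

	return (max_load - min_load) >= avg_load_per_task &&
	       (max_nr - min_nr) > 1;
}

int main(void)
{
	unsigned long load[] = { 2048, 1024 };	/* one busy cpu, one light */
	unsigned int nr[] = { 3, 1 };

	printf("group_imb = %d\n", toy_group_imb(load, nr, 2));	/* prints 1 */
	return 0;
}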
@@ -3876,11 +3728,10 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
  * Determine if @sg is a busier group than the previously selected
  * busiest group.
  */
-static bool update_sd_pick_busiest(struct sched_domain *sd,
+static bool update_sd_pick_busiest(struct lb_env *env,
 				   struct sd_lb_stats *sds,
 				   struct sched_group *sg,
-				   struct sg_lb_stats *sgs,
-				   int this_cpu)
+				   struct sg_lb_stats *sgs)
 {
 	if (sgs->avg_load <= sds->max_load)
 		return false;
@@ -3896,8 +3747,8 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
 	 * numbered CPUs in the group, therefore mark all groups
 	 * higher than ourself as busy.
 	 */
-	if ((sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running &&
-	    this_cpu < group_first_cpu(sg)) {
+	if ((env->sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running &&
+	    env->dst_cpu < group_first_cpu(sg)) {
 		if (!sds->busiest)
 			return true;

@@ -3917,28 +3768,27 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
  * @balance: Should we balance.
  * @sds: variable to hold the statistics for this sched_domain.
  */
-static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
-			enum cpu_idle_type idle, const struct cpumask *cpus,
-			int *balance, struct sd_lb_stats *sds)
+static inline void update_sd_lb_stats(struct lb_env *env,
+			const struct cpumask *cpus,
+			int *balance, struct sd_lb_stats *sds)
 {
-	struct sched_domain *child = sd->child;
-	struct sched_group *sg = sd->groups;
+	struct sched_domain *child = env->sd->child;
+	struct sched_group *sg = env->sd->groups;
 	struct sg_lb_stats sgs;
 	int load_idx, prefer_sibling = 0;

 	if (child && child->flags & SD_PREFER_SIBLING)
 		prefer_sibling = 1;

-	init_sd_power_savings_stats(sd, sds, idle);
-	load_idx = get_sd_load_idx(sd, idle);
+	load_idx = get_sd_load_idx(env->sd, env->idle);

 	do {
 		int local_group;

-		local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg));
+		local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
 		memset(&sgs, 0, sizeof(sgs));
-		update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx,
-				   local_group, cpus, balance, &sgs);
+		update_sg_lb_stats(env, sg, load_idx, local_group,
+				   cpus, balance, &sgs);

 		if (local_group && !(*balance))
 			return;
@@ -3966,7 +3816,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 			sds->this_load_per_task = sgs.sum_weighted_load;
 			sds->this_has_capacity = sgs.group_has_capacity;
 			sds->this_idle_cpus = sgs.idle_cpus;
-		} else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
+		} else if (update_sd_pick_busiest(env, sds, sg, &sgs)) {
 			sds->max_load = sgs.avg_load;
 			sds->busiest = sg;
 			sds->busiest_nr_running = sgs.sum_nr_running;
@@ -3978,9 +3828,8 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 			sds->group_imb = sgs.group_imb;
 		}

-		update_sd_power_savings_stats(sg, sds, local_group, &sgs);
 		sg = sg->next;
-	} while (sg != sd->groups);
+	} while (sg != env->sd->groups);
 }

 /**
@@ -4008,24 +3857,23 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
  * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
  * @imbalance: returns amount of imbalanced due to packing.
  */
-static int check_asym_packing(struct sched_domain *sd,
-			      struct sd_lb_stats *sds,
-			      int this_cpu, unsigned long *imbalance)
+static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
 {
 	int busiest_cpu;

-	if (!(sd->flags & SD_ASYM_PACKING))
+	if (!(env->sd->flags & SD_ASYM_PACKING))
 		return 0;

 	if (!sds->busiest)
 		return 0;

 	busiest_cpu = group_first_cpu(sds->busiest);
-	if (this_cpu > busiest_cpu)
+	if (env->dst_cpu > busiest_cpu)
 		return 0;

-	*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->sgp->power,
-				       SCHED_POWER_SCALE);
+	env->imbalance = DIV_ROUND_CLOSEST(
+		sds->max_load * sds->busiest->sgp->power, SCHED_POWER_SCALE);
+
 	return 1;
 }

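The asym-packing path now writes its result straight into env->imbalance, scaling the busiest group's power-normalised load back to an absolute amount. A rough standalone illustration of that arithmetic (made-up numbers, toy macros standing in for the kernel's DIV_ROUND_CLOSEST and SCHED_POWER_SCALE):

/* Illustrative only: rounding division used to undo the power scaling. */
#include <stdio.h>

#define TOY_POWER_SCALE			1024UL
#define TOY_DIV_ROUND_CLOSEST(x, d)	(((x) + ((d) / 2)) / (d))

int main(void)
{
	unsigned long max_load = 1536;		/* avg_load of the busiest group */
	unsigned long group_power = 2048;	/* e.g. two full-power cpus */
	unsigned long imbalance;

	imbalance = TOY_DIV_ROUND_CLOSEST(max_load * group_power,
					  TOY_POWER_SCALE);
	printf("imbalance = %lu\n", imbalance);	/* prints 3072 */
	return 0;
}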
@@ -4037,8 +3885,8 @@ static int check_asym_packing(struct sched_domain *sd,
  * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
  * @imbalance: Variable to store the imbalance.
  */
-static inline void fix_small_imbalance(struct sd_lb_stats *sds,
-				       int this_cpu, unsigned long *imbalance)
+static inline
+void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 {
 	unsigned long tmp, pwr_now = 0, pwr_move = 0;
 	unsigned int imbn = 2;
@@ -4049,9 +3897,10 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
 		if (sds->busiest_load_per_task >
 				sds->this_load_per_task)
 			imbn = 1;
-	} else
+	} else {
 		sds->this_load_per_task =
-			cpu_avg_load_per_task(this_cpu);
+			cpu_avg_load_per_task(env->dst_cpu);
+	}

 	scaled_busy_load_per_task = sds->busiest_load_per_task
 					 * SCHED_POWER_SCALE;
@@ -4059,7 +3908,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,

 	if (sds->max_load - sds->this_load + scaled_busy_load_per_task >=
 			(scaled_busy_load_per_task * imbn)) {
-		*imbalance = sds->busiest_load_per_task;
+		env->imbalance = sds->busiest_load_per_task;
 		return;
 	}

@@ -4096,18 +3945,16 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,

 	/* Move if we gain throughput */
 	if (pwr_move > pwr_now)
-		*imbalance = sds->busiest_load_per_task;
+		env->imbalance = sds->busiest_load_per_task;
 }

 /**
  * calculate_imbalance - Calculate the amount of imbalance present within the
  *			 groups of a given sched_domain during load balance.
+ * @env: load balance environment
  * @sds: statistics of the sched_domain whose imbalance is to be calculated.
- * @this_cpu: Cpu for which currently load balance is being performed.
- * @imbalance: The variable to store the imbalance.
  */
-static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
-		unsigned long *imbalance)
+static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 {
 	unsigned long max_pull, load_above_capacity = ~0UL;

@@ -4123,8 +3970,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 	 * its cpu_power, while calculating max_load..)
 	 */
 	if (sds->max_load < sds->avg_load) {
-		*imbalance = 0;
-		return fix_small_imbalance(sds, this_cpu, imbalance);
+		env->imbalance = 0;
+		return fix_small_imbalance(env, sds);
 	}

 	if (!sds->group_imb) {
@@ -4152,7 +3999,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 	max_pull = min(sds->max_load - sds->avg_load, load_above_capacity);

 	/* How much load to actually move to equalise the imbalance */
-	*imbalance = min(max_pull * sds->busiest->sgp->power,
+	env->imbalance = min(max_pull * sds->busiest->sgp->power,
 		(sds->avg_load - sds->this_load) * sds->this->sgp->power)
 			/ SCHED_POWER_SCALE;

@@ -4162,8 +4009,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 	 * a think about bumping its value to force at least one task to be
 	 * moved
 	 */
-	if (*imbalance < sds->busiest_load_per_task)
-		return fix_small_imbalance(sds, this_cpu, imbalance);
+	if (env->imbalance < sds->busiest_load_per_task)
+		return fix_small_imbalance(env, sds);

 }

@@ -4194,9 +4041,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
  * put to idle by rebalancing its tasks onto our group.
  */
 static struct sched_group *
-find_busiest_group(struct sched_domain *sd, int this_cpu,
-		   unsigned long *imbalance, enum cpu_idle_type idle,
-		   const struct cpumask *cpus, int *balance)
+find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
 {
 	struct sd_lb_stats sds;

@@ -4206,7 +4051,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 	 * Compute the various statistics relavent for load balancing at
 	 * this level.
 	 */
-	update_sd_lb_stats(sd, this_cpu, idle, cpus, balance, &sds);
+	update_sd_lb_stats(env, cpus, balance, &sds);

 	/*
 	 * this_cpu is not the appropriate cpu to perform load balancing at
@@ -4215,8 +4060,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 	if (!(*balance))
 		goto ret;

-	if ((idle == CPU_IDLE || idle == CPU_NEWLY_IDLE) &&
-	    check_asym_packing(sd, &sds, this_cpu, imbalance))
+	if ((env->idle == CPU_IDLE || env->idle == CPU_NEWLY_IDLE) &&
+	    check_asym_packing(env, &sds))
 		return sds.busiest;

 	/* There is no busy sibling group to pull tasks from */
@@ -4234,7 +4079,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		goto force_balance;

 	/* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
-	if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
+	if (env->idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
 			!sds.busiest_has_capacity)
 		goto force_balance;

@@ -4252,7 +4097,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 	if (sds.this_load >= sds.avg_load)
 		goto out_balanced;

-	if (idle == CPU_IDLE) {
+	if (env->idle == CPU_IDLE) {
 		/*
 		 * This cpu is idle. If the busiest group load doesn't
 		 * have more tasks than the number of available cpu's and
@@ -4267,34 +4112,27 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		 * In the CPU_NEWLY_IDLE, CPU_NOT_IDLE cases, use
 		 * imbalance_pct to be conservative.
 		 */
-		if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+		if (100 * sds.max_load <= env->sd->imbalance_pct * sds.this_load)
 			goto out_balanced;
 	}

 force_balance:
 	/* Looks like there is an imbalance. Compute it */
-	calculate_imbalance(&sds, this_cpu, imbalance);
+	calculate_imbalance(env, &sds);
 	return sds.busiest;

 out_balanced:
-	/*
-	 * There is no obvious imbalance. But check if we can do some balancing
-	 * to save power.
-	 */
-	if (check_power_save_busiest_group(&sds, this_cpu, imbalance))
-		return sds.busiest;
 ret:
-	*imbalance = 0;
+	env->imbalance = 0;
 	return NULL;
 }

 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
-static struct rq *
-find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
-		   enum cpu_idle_type idle, unsigned long imbalance,
-		   const struct cpumask *cpus)
+static struct rq *find_busiest_queue(struct lb_env *env,
+				     struct sched_group *group,
+				     const struct cpumask *cpus)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
@@ -4307,7 +4145,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
 		unsigned long wl;

 		if (!capacity)
-			capacity = fix_small_capacity(sd, group);
+			capacity = fix_small_capacity(env->sd, group);

 		if (!cpumask_test_cpu(i, cpus))
 			continue;
@@ -4319,7 +4157,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
 		 * When comparing with imbalance, use weighted_cpuload()
 		 * which is not scaled with the cpu power.
 		 */
-		if (capacity && rq->nr_running == 1 && wl > imbalance)
+		if (capacity && rq->nr_running == 1 && wl > env->imbalance)
 			continue;

 		/*
@@ -4348,40 +4186,19 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
 /* Working cpumask for load_balance and load_balance_newidle. */
 DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);

-static int need_active_balance(struct sched_domain *sd, int idle,
-			       int busiest_cpu, int this_cpu)
+static int need_active_balance(struct lb_env *env)
 {
-	if (idle == CPU_NEWLY_IDLE) {
+	struct sched_domain *sd = env->sd;
+
+	if (env->idle == CPU_NEWLY_IDLE) {

 		/*
 		 * ASYM_PACKING needs to force migrate tasks from busy but
 		 * higher numbered CPUs in order to pack all tasks in the
 		 * lowest numbered CPUs.
 		 */
-		if ((sd->flags & SD_ASYM_PACKING) && busiest_cpu > this_cpu)
+		if ((sd->flags & SD_ASYM_PACKING) && env->src_cpu > env->dst_cpu)
 			return 1;
-
-		/*
-		 * The only task running in a non-idle cpu can be moved to this
-		 * cpu in an attempt to completely freeup the other CPU
-		 * package.
-		 *
-		 * The package power saving logic comes from
-		 * find_busiest_group(). If there are no imbalance, then
-		 * f_b_g() will return NULL. However when sched_mc={1,2} then
-		 * f_b_g() will select a group from which a running task may be
-		 * pulled to this cpu in order to make the other package idle.
-		 * If there is no opportunity to make a package idle and if
-		 * there are no imbalance, then f_b_g() will return NULL and no
-		 * action will be taken in load_balance_newidle().
-		 *
-		 * Under normal task pull operation due to imbalance, there
-		 * will be more than one task in the source run queue and
-		 * move_tasks() will succeed. ld_moved will be true and this
-		 * active balance code will not be triggered.
-		 */
-		if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
-			return 0;
 	}

 	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
@@ -4399,7 +4216,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 {
 	int ld_moved, active_balance = 0;
 	struct sched_group *group;
-	unsigned long imbalance;
 	struct rq *busiest;
 	unsigned long flags;
 	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
@@ -4417,8 +4233,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	schedstat_inc(sd, lb_count[idle]);

 redo:
-	group = find_busiest_group(sd, this_cpu, &imbalance, idle,
-				   cpus, balance);
+	group = find_busiest_group(&env, cpus, balance);

 	if (*balance == 0)
 		goto out_balanced;
@@ -4428,7 +4243,7 @@ redo:
 		goto out_balanced;
 	}

-	busiest = find_busiest_queue(sd, group, idle, imbalance, cpus);
+	busiest = find_busiest_queue(&env, group, cpus);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
@@ -4436,7 +4251,7 @@ redo:

 	BUG_ON(busiest == this_rq);

-	schedstat_add(sd, lb_imbalance[idle], imbalance);
+	schedstat_add(sd, lb_imbalance[idle], env.imbalance);

 	ld_moved = 0;
 	if (busiest->nr_running > 1) {
@@ -4447,10 +4262,9 @@ redo:
 		 * correctly treated as an imbalance.
 		 */
 		env.flags |= LBF_ALL_PINNED;
-		env.load_move	= imbalance;
-		env.src_cpu	= busiest->cpu;
-		env.src_rq	= busiest;
-		env.loop_max	= min_t(unsigned long, sysctl_sched_nr_migrate, busiest->nr_running);
+		env.src_cpu   = busiest->cpu;
+		env.src_rq    = busiest;
+		env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);

 more_balance:
 		local_irq_save(flags);
@@ -4492,7 +4306,7 @@ more_balance:
 		if (idle != CPU_NEWLY_IDLE)
 			sd->nr_balance_failed++;

-		if (need_active_balance(sd, idle, cpu_of(busiest), this_cpu)) {
+		if (need_active_balance(&env)) {
 			raw_spin_lock_irqsave(&busiest->lock, flags);

 			/* don't kick the active_load_balance_cpu_stop,
@@ -4519,10 +4333,11 @@ more_balance:
 		}
 		raw_spin_unlock_irqrestore(&busiest->lock, flags);

-		if (active_balance)
+		if (active_balance) {
 			stop_one_cpu_nowait(cpu_of(busiest),
 				active_load_balance_cpu_stop, busiest,
 				&busiest->active_balance_work);
+		}

 		/*
 		 * We've kicked active balancing, reset the failure
@@ -4703,104 +4518,15 @@ static struct {
 	unsigned long next_balance;     /* in jiffy units */
 } nohz ____cacheline_aligned;

-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-/**
- * lowest_flag_domain - Return lowest sched_domain containing flag.
- * @cpu:	The cpu whose lowest level of sched domain is to
- *		be returned.
- * @flag:	The flag to check for the lowest sched_domain
- *		for the given cpu.
- *
- * Returns the lowest sched_domain of a cpu which contains the given flag.
- */
-static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
-{
-	struct sched_domain *sd;
-
-	for_each_domain(cpu, sd)
-		if (sd->flags & flag)
-			break;
-
-	return sd;
-}
-
-/**
- * for_each_flag_domain - Iterates over sched_domains containing the flag.
- * @cpu:	The cpu whose domains we're iterating over.
- * @sd:		variable holding the value of the power_savings_sd
- *		for cpu.
- * @flag:	The flag to filter the sched_domains to be iterated.
- *
- * Iterates over all the scheduler domains for a given cpu that has the 'flag'
- * set, starting from the lowest sched_domain to the highest.
- */
-#define for_each_flag_domain(cpu, sd, flag) \
-	for (sd = lowest_flag_domain(cpu, flag); \
-		(sd && (sd->flags & flag)); sd = sd->parent)
-
-/**
- * find_new_ilb - Finds the optimum idle load balancer for nomination.
- * @cpu:	The cpu which is nominating a new idle_load_balancer.
- *
- * Returns:	Returns the id of the idle load balancer if it exists,
- *		Else, returns >= nr_cpu_ids.
- *
- * This algorithm picks the idle load balancer such that it belongs to a
- * semi-idle powersavings sched_domain. The idea is to try and avoid
- * completely idle packages/cores just for the purpose of idle load balancing
- * when there are other idle cpu's which are better suited for that job.
- */
-static int find_new_ilb(int cpu)
+static inline int find_new_ilb(int call_cpu)
 {
 	int ilb = cpumask_first(nohz.idle_cpus_mask);
-	struct sched_group *ilbg;
-	struct sched_domain *sd;
-
-	/*
-	 * Have idle load balancer selection from semi-idle packages only
-	 * when power-aware load balancing is enabled
-	 */
-	if (!(sched_smt_power_savings || sched_mc_power_savings))
-		goto out_done;
-
-	/*
-	 * Optimize for the case when we have no idle CPUs or only one
-	 * idle CPU. Don't walk the sched_domain hierarchy in such cases
-	 */
-	if (cpumask_weight(nohz.idle_cpus_mask) < 2)
-		goto out_done;
-
-	rcu_read_lock();
-	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
-		ilbg = sd->groups;
-
-		do {
-			if (ilbg->group_weight !=
-				atomic_read(&ilbg->sgp->nr_busy_cpus)) {
-				ilb = cpumask_first_and(nohz.idle_cpus_mask,
-							sched_group_cpus(ilbg));
-				goto unlock;
-			}
-
-			ilbg = ilbg->next;
-
-		} while (ilbg != sd->groups);
-	}
-unlock:
-	rcu_read_unlock();

-out_done:
 	if (ilb < nr_cpu_ids && idle_cpu(ilb))
 		return ilb;

 	return nr_cpu_ids;
 }
-#else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
-static inline int find_new_ilb(int call_cpu)
-{
-	return nr_cpu_ids;
-}
-#endif

 /*
  * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
@@ -5023,7 +4749,7 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)

 	raw_spin_lock_irq(&this_rq->lock);
 	update_rq_clock(this_rq);
-	update_cpu_load(this_rq);
+	update_idle_cpu_load(this_rq);
 	raw_spin_unlock_irq(&this_rq->lock);

 	rebalance_domains(balance_cpu, CPU_IDLE);
|