|
@@ -4240,10 +4240,126 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
|
|
static struct {
|
|
static struct {
|
|
atomic_t load_balancer;
|
|
atomic_t load_balancer;
|
|
cpumask_var_t cpu_mask;
|
|
cpumask_var_t cpu_mask;
|
|
|
|
+ cpumask_var_t ilb_grp_nohz_mask;
|
|
} nohz ____cacheline_aligned = {
|
|
} nohz ____cacheline_aligned = {
|
|
.load_balancer = ATOMIC_INIT(-1),
|
|
.load_balancer = ATOMIC_INIT(-1),
|
|
};
|
|
};
|
|
|
|
|
|
|
|
+#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
|
|
|
|
+/**
|
|
|
|
+ * lowest_flag_domain - Return lowest sched_domain containing flag.
|
|
|
|
+ * @cpu: The cpu whose lowest level of sched domain is to
|
|
|
|
+ * be returned.
|
|
|
|
+ * @flag: The flag to check for the lowest sched_domain
|
|
|
|
+ * for the given cpu.
|
|
|
|
+ *
|
|
|
|
+ * Returns the lowest sched_domain of a cpu which contains the given flag.
|
|
|
|
+ */
|
|
|
|
+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
|
|
|
|
+{
|
|
|
|
+ struct sched_domain *sd;
|
|
|
|
+
|
|
|
|
+ for_each_domain(cpu, sd)
|
|
|
|
+ if (sd && (sd->flags & flag))
|
|
|
|
+ break;
|
|
|
|
+
|
|
|
|
+ return sd;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
/**
 * for_each_flag_domain - Iterates over sched_domains containing the flag.
 * @cpu:	The cpu whose domains we're iterating over.
 * @sd:		Iteration cursor; a struct sched_domain pointer lvalue that
 *		is assigned each visited domain in turn.
 * @flag:	The flag used to filter the sched_domains to be iterated.
 *
 * Starts at lowest_flag_domain(@cpu, @flag) and follows ->parent upwards.
 * The walk ends at the first domain that does not have @flag set (or when
 * the cursor becomes NULL), so domains above a flag-less parent are not
 * visited.
 *
 * Every argument is parenthesised so that compound expressions such as
 * (A | B) for @flag are tested as a whole.  Note that @flag and @sd are
 * evaluated on every iteration, so they should be free of side effects.
 */
#define for_each_flag_domain(cpu, sd, flag) \
	for ((sd) = lowest_flag_domain((cpu), (flag)); \
	     (sd) && ((sd)->flags & (flag)); (sd) = (sd)->parent)
|
|
|
|
+
|
|
|
|
+/**
|
|
|
|
+ * is_semi_idle_group - Checks if the given sched_group is semi-idle.
|
|
|
|
+ * @ilb_group: group to be checked for semi-idleness
|
|
|
|
+ *
|
|
|
|
+ * Returns: 1 if the group is semi-idle. 0 otherwise.
|
|
|
|
+ *
|
|
|
|
+ * We define a sched_group to be semi idle if it has atleast one idle-CPU
|
|
|
|
+ * and atleast one non-idle CPU. This helper function checks if the given
|
|
|
|
+ * sched_group is semi-idle or not.
|
|
|
|
+ */
|
|
|
|
+static inline int is_semi_idle_group(struct sched_group *ilb_group)
|
|
|
|
+{
|
|
|
|
+ cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask,
|
|
|
|
+ sched_group_cpus(ilb_group));
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * A sched_group is semi-idle when it has atleast one busy cpu
|
|
|
|
+ * and atleast one idle cpu.
|
|
|
|
+ */
|
|
|
|
+ if (cpumask_empty(nohz.ilb_grp_nohz_mask))
|
|
|
|
+ return 0;
|
|
|
|
+
|
|
|
|
+ if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group)))
|
|
|
|
+ return 0;
|
|
|
|
+
|
|
|
|
+ return 1;
|
|
|
|
+}
|
|
|
|
+/**
|
|
|
|
+ * find_new_ilb - Finds the optimum idle load balancer for nomination.
|
|
|
|
+ * @cpu: The cpu which is nominating a new idle_load_balancer.
|
|
|
|
+ *
|
|
|
|
+ * Returns: Returns the id of the idle load balancer if it exists,
|
|
|
|
+ * Else, returns >= nr_cpu_ids.
|
|
|
|
+ *
|
|
|
|
+ * This algorithm picks the idle load balancer such that it belongs to a
|
|
|
|
+ * semi-idle powersavings sched_domain. The idea is to try and avoid
|
|
|
|
+ * completely idle packages/cores just for the purpose of idle load balancing
|
|
|
|
+ * when there are other idle cpu's which are better suited for that job.
|
|
|
|
+ */
|
|
|
|
+static int find_new_ilb(int cpu)
|
|
|
|
+{
|
|
|
|
+ struct sched_domain *sd;
|
|
|
|
+ struct sched_group *ilb_group;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Have idle load balancer selection from semi-idle packages only
|
|
|
|
+ * when power-aware load balancing is enabled
|
|
|
|
+ */
|
|
|
|
+ if (!(sched_smt_power_savings || sched_mc_power_savings))
|
|
|
|
+ goto out_done;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Optimize for the case when we have no idle CPUs or only one
|
|
|
|
+ * idle CPU. Don't walk the sched_domain hierarchy in such cases
|
|
|
|
+ */
|
|
|
|
+ if (cpumask_weight(nohz.cpu_mask) < 2)
|
|
|
|
+ goto out_done;
|
|
|
|
+
|
|
|
|
+ for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
|
|
|
|
+ ilb_group = sd->groups;
|
|
|
|
+
|
|
|
|
+ do {
|
|
|
|
+ if (is_semi_idle_group(ilb_group))
|
|
|
|
+ return cpumask_first(nohz.ilb_grp_nohz_mask);
|
|
|
|
+
|
|
|
|
+ ilb_group = ilb_group->next;
|
|
|
|
+
|
|
|
|
+ } while (ilb_group != sd->groups);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+out_done:
|
|
|
|
+ return cpumask_first(nohz.cpu_mask);
|
|
|
|
+}
|
|
|
|
+#else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
|
|
|
|
+static inline int find_new_ilb(int call_cpu)
|
|
|
|
+{
|
|
|
|
+ return first_cpu(nohz.cpu_mask);
|
|
|
|
+}
|
|
|
|
+#endif
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* This routine will try to nominate the ilb (idle load balancing)
|
|
* This routine will try to nominate the ilb (idle load balancing)
|
|
* owner among the cpus whose ticks are stopped. ilb owner will do the idle
|
|
* owner among the cpus whose ticks are stopped. ilb owner will do the idle
|
|
@@ -4468,15 +4584,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
|
|
}
|
|
}
|
|
|
|
|
|
if (atomic_read(&nohz.load_balancer) == -1) {
|
|
if (atomic_read(&nohz.load_balancer) == -1) {
|
|
- /*
|
|
|
|
- * simple selection for now: Nominate the
|
|
|
|
- * first cpu in the nohz list to be the next
|
|
|
|
- * ilb owner.
|
|
|
|
- *
|
|
|
|
- * TBD: Traverse the sched domains and nominate
|
|
|
|
- * the nearest cpu in the nohz.cpu_mask.
|
|
|
|
- */
|
|
|
|
- int ilb = cpumask_first(nohz.cpu_mask);
|
|
|
|
|
|
+ int ilb = find_new_ilb(cpu);
|
|
|
|
|
|
if (ilb < nr_cpu_ids)
|
|
if (ilb < nr_cpu_ids)
|
|
resched_cpu(ilb);
|
|
resched_cpu(ilb);
|
|
@@ -9051,6 +9159,7 @@ void __init sched_init(void)
|
|
#ifdef CONFIG_SMP
|
|
#ifdef CONFIG_SMP
|
|
#ifdef CONFIG_NO_HZ
|
|
#ifdef CONFIG_NO_HZ
|
|
alloc_bootmem_cpumask_var(&nohz.cpu_mask);
|
|
alloc_bootmem_cpumask_var(&nohz.cpu_mask);
|
|
|
|
+ alloc_bootmem_cpumask_var(&nohz.ilb_grp_nohz_mask);
|
|
#endif
|
|
#endif
|
|
alloc_bootmem_cpumask_var(&cpu_isolated_map);
|
|
alloc_bootmem_cpumask_var(&cpu_isolated_map);
|
|
#endif /* SMP */
|
|
#endif /* SMP */
|