@@ -5994,6 +5994,44 @@ struct sched_domain_topology_level {
 	struct sd_data data;
 };
 
+/*
+ * Build an iteration mask that can exclude certain CPUs from the upwards
+ * domain traversal.
+ *
+ * Asymmetric node setups can result in situations where the domain tree is of
+ * unequal depth, make sure to skip domains that already cover the entire
+ * range.
+ *
+ * In that case build_sched_domains() will have terminated the iteration early
+ * and our sibling sd spans will be empty. Domains should always include the
+ * cpu they're built on, so check that.
+ *
+ */
+static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
+{
+	const struct cpumask *span = sched_domain_span(sd);
+	struct sd_data *sdd = sd->private;
+	struct sched_domain *sibling;
+	int i;
+
+	for_each_cpu(i, span) {
+		sibling = *per_cpu_ptr(sdd->sd, i);
+		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+			continue;
+
+		cpumask_set_cpu(i, sched_group_mask(sg));
+	}
+}
+
+/*
+ * Return the canonical balance cpu for this group, this is the first cpu
+ * of this group that's also in the iteration mask.
+ */
+int group_balance_cpu(struct sched_group *sg)
+{
+	return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
+}
+
 static int
 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 {
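The two helpers added above carry the fix: build_group_mask() records which CPUs of the domain span may take part in the upwards iteration, and group_balance_cpu() picks the first cpu that sits in both the group's span and that mask. For readers who want to poke at the idea outside the kernel, the sketch below is a plain userspace C illustration, with a hypothetical struct and 64-bit masks standing in for struct cpumask and cpumask_first_and(); it is not kernel code.

/*
 * Userspace sketch of the canonical-balance-cpu idea; the kernel uses
 * struct cpumask and cpumask_first_and(), here 64-bit masks stand in.
 * The 'group' struct below is a hypothetical stand-in, not a kernel type.
 */
#include <stdint.h>
#include <stdio.h>

struct group {
	uint64_t span;	/* CPUs the group covers (sched_group_cpus() analogue) */
	uint64_t mask;	/* iteration mask (sched_group_mask() analogue) */
};

/* First CPU present in both the span and the iteration mask, or -1. */
static int balance_cpu(const struct group *g)
{
	uint64_t both = g->span & g->mask;

	return both ? __builtin_ctzll(both) : -1;
}

int main(void)
{
	/* Group spans CPUs 0-3, but only CPUs 2-3 are in the iteration mask. */
	struct group g = { .span = 0xf, .mask = 0xc };

	/* Prints 2: CPU 0 is in the span but excluded by the mask. */
	printf("balance cpu = %d\n", balance_cpu(&g));
	return 0;
}

With span 0xf and mask 0xc the sketch prints 2, mirroring the intent above: a cpu that is covered by the span but excluded from the iteration mask is never chosen as the balance cpu.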
@@ -6012,6 +6050,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		if (cpumask_test_cpu(i, covered))
 			continue;
 
+		child = *per_cpu_ptr(sdd->sd, i);
+
+		/* See the comment near build_group_mask(). */
+		if (!cpumask_test_cpu(i, sched_domain_span(child)))
+			continue;
+
 		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
 				GFP_KERNEL, cpu_to_node(cpu));
 
@@ -6019,8 +6063,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 			goto fail;
 
 		sg_span = sched_group_cpus(sg);
-
-		child = *per_cpu_ptr(sdd->sd, i);
 		if (child->child) {
 			child = child->child;
 			cpumask_copy(sg_span, sched_domain_span(child));
@@ -6030,13 +6072,18 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		cpumask_or(covered, covered, sg_span);
 
 		sg->sgp = *per_cpu_ptr(sdd->sgp, i);
-		atomic_inc(&sg->sgp->ref);
+		if (atomic_inc_return(&sg->sgp->ref) == 1)
+			build_group_mask(sd, sg);
+
 
+		/*
+		 * Make sure the first group of this domain contains the
+		 * canonical balance cpu. Otherwise the sched_domain iteration
+		 * breaks. See update_sg_lb_stats().
+		 */
 		if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
-		    cpumask_first(sg_span) == cpu) {
-			WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span));
+		    group_balance_cpu(sg) == cpu)
 			groups = sg;
-		}
 
 		if (!first)
 			first = sg;
@@ -6109,6 +6156,7 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 
 		cpumask_clear(sched_group_cpus(sg));
 		sg->sgp->power = 0;
+		cpumask_setall(sched_group_mask(sg));
 
 		for_each_cpu(j, span) {
 			if (get_group(j, sdd, NULL) != group)
@@ -6150,7 +6198,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 		sg = sg->next;
 	} while (sg != sd->groups);
 
-	if (cpu != group_first_cpu(sg))
+	if (cpu != group_balance_cpu(sg))
 		return;
 
 	update_group_power(sd, cpu);
@@ -6525,7 +6573,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
 		*per_cpu_ptr(sdd->sg, j) = sg;
 
-		sgp = kzalloc_node(sizeof(struct sched_group_power),
+		sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
				GFP_KERNEL, cpu_to_node(j));
 		if (!sgp)
			return -ENOMEM;
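The final hunk grows the sched_group_power allocation by cpumask_size() so the new per-group iteration mask can live directly behind the struct in the same allocation, which is what lets sched_group_mask() hand back a pointer without a second allocation. Below is a minimal userspace sketch of that trailing-storage pattern, assuming hypothetical stand-in types (a flexible array member in place of the kernel's cpumask storage); it illustrates the layout only, not the kernel implementation.

/*
 * Userspace sketch of the single-allocation trick: the mask lives in
 * memory directly after the struct, so one allocation of
 * sizeof(struct) + mask size covers both. Types and helpers below are
 * illustrative stand-ins, not kernel definitions.
 */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define NR_CPUS		64
#define MASK_BYTES	(NR_CPUS / 8)

struct group_power {
	int power;
	unsigned long refcount;
	unsigned char mask[];	/* trailing storage appended by the allocation */
};

/* Mirrors kzalloc_node(sizeof(...) + cpumask_size(), ...) in spirit. */
static struct group_power *alloc_group_power(void)
{
	return calloc(1, sizeof(struct group_power) + MASK_BYTES);
}

/* Mirrors the idea behind sched_group_mask(): return the trailing storage. */
static unsigned char *group_mask(struct group_power *gp)
{
	return gp->mask;
}

int main(void)
{
	struct group_power *gp = alloc_group_power();

	if (!gp)
		return 1;

	memset(group_mask(gp), 0xff, MASK_BYTES);	/* cpumask_setall() analogue */
	printf("first mask byte: 0x%x\n", (unsigned)group_mask(gp)[0]);
	free(gp);
	return 0;
}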