@@ -5994,6 +5994,44 @@ struct sched_domain_topology_level {
 	struct sd_data data;
 };
 
+/*
+ * Build an iteration mask that can exclude certain CPUs from the upwards
+ * domain traversal.
+ *
+ * Asymmetric node setups can result in situations where the domain tree is of
+ * unequal depth, make sure to skip domains that already cover the entire
+ * range.
+ *
+ * In that case build_sched_domains() will have terminated the iteration early
+ * and our sibling sd spans will be empty. Domains should always include the
+ * cpu they're built on, so check that.
+ *
+ */
+static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
+{
+	const struct cpumask *span = sched_domain_span(sd);
+	struct sd_data *sdd = sd->private;
+	struct sched_domain *sibling;
+	int i;
+
+	for_each_cpu(i, span) {
+		sibling = *per_cpu_ptr(sdd->sd, i);
+		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+			continue;
+
+		cpumask_set_cpu(i, sched_group_mask(sg));
+	}
+}
+
+/*
+ * Return the canonical balance cpu for this group, this is the first cpu
+ * of this group that's also in the iteration mask.
+ */
+int group_balance_cpu(struct sched_group *sg)
+{
+	return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
+}
+
 static int
 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 {
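The two helpers added above carry the fix: build_group_mask() records which CPUs of the domain span may take part in the upwards iteration, and group_balance_cpu() picks the first cpu that sits in both the group's span and that mask. For readers who want to poke at the idea outside the kernel, the sketch below is a plain userspace C illustration, with a hypothetical struct and 64-bit masks standing in for struct cpumask and cpumask_first_and(); it is not kernel code.

/*
 * Userspace sketch of the canonical-balance-cpu idea; the kernel uses
 * struct cpumask and cpumask_first_and(), here 64-bit masks stand in.
 * The 'group' struct below is a hypothetical stand-in, not a kernel type.
 */
#include <stdint.h>
#include <stdio.h>

struct group {
	uint64_t span;	/* CPUs the group covers (sched_group_cpus() analogue) */
	uint64_t mask;	/* iteration mask (sched_group_mask() analogue) */
};

/* First CPU present in both the span and the iteration mask, or -1. */
static int balance_cpu(const struct group *g)
{
	uint64_t both = g->span & g->mask;

	return both ? __builtin_ctzll(both) : -1;
}

int main(void)
{
	/* Group spans CPUs 0-3, but only CPUs 2-3 are in the iteration mask. */
	struct group g = { .span = 0xf, .mask = 0xc };

	/* Prints 2: CPU 0 is in the span but excluded by the mask. */
	printf("balance cpu = %d\n", balance_cpu(&g));
	return 0;
}

With span 0xf and mask 0xc the sketch prints 2, mirroring the intent above: a cpu that is covered by the span but excluded from the iteration mask is never chosen as the balance cpu.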
@@ -6012,6 +6050,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		if (cpumask_test_cpu(i, covered))
 			continue;
 
+		child = *per_cpu_ptr(sdd->sd, i);
+
+		/* See the comment near build_group_mask(). */
+		if (!cpumask_test_cpu(i, sched_domain_span(child)))
+			continue;
+
 		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
 				GFP_KERNEL, cpu_to_node(cpu));
 
@@ -6019,8 +6063,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 			goto fail;
 
 		sg_span = sched_group_cpus(sg);
-
-		child = *per_cpu_ptr(sdd->sd, i);
 		if (child->child) {
 			child = child->child;
 			cpumask_copy(sg_span, sched_domain_span(child));
@@ -6030,13 +6072,18 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		cpumask_or(covered, covered, sg_span);
 
 		sg->sgp = *per_cpu_ptr(sdd->sgp, i);
-		atomic_inc(&sg->sgp->ref);
+		if (atomic_inc_return(&sg->sgp->ref) == 1)
+			build_group_mask(sd, sg);
+
 
+		/*
+		 * Make sure the first group of this domain contains the
+		 * canonical balance cpu. Otherwise the sched_domain iteration
+		 * breaks. See update_sg_lb_stats().
+		 */
 		if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
-		    cpumask_first(sg_span) == cpu) {
-			WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span));
+		    group_balance_cpu(sg) == cpu)
 			groups = sg;
-		}
 
 		if (!first)
 			first = sg;
@@ -6109,6 +6156,7 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 
 		cpumask_clear(sched_group_cpus(sg));
 		sg->sgp->power = 0;
+		cpumask_setall(sched_group_mask(sg));
 
 		for_each_cpu(j, span) {
 			if (get_group(j, sdd, NULL) != group)
@@ -6150,7 +6198,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 		sg = sg->next;
 	} while (sg != sd->groups);
 
-	if (cpu != group_first_cpu(sg))
+	if (cpu != group_balance_cpu(sg))
 		return;
 
 	update_group_power(sd, cpu);
@@ -6525,7 +6573,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
 		*per_cpu_ptr(sdd->sg, j) = sg;
 
-		sgp = kzalloc_node(sizeof(struct sched_group_power),
+		sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
				GFP_KERNEL, cpu_to_node(j));
 		if (!sgp)
			return -ENOMEM;
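The final hunk grows the sched_group_power allocation by cpumask_size() so the new per-group iteration mask can live directly behind the struct in the same allocation, which is what lets sched_group_mask() hand back a pointer without a second allocation. Below is a minimal userspace sketch of that trailing-storage pattern, assuming hypothetical stand-in types (a flexible array member in place of the kernel's cpumask storage); it illustrates the layout only, not the kernel implementation.

/*
 * Userspace sketch of the single-allocation trick: the mask lives in
 * memory directly after the struct, so one allocation of
 * sizeof(struct) + mask size covers both. Types and helpers below are
 * illustrative stand-ins, not kernel definitions.
 */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define NR_CPUS		64
#define MASK_BYTES	(NR_CPUS / 8)

struct group_power {
	int power;
	unsigned long refcount;
	unsigned char mask[];	/* trailing storage appended by the allocation */
};

/* Mirrors kzalloc_node(sizeof(...) + cpumask_size(), ...) in spirit. */
static struct group_power *alloc_group_power(void)
{
	return calloc(1, sizeof(struct group_power) + MASK_BYTES);
}

/* Mirrors the idea behind sched_group_mask(): return the trailing storage. */
static unsigned char *group_mask(struct group_power *gp)
{
	return gp->mask;
}

int main(void)
{
	struct group_power *gp = alloc_group_power();

	if (!gp)
		return 1;

	memset(group_mask(gp), 0xff, MASK_BYTES);	/* cpumask_setall() analogue */
	printf("first mask byte: 0x%x\n", (unsigned)group_mask(gp)[0]);
	free(gp);
	return 0;
}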