@@ -52,8 +52,9 @@
 #include <linux/tsacct_kern.h>
 #include <linux/kprobes.h>
 #include <linux/delayacct.h>
-#include <asm/tlb.h>
+#include <linux/reciprocal_div.h>
 
+#include <asm/tlb.h>
 #include <asm/unistd.h>
 
 /*
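Note: the new include pulls in the kernel's reciprocal-divide helpers. As a point of reference only (paraphrased here, not taken from this patch), the interface assumed by the code below looks roughly like the following sketch: a divisor is converted once into a 32.32 fixed-point reciprocal, and each later division becomes a widening multiply plus a shift.

	/* rough sketch of the assumed <linux/reciprocal_div.h> interface */
	u32 reciprocal_value(u32 d);	/* precompute ~(2^32 / d), once per divisor */

	static inline u32 reciprocal_divide(u32 a, u32 r)
	{
		/* a / d approximated as (a * (2^32 / d)) >> 32 */
		return (u32)(((u64)a * r) >> 32);
	}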
@@ -181,6 +182,27 @@ static unsigned int static_prio_timeslice(int static_prio)
 	return SCALE_PRIO(DEF_TIMESLICE, static_prio);
 }
 
+#ifdef CONFIG_SMP
+/*
+ * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
+ * Since cpu_power is a 'constant', we can use a reciprocal divide.
+ */
+static inline u32 sg_div_cpu_power(const struct sched_group *sg, u32 load)
+{
+	return reciprocal_divide(load, sg->reciprocal_cpu_power);
+}
+
+/*
+ * Each time a sched group cpu_power is changed,
+ * we must compute its reciprocal value
+ */
+static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val)
+{
+	sg->__cpu_power += val;
+	sg->reciprocal_cpu_power = reciprocal_value(sg->__cpu_power);
+}
+#endif
+
 /*
  * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
  * to time slice values: [800ms ... 100ms ... 5ms]
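The two helpers above carry the whole patch: sg_div_cpu_power() replaces a hardware divide by __cpu_power with a multiply against a cached reciprocal, and sg_inc_cpu_power() is the single place where __cpu_power changes, so the reciprocal can be refreshed in the same step. A minimal userspace sketch of the idea follows; it assumes the multiply-high-and-shift scheme sketched earlier, and every name prefixed demo_ is illustrative, not kernel code. The reciprocal result can differ from an exact integer divide by one in corner cases, which is tolerable for load estimates.

	#include <stdint.h>
	#include <stdio.h>

	/* once per divisor: ~ceil(2^32 / d) */
	static uint32_t demo_reciprocal_value(uint32_t d)
	{
		return (uint32_t)((((uint64_t)1 << 32) + d - 1) / d);
	}

	/* per division: multiply + shift instead of a divide */
	static uint32_t demo_reciprocal_divide(uint32_t a, uint32_t r)
	{
		return (uint32_t)(((uint64_t)a * r) >> 32);
	}

	int main(void)
	{
		uint32_t cpu_power = 0, reciprocal;
		uint32_t load = 384000;		/* an arbitrary example load */

		/* the invariant sg_inc_cpu_power() maintains: every change to
		 * the power refreshes the cached reciprocal with it */
		cpu_power += 128;		/* an arbitrary example power */
		reciprocal = demo_reciprocal_value(cpu_power);

		printf("%u %u\n", load / cpu_power,
		       demo_reciprocal_divide(load, reciprocal));
		return 0;
	}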
@@ -1256,7 +1278,8 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 	}
 
 	/* Adjust by relative CPU power of the group */
-	avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
+	avg_load = sg_div_cpu_power(group,
+			avg_load * SCHED_LOAD_SCALE);
 
 	if (local_group) {
 		this_load = avg_load;
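This is the conversion pattern repeated through the rest of the patch; the two forms compute the same quantity (up to the corner-case off-by-one noted above), provided reciprocal_cpu_power was refreshed whenever __cpu_power changed:

	/* before: one hardware divide per balancing pass */
	avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;

	/* after: multiply + shift against the cached reciprocal */
	avg_load = sg_div_cpu_power(group, avg_load * SCHED_LOAD_SCALE);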
@@ -2367,12 +2390,13 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		}
 
 		total_load += avg_load;
-		total_pwr += group->cpu_power;
+		total_pwr += group->__cpu_power;
 
 		/* Adjust by relative CPU power of the group */
-		avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
+		avg_load = sg_div_cpu_power(group,
+				avg_load * SCHED_LOAD_SCALE);
 
-		group_capacity = group->cpu_power / SCHED_LOAD_SCALE;
+		group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
 
 		if (local_group) {
 			this_load = avg_load;
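Worth noting: the divides that survive in this hunk (and below) are by SCHED_LOAD_SCALE, which is a compile-time power of two (1 << SCHED_LOAD_SHIFT), so the compiler already reduces them to shifts; only divides by the run-time value __cpu_power gain anything from the cached reciprocal. Roughly:

	/* divisor is a compile-time power of two: already just a shift */
	group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;

	/* divisor only known at run time: use the cached reciprocal */
	avg_load = sg_div_cpu_power(group, avg_load * SCHED_LOAD_SCALE);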
@@ -2483,8 +2507,8 @@ group_next:
 	max_pull = min(max_load - avg_load, max_load - busiest_load_per_task);
 
 	/* How much load to actually move to equalise the imbalance */
-	*imbalance = min(max_pull * busiest->cpu_power,
-				(avg_load - this_load) * this->cpu_power)
+	*imbalance = min(max_pull * busiest->__cpu_power,
+				(avg_load - this_load) * this->__cpu_power)
 			/ SCHED_LOAD_SCALE;
 
 	/*
@@ -2518,28 +2542,29 @@ small_imbalance:
 	 * moving them.
 	 */
 
-	pwr_now += busiest->cpu_power *
-			min(busiest_load_per_task, max_load);
-	pwr_now += this->cpu_power *
-			min(this_load_per_task, this_load);
+	pwr_now += busiest->__cpu_power *
+			min(busiest_load_per_task, max_load);
+	pwr_now += this->__cpu_power *
+			min(this_load_per_task, this_load);
 	pwr_now /= SCHED_LOAD_SCALE;
 
 	/* Amount of load we'd subtract */
-	tmp = busiest_load_per_task * SCHED_LOAD_SCALE /
-		busiest->cpu_power;
+	tmp = sg_div_cpu_power(busiest,
+			busiest_load_per_task * SCHED_LOAD_SCALE);
 	if (max_load > tmp)
-		pwr_move += busiest->cpu_power *
+		pwr_move += busiest->__cpu_power *
 			min(busiest_load_per_task, max_load - tmp);
 
 	/* Amount of load we'd add */
-	if (max_load * busiest->cpu_power <
+	if (max_load * busiest->__cpu_power <
 				busiest_load_per_task * SCHED_LOAD_SCALE)
-		tmp = max_load * busiest->cpu_power / this->cpu_power;
+		tmp = sg_div_cpu_power(this,
+				max_load * busiest->__cpu_power);
 	else
-		tmp = busiest_load_per_task * SCHED_LOAD_SCALE /
-			this->cpu_power;
-	pwr_move += this->cpu_power *
-		min(this_load_per_task, this_load + tmp);
+		tmp = sg_div_cpu_power(this,
+				busiest_load_per_task * SCHED_LOAD_SCALE);
+	pwr_move += this->__cpu_power *
+		min(this_load_per_task, this_load + tmp);
 	pwr_move /= SCHED_LOAD_SCALE;
 
 	/* Move if we gain throughput */
@@ -5501,7 +5526,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 			break;
 		}
 
-		if (!group->cpu_power) {
+		if (!group->__cpu_power) {
 			printk("\n");
 			printk(KERN_ERR "ERROR: domain->cpu_power not "
 					"set\n");
@@ -5678,7 +5703,7 @@ init_sched_build_groups(cpumask_t span, const cpumask_t *cpu_map,
 			continue;
 
 		sg->cpumask = CPU_MASK_NONE;
-		sg->cpu_power = 0;
+		sg->__cpu_power = 0;
 
 		for_each_cpu_mask(j, span) {
 			if (group_fn(j, cpu_map, NULL) != group)
@@ -6367,7 +6392,7 @@ next_sg:
 			continue;
 		}
 
-		sg->cpu_power += sd->groups->cpu_power;
+		sg_inc_cpu_power(sg, sd->groups->__cpu_power);
 	}
 	sg = sg->next;
 	if (sg != group_head)
@@ -6442,6 +6467,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 
 	child = sd->child;
 
+	sd->groups->__cpu_power = 0;
+
 	/*
 	 * For perf policy, if the groups in child domain share resources
 	 * (for example cores sharing some portions of the cache hierarchy
@@ -6452,18 +6479,16 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 	if (!child || (!(sd->flags & SD_POWERSAVINGS_BALANCE) &&
 		       (child->flags &
 			(SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) {
-		sd->groups->cpu_power = SCHED_LOAD_SCALE;
+		sg_inc_cpu_power(sd->groups, SCHED_LOAD_SCALE);
 		return;
 	}
 
-	sd->groups->cpu_power = 0;
-
 	/*
 	 * add cpu_power of each child group to this groups cpu_power
 	 */
 	group = child->groups;
 	do {
-		sd->groups->cpu_power += group->cpu_power;
+		sg_inc_cpu_power(sd->groups, group->__cpu_power);
 		group = group->next;
 	} while (group != child->groups);
 }
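The reordering in init_sched_groups_power() above is what keeps the cached reciprocal coherent: __cpu_power is zeroed up front (before the early-return path), and every subsequent update goes through sg_inc_cpu_power() so reciprocal_cpu_power is recomputed in the same step. A bare increment would silently leave the cached reciprocal stale, e.g.:

	/* hazard the helper avoids (illustrative only) */
	sd->groups->__cpu_power += group->__cpu_power;
	/* ... reciprocal_cpu_power still describes the old value ... */

	/* pattern used by this patch: both fields move together */
	sg_inc_cpu_power(sd->groups, group->__cpu_power);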
@@ -6623,7 +6648,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 			sd = &per_cpu(node_domains, j);
 			sd->groups = sg;
 		}
-		sg->cpu_power = 0;
+		sg->__cpu_power = 0;
 		sg->cpumask = nodemask;
 		sg->next = sg;
 		cpus_or(covered, covered, nodemask);
@@ -6651,7 +6676,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 				"Can not alloc domain group for node %d\n", j);
 			goto error;
 		}
-		sg->cpu_power = 0;
+		sg->__cpu_power = 0;
 		sg->cpumask = tmp;
 		sg->next = prev->next;
 		cpus_or(covered, covered, tmp);