@@ -3237,6 +3237,103 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
 
 	return load_idx;
 }
+
+
+/**
+ * update_sg_lb_stats - Update sched_group's statistics for load balancing.
+ * @group: sched_group whose statistics are to be updated.
+ * @this_cpu: Cpu for which load balance is currently performed.
+ * @idle: Idle status of this_cpu
+ * @load_idx: Load index of sched_domain of this_cpu for load calc.
+ * @sd_idle: Idle status of the sched_domain containing group.
+ * @local_group: Does group contain this_cpu.
+ * @cpus: Set of cpus considered for load balancing.
+ * @balance: Should we balance.
+ * @sgs: variable to hold the statistics for this group.
+ */
+static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu,
+			enum cpu_idle_type idle, int load_idx, int *sd_idle,
+			int local_group, const struct cpumask *cpus,
+			int *balance, struct sg_lb_stats *sgs)
+{
+	unsigned long load, max_cpu_load, min_cpu_load;
+	int i;
+	unsigned int balance_cpu = -1, first_idle_cpu = 0;
+	unsigned long sum_avg_load_per_task;
+	unsigned long avg_load_per_task;
+
+	if (local_group)
+		balance_cpu = group_first_cpu(group);
+
+	/* Tally up the load of all CPUs in the group */
+	sum_avg_load_per_task = avg_load_per_task = 0;
+	max_cpu_load = 0;
+	min_cpu_load = ~0UL;
+
+	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+		struct rq *rq = cpu_rq(i);
+
+		if (*sd_idle && rq->nr_running)
+			*sd_idle = 0;
+
+		/* Bias balancing toward cpus of our domain */
+		if (local_group) {
+			if (idle_cpu(i) && !first_idle_cpu) {
+				first_idle_cpu = 1;
+				balance_cpu = i;
+			}
+
+			load = target_load(i, load_idx);
+		} else {
+			load = source_load(i, load_idx);
+			if (load > max_cpu_load)
+				max_cpu_load = load;
+			if (min_cpu_load > load)
+				min_cpu_load = load;
+		}
+
+		sgs->group_load += load;
+		sgs->sum_nr_running += rq->nr_running;
+		sgs->sum_weighted_load += weighted_cpuload(i);
+
+		sum_avg_load_per_task += cpu_avg_load_per_task(i);
+	}
+
+	/*
+	 * First idle cpu or the first cpu(busiest) in this sched group
+	 * is eligible for doing load balancing at this and above
+	 * domains. In the newly idle case, we will allow all the cpu's
+	 * to do the newly idle load balance.
+	 */
+	if (idle != CPU_NEWLY_IDLE && local_group &&
+	    balance_cpu != this_cpu && balance) {
+		*balance = 0;
+		return;
+	}
+
+	/* Adjust by relative CPU power of the group */
+	sgs->avg_load = sg_div_cpu_power(group,
+			sgs->group_load * SCHED_LOAD_SCALE);
+
+
+	/*
+	 * Consider the group unbalanced when the imbalance is larger
+	 * than the average weight of two tasks.
+	 *
+	 * APZ: with cgroup the avg task weight can vary wildly and
+	 * might not be a suitable number - should we keep a
+	 * normalized nr_running number somewhere that negates
+	 * the hierarchy?
+	 */
+	avg_load_per_task = sg_div_cpu_power(group,
+			sum_avg_load_per_task * SCHED_LOAD_SCALE);
+
+	if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
+		sgs->group_imb = 1;
+
+	sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
+
+}
 /******* find_busiest_group() helpers end here *********************/
 
 /*
@@ -3270,92 +3367,20 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 	do {
 		struct sg_lb_stats sgs;
-		unsigned long load, max_cpu_load, min_cpu_load;
 		int local_group;
-		int i;
-		unsigned int balance_cpu = -1, first_idle_cpu = 0;
-		unsigned long sum_avg_load_per_task;
-		unsigned long avg_load_per_task;
 
 		local_group = cpumask_test_cpu(this_cpu,
					sched_group_cpus(group));
 		memset(&sgs, 0, sizeof(sgs));
+		update_sg_lb_stats(group, this_cpu, idle, load_idx, sd_idle,
+					local_group, cpus, balance, &sgs);
 
-		if (local_group)
-			balance_cpu = group_first_cpu(group);
-
-		/* Tally up the load of all CPUs in the group */
-		sum_avg_load_per_task = avg_load_per_task = 0;
-
-		max_cpu_load = 0;
-		min_cpu_load = ~0UL;
-
-		for_each_cpu_and(i, sched_group_cpus(group), cpus) {
-			struct rq *rq = cpu_rq(i);
-
-			if (*sd_idle && rq->nr_running)
-				*sd_idle = 0;
-
-			/* Bias balancing toward cpus of our domain */
-			if (local_group) {
-				if (idle_cpu(i) && !first_idle_cpu) {
-					first_idle_cpu = 1;
-					balance_cpu = i;
-				}
-
-				load = target_load(i, load_idx);
-			} else {
-				load = source_load(i, load_idx);
-				if (load > max_cpu_load)
-					max_cpu_load = load;
-				if (min_cpu_load > load)
-					min_cpu_load = load;
-			}
-
-			sgs.group_load += load;
-			sgs.sum_nr_running += rq->nr_running;
-			sgs.sum_weighted_load += weighted_cpuload(i);
-
-			sum_avg_load_per_task += cpu_avg_load_per_task(i);
-		}
-
-		/*
-		 * First idle cpu or the first cpu(busiest) in this sched group
-		 * is eligible for doing load balancing at this and above
-		 * domains. In the newly idle case, we will allow all the cpu's
-		 * to do the newly idle load balance.
-		 */
-		if (idle != CPU_NEWLY_IDLE && local_group &&
-		    balance_cpu != this_cpu && balance) {
-			*balance = 0;
+		if (balance && !(*balance))
 			goto ret;
-		}
 
 		total_load += sgs.group_load;
 		total_pwr += group->__cpu_power;
 
-		/* Adjust by relative CPU power of the group */
-		sgs.avg_load = sg_div_cpu_power(group,
-				sgs.group_load * SCHED_LOAD_SCALE);
-
-
-		/*
-		 * Consider the group unbalanced when the imbalance is larger
-		 * than the average weight of two tasks.
-		 *
-		 * APZ: with cgroup the avg task weight can vary wildly and
-		 * might not be a suitable number - should we keep a
-		 * normalized nr_running number somewhere that negates
-		 * the hierarchy?
-		 */
-		avg_load_per_task = sg_div_cpu_power(group,
-				sum_avg_load_per_task * SCHED_LOAD_SCALE);
-
-		if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
-			sgs.group_imb = 1;
-
-		sgs.group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
-
 		if (local_group) {
 			this_load = sgs.avg_load;
 			this = group;