12 years ago · bb17f65571
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -234,6 +234,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
				 			atomic64_read(&cfs_rq->tg->load_avg));
			
 
				 	SEQ_printf(m, "  .%-30s: %lld\n", "tg_load_contrib",
			
 
				 			cfs_rq->tg_load_contrib);
			
 
				+	SEQ_printf(m, "  .%-30s: %d\n", "tg_runnable_contrib",
			
 
				+			cfs_rq->tg_runnable_contrib);
			
 
				+	SEQ_printf(m, "  .%-30s: %d\n", "tg->runnable_avg",
			
 
				+			atomic_read(&cfs_rq->tg->runnable_avg));
			
 
				 #endif
			
 
				 
			
 
				 	print_cfs_group_stats(m, cpu, cfs_rq->tg);
			
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1118,19 +1118,73 @@ static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq,
 
				 	}
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Aggregate cfs_rq runnable averages into an equivalent task_group
			
 
				+ * representation for computing load contributions.
			
 
				+ */
			
 
				+static inline void __update_tg_runnable_avg(struct sched_avg *sa,
			
 
				+						  struct cfs_rq *cfs_rq)
			
 
				+{
			
 
				+	struct task_group *tg = cfs_rq->tg;
			
 
				+	long contrib;
			
 
				+
			
 
				+	/* Fraction of a cpu used by this cfs_rq, scaled by 1 << NICE_0_SHIFT */
			
 
				+	contrib = div_u64(sa->runnable_avg_sum << NICE_0_SHIFT,
			
 
				+			  sa->runnable_avg_period + 1);
			
 
				+	contrib -= cfs_rq->tg_runnable_contrib;
			
 
				+
			
 
				+	if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) {
			
 
				+		atomic_add(contrib, &tg->runnable_avg);
			
 
				+		cfs_rq->tg_runnable_contrib += contrib;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 static inline void __update_group_entity_contrib(struct sched_entity *se)
			
 
				 {
			
 
				 	struct cfs_rq *cfs_rq = group_cfs_rq(se);
			
 
				 	struct task_group *tg = cfs_rq->tg;
			
 
				+	int runnable_avg;
			
 
				+
			
 
				 	u64 contrib;
			
 
				 
			
 
				 	contrib = cfs_rq->tg_load_contrib * tg->shares;
			
 
				 	se->avg.load_avg_contrib = div64_u64(contrib,
			
 
				 					     atomic64_read(&tg->load_avg) + 1);
			
 
				+
			
 
				+	/*
			
 
				+	 * For group entities we need to compute a correction term in the case
			
 
				+	 * that they are consuming <1 cpu so that we would contribute the same
			
 
				+	 * load as a task of equal weight.
			
 
				+	 *
			
 
				+	 * Explicitly co-ordinating this measurement would be expensive, but
			
 
				+	 * fortunately the sum of each cpu's contribution forms a usable
			
 
				+	 * lower-bound on the true value.
			
 
				+	 *
			
 
				+	 * Consider the aggregate of 2 contributions.  Either they are disjoint
			
 
				+	 * (and the sum represents true value) or they are not disjoint and we
			
 
				+	 * are understating by the aggregate of their overlap.
			
 
				+	 *
			
 
				+	 * Extending this to N cpus, for a given overlap, the maximum amount we
			
 
				+	 * understate is then n_i(n_i+1)/2 * w_i where n_i is the number of
			
 
				+	 * cpus that overlap for this interval and w_i is the interval width.
			
 
				+	 *
			
 
				+	 * On a small machine, the first term is well-bounded which bounds the
			
 
				+	 * total error since w_i is a subset of the period.  Whereas on a
			
 
				+	 * larger machine, while this first term can be larger, if w_i is of
			
 
				+	 * consequential size we are guaranteed to see n_i*w_i quickly converge
			
 
				+	 * to our upper bound of 1-cpu.
			
 
				+	 */
			
 
				+	runnable_avg = atomic_read(&tg->runnable_avg);
			
 
				+	if (runnable_avg < NICE_0_LOAD) {
			
 
				+		se->avg.load_avg_contrib *= runnable_avg;
			
 
				+		se->avg.load_avg_contrib >>= NICE_0_SHIFT;
			
 
				+	}
			
 
				 }
			
 
				 #else
			
 
				 static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq,
			
 
				 						 int force_update) {}
			
 
				+static inline void __update_tg_runnable_avg(struct sched_avg *sa,
			
 
				+						  struct cfs_rq *cfs_rq) {}
			
 
				 static inline void __update_group_entity_contrib(struct sched_entity *se) {}
			
 
				 #endif
			
 
				 
			
@@ -1152,6 +1206,7 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se)
 
				 	if (entity_is_task(se)) {
			
 
				 		__update_task_entity_contrib(se);
			
 
				 	} else {
			
 
				+		__update_tg_runnable_avg(&se->avg, group_cfs_rq(se));
			
 
				 		__update_group_entity_contrib(se);
			
 
				 	}
			
 
				 
			
@@ -1220,6 +1275,7 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
 
				 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
			
 
				 {
			
 
				 	__update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable);
			
 
				+	__update_tg_runnable_avg(&rq->avg, &rq->cfs);
			
 
				 }
			
 
				 
			
 
				 /* Add the load generated by se into cfs_rq's child load-average */
			
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -113,6 +113,7 @@ struct task_group {
 
				 
			
 
				 	atomic_t load_weight;
			
 
				 	atomic64_t load_avg;
			
 
				+	atomic_t runnable_avg;
			
 
				 #endif
			
 
				 
			
 
				 #ifdef CONFIG_RT_GROUP_SCHED
			
@@ -234,6 +235,7 @@ struct cfs_rq {
 
				 	atomic64_t decay_counter, removed_load;
			
 
				 	u64 last_decay;
			
 
				 #ifdef CONFIG_FAIR_GROUP_SCHED
			
 
				+	u32 tg_runnable_contrib;
			
 
				 	u64 tg_load_contrib;
			
 
				 #endif
			
 
				 #endif