13 years ago · 9ee474f556
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1103,6 +1103,7 @@ struct sched_avg {
 
				 	 */
			
 
				 	u32 runnable_avg_sum, runnable_avg_period;
			
 
				 	u64 last_runnable_update;
			
 
				+	s64 decay_count;
			
 
				 	unsigned long load_avg_contrib;
			
 
				 };
			
 
				 
			
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1528,7 +1528,6 @@ static void __sched_fork(struct task_struct *p)
 
				 	p->se.avg.runnable_avg_period = 0;
			
 
				 	p->se.avg.runnable_avg_sum = 0;
			
 
				 #endif
			
 
				-
			
 
				 #ifdef CONFIG_SCHEDSTATS
			
 
				 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
			
 
				 #endif
			
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -95,6 +95,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 
				 	P(se->avg.runnable_avg_sum);
			
 
				 	P(se->avg.runnable_avg_period);
			
 
				 	P(se->avg.load_avg_contrib);
			
 
				+	P(se->avg.decay_count);
			
 
				 #endif
			
 
				 #undef PN
			
 
				 #undef P
			
@@ -227,6 +228,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
				 			atomic_read(&cfs_rq->tg->load_weight));
			
 
				 	SEQ_printf(m, "  .%-30s: %lld\n", "runnable_load_avg",
			
 
				 			cfs_rq->runnable_load_avg);
			
 
				+	SEQ_printf(m, "  .%-30s: %lld\n", "blocked_load_avg",
			
 
				+			cfs_rq->blocked_load_avg);
			
 
				 #endif
			
 
				 
			
 
				 	print_cfs_group_stats(m, cpu, cfs_rq->tg);
			
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -259,6 +259,8 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
 
				 	return grp->my_q;
			
 
				 }
			
 
				 
			
 
				+static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq);
			
 
				+
			
 
				 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
			
 
				 {
			
 
				 	if (!cfs_rq->on_list) {
			
@@ -278,6 +280,8 @@ static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 
				 		}
			
 
				 
			
 
				 		cfs_rq->on_list = 1;
			
 
				+		/* We should have no load, but we need to update last_decay. */
			
 
				+		update_cfs_rq_blocked_load(cfs_rq);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -1081,6 +1085,20 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
 
				 	return decayed;
			
 
				 }
			
 
				 
			
 
				+/* Synchronize an entity's decay with its parenting cfs_rq.*/
			
 
				+static inline void __synchronize_entity_decay(struct sched_entity *se)
			
 
				+{
			
 
				+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
			
 
				+	u64 decays = atomic64_read(&cfs_rq->decay_counter);
			
 
				+
			
 
				+	decays -= se->avg.decay_count;
			
 
				+	if (!decays)
			
 
				+		return;
			
 
				+
			
 
				+	se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
			
 
				+	se->avg.decay_count = 0;
			
 
				+}
			
 
				+
			
 
				 /* Compute the current contribution to load_avg by se, return any delta */
			
 
				 static long __update_entity_load_avg_contrib(struct sched_entity *se)
			
 
				 {
			
@@ -1096,8 +1114,18 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se)
 
				 	return se->avg.load_avg_contrib - old_contrib;
			
 
				 }
			
 
				 
			
 
				+static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq,
			
 
				+						 long load_contrib)
			
 
				+{
			
 
				+	if (likely(load_contrib < cfs_rq->blocked_load_avg))
			
 
				+		cfs_rq->blocked_load_avg -= load_contrib;
			
 
				+	else
			
 
				+		cfs_rq->blocked_load_avg = 0;
			
 
				+}
			
 
				+
			
 
				 /* Update a sched_entity's runnable average */
			
 
				-static inline void update_entity_load_avg(struct sched_entity *se)
			
 
				+static inline void update_entity_load_avg(struct sched_entity *se,
			
 
				+					  int update_cfs_rq)
			
 
				 {
			
 
				 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
			
 
				 	long contrib_delta;
			
@@ -1107,8 +1135,34 @@ static inline void update_entity_load_avg(struct sched_entity *se)
 
				 		return;
			
 
				 
			
 
				 	contrib_delta = __update_entity_load_avg_contrib(se);
			
 
				+
			
 
				+	if (!update_cfs_rq)
			
 
				+		return;
			
 
				+
			
 
				 	if (se->on_rq)
			
 
				 		cfs_rq->runnable_load_avg += contrib_delta;
			
 
				+	else
			
 
				+		subtract_blocked_load_contrib(cfs_rq, -contrib_delta);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Decay the load contributed by all blocked children and account this so that
			
 
				+ * their contribution may appropriately discounted when they wake up.
			
 
				+ */
			
 
				+static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq)
			
 
				+{
			
 
				+	u64 now = rq_of(cfs_rq)->clock_task >> 20;
			
 
				+	u64 decays;
			
 
				+
			
 
				+	decays = now - cfs_rq->last_decay;
			
 
				+	if (!decays)
			
 
				+		return;
			
 
				+
			
 
				+	cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg,
			
 
				+					      decays);
			
 
				+	atomic64_add(decays, &cfs_rq->decay_counter);
			
 
				+
			
 
				+	cfs_rq->last_decay = now;
			
 
				 }
			
 
				 
			
 
				 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
			
@@ -1118,26 +1172,53 @@ static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
 
				 
			
 
				 /* Add the load generated by se into cfs_rq's child load-average */
			
 
				 static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
			
 
				-						  struct sched_entity *se)
			
 
				+						  struct sched_entity *se,
			
 
				+						  int wakeup)
			
 
				 {
			
 
				-	update_entity_load_avg(se);
			
 
				+	/* we track migrations using entity decay_count == 0 */
			
 
				+	if (unlikely(!se->avg.decay_count)) {
			
 
				+		se->avg.last_runnable_update = rq_of(cfs_rq)->clock_task;
			
 
				+		wakeup = 0;
			
 
				+	} else {
			
 
				+		__synchronize_entity_decay(se);
			
 
				+	}
			
 
				+
			
 
				+	if (wakeup)
			
 
				+		subtract_blocked_load_contrib(cfs_rq, se->avg.load_avg_contrib);
			
 
				+
			
 
				+	update_entity_load_avg(se, 0);
			
 
				 	cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
			
 
				+	update_cfs_rq_blocked_load(cfs_rq);
			
 
				 }
			
 
				 
			
 
				-/* Remove se's load from this cfs_rq child load-average */
			
 
				+/*
			
 
				+ * Remove se's load from this cfs_rq child load-average, if the entity is
			
 
				+ * transitioning to a blocked state we track its projected decay using
			
 
				+ * blocked_load_avg.
			
 
				+ */
			
 
				 static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
			
 
				-						  struct sched_entity *se)
			
 
				+						  struct sched_entity *se,
			
 
				+						  int sleep)
			
 
				 {
			
 
				-	update_entity_load_avg(se);
			
 
				+	update_entity_load_avg(se, 1);
			
 
				+
			
 
				 	cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
			
 
				+	if (sleep) {
			
 
				+		cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
			
 
				+		se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
			
 
				+	} /* migrations, e.g. sleep=0 leave decay_count == 0 */
			
 
				 }
			
 
				 #else
			
 
				-static inline void update_entity_load_avg(struct sched_entity *se) {}
			
 
				+static inline void update_entity_load_avg(struct sched_entity *se,
			
 
				+					  int update_cfs_rq) {}
			
 
				 static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {}
			
 
				 static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
			
 
				-						  struct sched_entity *se) {}
			
 
				+					   struct sched_entity *se,
			
 
				+					   int wakeup) {}
			
 
				 static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
			
 
				-						  struct sched_entity *se) {}
			
 
				+					   struct sched_entity *se,
			
 
				+					   int sleep) {}
			
 
				+static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq) {}
			
 
				 #endif
			
 
				 
			
 
				 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
			
@@ -1266,7 +1347,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
				 	 */
			
 
				 	update_curr(cfs_rq);
			
 
				 	update_cfs_load(cfs_rq, 0);
			
 
				-	enqueue_entity_load_avg(cfs_rq, se);
			
 
				+	enqueue_entity_load_avg(cfs_rq, se, flags & ENQUEUE_WAKEUP);
			
 
				 	account_entity_enqueue(cfs_rq, se);
			
 
				 	update_cfs_shares(cfs_rq);
			
 
				 
			
@@ -1341,7 +1422,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
				 	 * Update run-time statistics of the 'current'.
			
 
				 	 */
			
 
				 	update_curr(cfs_rq);
			
 
				-	dequeue_entity_load_avg(cfs_rq, se);
			
 
				+	dequeue_entity_load_avg(cfs_rq, se, flags & DEQUEUE_SLEEP);
			
 
				 
			
 
				 	update_stats_dequeue(cfs_rq, se);
			
 
				 	if (flags & DEQUEUE_SLEEP) {
			
@@ -1512,7 +1593,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 
				 		/* Put 'current' back into the tree. */
			
 
				 		__enqueue_entity(cfs_rq, prev);
			
 
				 		/* in !on_rq case, update occurred at dequeue */
			
 
				-		update_entity_load_avg(prev);
			
 
				+		update_entity_load_avg(prev, 1);
			
 
				 	}
			
 
				 	cfs_rq->curr = NULL;
			
 
				 }
			
@@ -1528,7 +1609,8 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 
				 	/*
			
 
				 	 * Ensure that runnable average is periodically updated.
			
 
				 	 */
			
 
				-	update_entity_load_avg(curr);
			
 
				+	update_entity_load_avg(curr, 1);
			
 
				+	update_cfs_rq_blocked_load(cfs_rq);
			
 
				 
			
 
				 	/*
			
 
				 	 * Update share accounting for long-running entities.
			
@@ -2387,6 +2469,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
				 
			
 
				 		update_cfs_load(cfs_rq, 0);
			
 
				 		update_cfs_shares(cfs_rq);
			
 
				+		update_entity_load_avg(se, 1);
			
 
				 	}
			
 
				 
			
 
				 	if (!se) {
			
@@ -2448,6 +2531,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
				 
			
 
				 		update_cfs_load(cfs_rq, 0);
			
 
				 		update_cfs_shares(cfs_rq);
			
 
				+		update_entity_load_avg(se, 1);
			
 
				 	}
			
 
				 
			
 
				 	if (!se) {
			
@@ -3498,6 +3582,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu)
 
				 
			
 
				 	update_rq_clock(rq);
			
 
				 	update_cfs_load(cfs_rq, 1);
			
 
				+	update_cfs_rq_blocked_load(cfs_rq);
			
 
				 
			
 
				 	/*
			
 
				 	 * We need to update shares after updating tg->load_weight in
			
@@ -5232,6 +5317,20 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
 
				 		place_entity(cfs_rq, se, 0);
			
 
				 		se->vruntime -= cfs_rq->min_vruntime;
			
 
				 	}
			
 
				+
			
 
				+#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
			
 
				+	/*
			
 
				+	* Remove our load from contribution when we leave sched_fair
			
 
				+	* and ensure we don't carry in an old decay_count if we
			
 
				+	* switch back.
			
 
				+	*/
			
 
				+	if (p->se.avg.decay_count) {
			
 
				+		struct cfs_rq *cfs_rq = cfs_rq_of(&p->se);
			
 
				+		__synchronize_entity_decay(&p->se);
			
 
				+		subtract_blocked_load_contrib(cfs_rq,
			
 
				+				p->se.avg.load_avg_contrib);
			
 
				+	}
			
 
				+#endif
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -5278,6 +5377,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 
				 #ifndef CONFIG_64BIT
			
 
				 	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
			
 
				 #endif
			
 
				+#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
			
 
				+	atomic64_set(&cfs_rq->decay_counter, 1);
			
 
				+#endif
			
 
				 }
			
 
				 
			
 
				 #ifdef CONFIG_FAIR_GROUP_SCHED
			
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -229,7 +229,9 @@ struct cfs_rq {
 
				 	 * This allows for the description of both thread and group usage (in
			
 
				 	 * the FAIR_GROUP_SCHED case).
			
 
				 	 */
			
 
				-	u64 runnable_load_avg;
			
 
				+	u64 runnable_load_avg, blocked_load_avg;
			
 
				+	atomic64_t decay_counter;
			
 
				+	u64 last_decay;
			
 
				 #endif
			
 
				 #ifdef CONFIG_FAIR_GROUP_SCHED
			
 
				 	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */