@@ -179,8 +179,17 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 
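+/*
+ * A task is a candidate for migration when it is not currently running
+ * and @cpu is in its cpus_allowed mask (a negative @cpu matches any CPU).
+ */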
+static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
+{
+	if (!task_running(rq, p) &&
+	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)))
+		return 1;
+	return 0;
+}
+
 /* Return the second highest RT task, NULL otherwise */
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
+static struct task_struct *pick_next_highest_task_rt(struct rq *rq,
+						     int cpu)
 {
 	struct rt_prio_array *array = &rq->rt.active;
 	struct task_struct *next;
@@ -199,26 +208,36 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
 	}
 
 	queue = array->queue + idx;
+	BUG_ON(list_empty(queue));
+
 	next = list_entry(queue->next, struct task_struct, run_list);
-	if (unlikely(next != rq->curr))
-		return next;
+	if (unlikely(pick_rt_task(rq, next, cpu)))
+		goto out;
 
 	if (queue->next->next != queue) {
 		/* same prio task */
 		next = list_entry(queue->next->next, struct task_struct, run_list);
-		return next;
+		if (pick_rt_task(rq, next, cpu))
+			goto out;
 	}
 
+ retry:
 	/* slower, but more flexible */
 	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
-	if (unlikely(idx >= MAX_RT_PRIO)) {
-		WARN_ON(1); /* rt_nr_running was 2 and above! */
+	if (unlikely(idx >= MAX_RT_PRIO))
 		return NULL;
-	}
 
 	queue = array->queue + idx;
-	next = list_entry(queue->next, struct task_struct, run_list);
+	BUG_ON(list_empty(queue));
+
+	list_for_each_entry(next, queue, run_list) {
+		if (pick_rt_task(rq, next, cpu))
+			goto out;
+	}
+
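+	/* Nothing suitable at this priority level; scan the next one. */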
+	goto retry;
 
+ out:
 	return next;
 }
 
@@ -305,13 +324,15 @@ static int push_rt_task(struct rq *this_rq)
 
 	assert_spin_locked(&this_rq->lock);
 
-	next_task = pick_next_highest_task_rt(this_rq);
+	next_task = pick_next_highest_task_rt(this_rq, -1);
 	if (!next_task)
 		return 0;
 
  retry:
-	if (unlikely(next_task == this_rq->curr))
+	if (unlikely(next_task == this_rq->curr)) {
+		WARN_ON(1);
 		return 0;
+	}
 
 	/*
 	 * It's possible that the next_task slipped in of
@@ -335,7 +356,7 @@ static int push_rt_task(struct rq *this_rq)
 	 * so it is possible that next_task has changed.
 	 * If it has, then try again.
 	 */
-	task = pick_next_highest_task_rt(this_rq);
+	task = pick_next_highest_task_rt(this_rq, -1);
 	if (unlikely(task != next_task) && task && paranoid--) {
 		put_task_struct(next_task);
 		next_task = task;
@@ -378,6 +399,149 @@ static void push_rt_tasks(struct rq *rq)
 		;
 }
 
+static int pull_rt_task(struct rq *this_rq)
+{
+	struct task_struct *next;
+	struct task_struct *p;
+	struct rq *src_rq;
+	cpumask_t *rto_cpumask;
+	int this_cpu = this_rq->cpu;
+	int cpu;
+	int ret = 0;
+
+	assert_spin_locked(&this_rq->lock);
+
+	/*
+	 * If cpusets are used, and we have overlapping
+	 * run queue cpusets, then this algorithm may not catch all.
+	 * This is just the price you pay for trying to keep
+	 * cache dirtying down on large SMP machines.
+	 */
+	if (likely(!rt_overloaded()))
+		return 0;
+
+	next = pick_next_task_rt(this_rq);
+
+	rto_cpumask = rt_overload();
+
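+	/* Walk every runqueue currently flagged as RT overloaded. */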
+	for_each_cpu_mask(cpu, *rto_cpumask) {
+		if (this_cpu == cpu)
+			continue;
+
+		src_rq = cpu_rq(cpu);
+		if (unlikely(src_rq->rt.rt_nr_running <= 1)) {
+			/*
+			 * It is possible that overlapping cpusets
+			 * will miss clearing a non-overloaded runqueue.
+			 * Clear it now.
+			 */
+			if (double_lock_balance(this_rq, src_rq)) {
+				/* unlocked our runqueue lock */
+				struct task_struct *old_next = next;
+				next = pick_next_task_rt(this_rq);
+				if (next != old_next)
+					ret = 1;
+			}
+			if (likely(src_rq->rt.rt_nr_running <= 1))
+				/*
+				 * Small chance that this_rq->curr changed
+				 * but it's really harmless here.
+				 */
+				rt_clear_overload(this_rq);
+			else
+				/*
+				 * Heh, the src_rq is now overloaded; since
+				 * we already have the src_rq lock, go straight
+				 * to pulling tasks from it.
+				 */
+				goto try_pulling;
+			spin_unlock(&src_rq->lock);
+			continue;
+		}
+
+		/*
+		 * We can potentially drop this_rq's lock in
+		 * double_lock_balance, and another CPU could
+		 * steal our next task - hence we must cause
+		 * the caller to recalculate the next task
+		 * in that case:
+		 */
+		if (double_lock_balance(this_rq, src_rq)) {
+			struct task_struct *old_next = next;
+			next = pick_next_task_rt(this_rq);
+			if (next != old_next)
+				ret = 1;
+		}
+
+		/*
+		 * Are there still pullable RT tasks?
+		 */
+		if (src_rq->rt.rt_nr_running <= 1) {
+			spin_unlock(&src_rq->lock);
+			continue;
+		}
+
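+		/* src_rq still has more than one runnable RT task; try to pull one. */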
+ try_pulling:
+		p = pick_next_highest_task_rt(src_rq, this_cpu);
+
+		/*
+		 * Do we have an RT task that preempts
+		 * the to-be-scheduled task?
+		 */
+		if (p && (!next || (p->prio < next->prio))) {
+			WARN_ON(p == src_rq->curr);
+			WARN_ON(!p->se.on_rq);
+
+			/*
+			 * There's a chance that p is higher in priority
+			 * than what's currently running on its cpu.
+			 * This just means that p is waking up and hasn't
+			 * had a chance to schedule. We only pull
+			 * p if it is lower in priority than the
+			 * current task on its run queue, or if
+			 * this_rq's next task is lower in prio than
+			 * the current task on that rq.
+			 */
+			if (p->prio < src_rq->curr->prio ||
+			    (next && next->prio < src_rq->curr->prio))
+				goto bail;
+
+			ret = 1;
+
+			deactivate_task(src_rq, p, 0);
+			set_task_cpu(p, this_cpu);
+			activate_task(this_rq, p, 0);
+			/*
+			 * We continue with the search, just in
+			 * case there's an even higher prio task
+			 * in another runqueue. (low likelihood
+			 * but possible)
+			 */
+
+			/*
+			 * Update next so that we won't pick a task
+			 * on another cpu with a priority lower (or equal)
+			 * than the one we just picked.
+			 */
+			next = p;
+
+		}
+ bail:
+		spin_unlock(&src_rq->lock);
+	}
+
+	return ret;
+}
+
+static void schedule_balance_rt(struct rq *rq,
+				struct task_struct *prev)
+{
+	/* Try to pull RT tasks here if we lower this rq's prio */
+	if (unlikely(rt_task(prev)) &&
+	    rq->rt.highest_prio > prev->prio)
+		pull_rt_task(rq);
+}
+
 static void schedule_tail_balance_rt(struct rq *rq)
 {
 	/*
@@ -500,6 +664,7 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 }
 #else /* CONFIG_SMP */
 # define schedule_tail_balance_rt(rq)	do { } while (0)
+# define schedule_balance_rt(rq, prev)	do { } while (0)
 #endif /* CONFIG_SMP */
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)