@@ -239,7 +239,6 @@ struct runqueue {
 
 	task_t *migration_thread;
 	struct list_head migration_queue;
-	int cpu;
 #endif
 
 #ifdef CONFIG_SCHEDSTATS
@@ -1074,9 +1073,10 @@ static int sched_balance_self(int cpu, int flag)
 	struct task_struct *t = current;
 	struct sched_domain *tmp, *sd = NULL;
 
-	for_each_domain(cpu, tmp)
+	for_each_domain(cpu, tmp) {
 		if (tmp->flags & flag)
 			sd = tmp;
+	}
 
 	while (sd) {
 		cpumask_t span;
@@ -1691,9 +1691,6 @@ unsigned long nr_active(void)
 /*
  * double_rq_lock - safely lock two runqueues
  *
- * We must take them in cpu order to match code in
- * dependent_sleeper and wake_dependent_sleeper.
- *
  * Note this does not disable interrupts like task_rq_lock,
  * you need to do so manually before calling.
  */
@@ -1705,7 +1702,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
 		spin_lock(&rq1->lock);
 		__acquire(rq2->lock);	/* Fake it out ;) */
 	} else {
-		if (rq1->cpu < rq2->cpu) {
+		if (rq1 < rq2) {
 			spin_lock(&rq1->lock);
 			spin_lock(&rq2->lock);
 		} else {
@@ -1741,7 +1738,7 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
 	__acquires(this_rq->lock)
 {
 	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest->cpu < this_rq->cpu) {
+		if (busiest < this_rq) {
 			spin_unlock(&this_rq->lock);
 			spin_lock(&busiest->lock);
 			spin_lock(&this_rq->lock);
@@ -2352,10 +2349,11 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
 	double_lock_balance(busiest_rq, target_rq);
 
 	/* Search for an sd spanning us and the target CPU. */
-	for_each_domain(target_cpu, sd)
+	for_each_domain(target_cpu, sd) {
 		if ((sd->flags & SD_LOAD_BALANCE) &&
 		    cpu_isset(busiest_cpu, sd->span))
 				break;
+	}
 
 	if (unlikely(sd == NULL))
 		goto out;
@@ -2691,48 +2689,35 @@ static inline void wakeup_busy_runqueue(runqueue_t *rq)
 		resched_task(rq->idle);
 }
 
-static void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+/*
+ * Called with interrupt disabled and this_rq's runqueue locked.
+ */
+static void wake_sleeping_dependent(int this_cpu)
 {
 	struct sched_domain *tmp, *sd = NULL;
-	cpumask_t sibling_map;
 	int i;
 
-	for_each_domain(this_cpu, tmp)
-		if (tmp->flags & SD_SHARE_CPUPOWER)
+	for_each_domain(this_cpu, tmp) {
+		if (tmp->flags & SD_SHARE_CPUPOWER) {
 			sd = tmp;
+			break;
+		}
+	}
 
 	if (!sd)
 		return;
 
-	/*
-	 * Unlock the current runqueue because we have to lock in
-	 * CPU order to avoid deadlocks. Caller knows that we might
-	 * unlock. We keep IRQs disabled.
-	 */
-	spin_unlock(&this_rq->lock);
-
-	sibling_map = sd->span;
-
-	for_each_cpu_mask(i, sibling_map)
-		spin_lock(&cpu_rq(i)->lock);
-	/*
-	 * We clear this CPU from the mask. This both simplifies the
-	 * inner loop and keps this_rq locked when we exit:
-	 */
-	cpu_clear(this_cpu, sibling_map);
-
-	for_each_cpu_mask(i, sibling_map) {
+	for_each_cpu_mask(i, sd->span) {
 		runqueue_t *smt_rq = cpu_rq(i);
 
+		if (i == this_cpu)
+			continue;
+		if (unlikely(!spin_trylock(&smt_rq->lock)))
+			continue;
+
 		wakeup_busy_runqueue(smt_rq);
+		spin_unlock(&smt_rq->lock);
 	}
-
-	for_each_cpu_mask(i, sibling_map)
-		spin_unlock(&cpu_rq(i)->lock);
-	/*
-	 * We exit with this_cpu's rq still held and IRQs
-	 * still disabled:
-	 */
 }
 
 /*
@@ -2745,52 +2730,46 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
 	return p->time_slice * (100 - sd->per_cpu_gain) / 100;
 }
 
-static int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+/*
+ * To minimise lock contention and not have to drop this_rq's runlock we only
+ * trylock the sibling runqueues and bypass those runqueues if we fail to
+ * acquire their lock. As we only trylock the normal locking order does not
+ * need to be obeyed.
+ */
+static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
 {
 	struct sched_domain *tmp, *sd = NULL;
-	cpumask_t sibling_map;
-	prio_array_t *array;
 	int ret = 0, i;
-	task_t *p;
 
-	for_each_domain(this_cpu, tmp)
-		if (tmp->flags & SD_SHARE_CPUPOWER)
+	/* kernel/rt threads do not participate in dependent sleeping */
+	if (!p->mm || rt_task(p))
+		return 0;
+
+	for_each_domain(this_cpu, tmp) {
+		if (tmp->flags & SD_SHARE_CPUPOWER) {
 			sd = tmp;
+			break;
+		}
+	}
 
 	if (!sd)
 		return 0;
 
-	/*
-	 * The same locking rules and details apply as for
-	 * wake_sleeping_dependent():
-	 */
-	spin_unlock(&this_rq->lock);
-	sibling_map = sd->span;
-	for_each_cpu_mask(i, sibling_map)
-		spin_lock(&cpu_rq(i)->lock);
-	cpu_clear(this_cpu, sibling_map);
+	for_each_cpu_mask(i, sd->span) {
+		runqueue_t *smt_rq;
+		task_t *smt_curr;
 
-	/*
-	 * Establish next task to be run - it might have gone away because
-	 * we released the runqueue lock above:
-	 */
-	if (!this_rq->nr_running)
-		goto out_unlock;
-	array = this_rq->active;
-	if (!array->nr_active)
-		array = this_rq->expired;
-	BUG_ON(!array->nr_active);
+		if (i == this_cpu)
+			continue;
 
-	p = list_entry(array->queue[sched_find_first_bit(array->bitmap)].next,
-		task_t, run_list);
+		smt_rq = cpu_rq(i);
+		if (unlikely(!spin_trylock(&smt_rq->lock)))
+			continue;
 
-	for_each_cpu_mask(i, sibling_map) {
-		runqueue_t *smt_rq = cpu_rq(i);
-		task_t *smt_curr = smt_rq->curr;
+		smt_curr = smt_rq->curr;
 
-		/* Kernel threads do not participate in dependent sleeping */
-		if (!p->mm || !smt_curr->mm || rt_task(p))
-			goto check_smt_task;
+		if (!smt_curr->mm)
+			goto unlock;
 
 		/*
 		 * If a user task with lower static priority than the
@@ -2808,49 +2787,24 @@ static int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
 			if ((jiffies % DEF_TIMESLICE) >
 				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
 					ret = 1;
-		} else
+		} else {
 			if (smt_curr->static_prio < p->static_prio &&
 				!TASK_PREEMPTS_CURR(p, smt_rq) &&
 				smt_slice(smt_curr, sd) > task_timeslice(p))
 					ret = 1;
-
-check_smt_task:
-		if ((!smt_curr->mm && smt_curr != smt_rq->idle) ||
-			rt_task(smt_curr))
-				continue;
-		if (!p->mm) {
-			wakeup_busy_runqueue(smt_rq);
-			continue;
-		}
-
-		/*
-		 * Reschedule a lower priority task on the SMT sibling for
-		 * it to be put to sleep, or wake it up if it has been put to
-		 * sleep for priority reasons to see if it should run now.
-		 */
-		if (rt_task(p)) {
-			if ((jiffies % DEF_TIMESLICE) >
-				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
-					resched_task(smt_curr);
-		} else {
-			if (TASK_PREEMPTS_CURR(p, smt_rq) &&
-				smt_slice(p, sd) > task_timeslice(smt_curr))
-					resched_task(smt_curr);
-			else
-				wakeup_busy_runqueue(smt_rq);
 		}
+unlock:
+		spin_unlock(&smt_rq->lock);
 	}
-out_unlock:
-	for_each_cpu_mask(i, sibling_map)
-		spin_unlock(&cpu_rq(i)->lock);
 	return ret;
 }
 #else
-static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+static inline void wake_sleeping_dependent(int this_cpu)
 {
 }
 
-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq,
+					task_t *p)
 {
 	return 0;
 }
@@ -2972,32 +2926,13 @@ need_resched_nonpreemptible:
 
 	cpu = smp_processor_id();
 	if (unlikely(!rq->nr_running)) {
-go_idle:
 		idle_balance(cpu, rq);
 		if (!rq->nr_running) {
 			next = rq->idle;
 			rq->expired_timestamp = 0;
-			wake_sleeping_dependent(cpu, rq);
-			/*
-			 * wake_sleeping_dependent() might have released
-			 * the runqueue, so break out if we got new
-			 * tasks meanwhile:
-			 */
-			if (!rq->nr_running)
-				goto switch_tasks;
-		}
-	} else {
-		if (dependent_sleeper(cpu, rq)) {
-			next = rq->idle;
+			wake_sleeping_dependent(cpu);
 			goto switch_tasks;
 		}
-		/*
-		 * dependent_sleeper() releases and reacquires the runqueue
-		 * lock, hence go into the idle loop if the rq went
-		 * empty meanwhile:
-		 */
-		if (unlikely(!rq->nr_running))
-			goto go_idle;
 	}
 
 	array = rq->active;
@@ -3035,6 +2970,8 @@ go_idle:
 		}
 	}
 	next->sleep_type = SLEEP_NORMAL;
+	if (dependent_sleeper(cpu, rq, next))
+		next = rq->idle;
 switch_tasks:
 	if (next == rq->idle)
 		schedstat_inc(rq, sched_goidle);
@@ -6144,7 +6081,6 @@ void __init sched_init(void)
 		rq->push_cpu = 0;
 		rq->migration_thread = NULL;
 		INIT_LIST_HEAD(&rq->migration_queue);
-		rq->cpu = i;
 #endif
 		atomic_set(&rq->nr_iowait, 0);
 
@@ -6205,7 +6141,7 @@ void normalize_rt_tasks(void)
 	runqueue_t *rq;
 
 	read_lock_irq(&tasklist_lock);
-	for_each_process (p) {
+	for_each_process(p) {
 		if (!rt_task(p))
 			continue;
 