
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: disable the hrtick for now
  sched: revert back to per-rq vruntime
  sched: fair scheduler should not resched rt tasks
  sched: optimize group load balancer
  sched: minor fast-path overhead reduction
  sched: fix the wrong mask_len, cleanup
  sched: kill unused scheduler decl.
  sched: fix the wrong mask_len
  sched: only update rq->clock while holding rq->lock
Linus Torvalds, 16 years ago
commit 133e887f90
6 changed files with 79 additions and 50 deletions
  1. include/linux/sched.h (+1 -1)
  2. kernel/sched.c (+26 -25)
  3. kernel/sched_fair.c (+40 -22)
  4. kernel/sched_features.h (+1 -1)
  5. kernel/sched_stats.h (+1 -1)
  6. kernel/sysctl.c (+10 -0)

+ 1 - 1
include/linux/sched.h

@@ -287,7 +287,6 @@ extern void trap_init(void);
 extern void account_process_tick(struct task_struct *task, int user);
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
-extern void hrtick_resched(void);
 
 extern void sched_show_task(struct task_struct *p);
 
@@ -1665,6 +1664,7 @@ extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_shares_ratelimit;
+extern unsigned int sysctl_sched_shares_thresh;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,

+ 26 - 25
kernel/sched.c

@@ -818,6 +818,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
  */
 unsigned int sysctl_sched_shares_ratelimit = 250000;
 
+/*
+ * Inject some fuzzyness into changing the per-cpu group shares
+ * this avoids remote rq-locks at the expense of fairness.
+ * default: 4
+ */
+unsigned int sysctl_sched_shares_thresh = 4;
+
 /*
  * period over which we measure -rt task cpu usage in us.
  * default: 1s
@@ -1454,8 +1461,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
  * Calculate and set the cpu's group shares.
  */
 static void
-__update_group_shares_cpu(struct task_group *tg, int cpu,
-			  unsigned long sd_shares, unsigned long sd_rq_weight)
+update_group_shares_cpu(struct task_group *tg, int cpu,
+			unsigned long sd_shares, unsigned long sd_rq_weight)
 {
 	int boost = 0;
 	unsigned long shares;
@@ -1486,19 +1493,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
 	 *
 	 */
 	shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
 
-	/*
-	 * record the actual number of shares, not the boosted amount.
-	 */
-	tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-	tg->cfs_rq[cpu]->rq_weight = rq_weight;
+	if (abs(shares - tg->se[cpu]->load.weight) >
+			sysctl_sched_shares_thresh) {
+		struct rq *rq = cpu_rq(cpu);
+		unsigned long flags;
 
-	if (shares < MIN_SHARES)
-		shares = MIN_SHARES;
-	else if (shares > MAX_SHARES)
-		shares = MAX_SHARES;
+		spin_lock_irqsave(&rq->lock, flags);
+		/*
+		 * record the actual number of shares, not the boosted amount.
+		 */
+		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
+		tg->cfs_rq[cpu]->rq_weight = rq_weight;
 
-	__set_se_shares(tg->se[cpu], shares);
+		__set_se_shares(tg->se[cpu], shares);
+		spin_unlock_irqrestore(&rq->lock, flags);
+	}
 }
 
 /*
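
The hunk above is the heart of "sched: optimize group load balancer": the per-cpu share is only rewritten, and the remote rq->lock only taken, when the new value differs from the current se->load.weight by more than sysctl_sched_shares_thresh. Below is a minimal userspace sketch of that gating idea; it is illustrative only, and set_weight() is a hypothetical stand-in for __set_se_shares() running under the remote rq->lock.

#include <stdio.h>

#define SHARES_THRESH	4	/* mirrors the sysctl_sched_shares_thresh default */

static unsigned long cur_weight = 1024;	/* stands in for se->load.weight */

/* stand-in for __set_se_shares(); in the kernel this needs the remote rq->lock */
static void set_weight(unsigned long shares)
{
	cur_weight = shares;
}

static void update_shares(unsigned long shares)
{
	unsigned long diff = shares > cur_weight ?
			shares - cur_weight : cur_weight - shares;

	/* small changes are ignored: a little fairness for far fewer remote locks */
	if (diff <= SHARES_THRESH)
		return;
	set_weight(shares);
}

int main(void)
{
	update_shares(1026);			/* |diff| = 2  <= 4: skipped */
	update_shares(1100);			/* |diff| = 76 >  4: applied */
	printf("weight = %lu\n", cur_weight);	/* prints "weight = 1100" */
	return 0;
}

Small fluctuations are simply dropped, which is the fairness-for-locking trade-off the new in-kernel comment describes.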
@@ -1527,14 +1538,8 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	if (!rq_weight)
 		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
 
-	for_each_cpu_mask(i, sd->span) {
-		struct rq *rq = cpu_rq(i);
-		unsigned long flags;
-
-		spin_lock_irqsave(&rq->lock, flags);
-		__update_group_shares_cpu(tg, i, shares, rq_weight);
-		spin_unlock_irqrestore(&rq->lock, flags);
-	}
+	for_each_cpu_mask(i, sd->span)
+		update_group_shares_cpu(tg, i, shares, rq_weight);
 
 	return 0;
 }
@@ -4443,12 +4448,8 @@ need_resched_nonpreemptible:
 	if (sched_feat(HRTICK))
 		hrtick_clear(rq);
 
-	/*
-	 * Do the rq-clock update outside the rq lock:
-	 */
-	local_irq_disable();
+	spin_lock_irq(&rq->lock);
 	update_rq_clock(rq);
-	spin_lock(&rq->lock);
 	clear_tsk_need_resched(prev);
 
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {

+ 40 - 22
kernel/sched_fair.c

@@ -73,6 +73,8 @@ unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
+static const struct sched_class fair_sched_class;
+
 /**************************************************************
  * CFS operations on generic schedulable entities:
  */
@@ -334,7 +336,7 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 #endif
 
 /*
- * delta *= w / rw
+ * delta *= P[w / rw]
  */
 static inline unsigned long
 calc_delta_weight(unsigned long delta, struct sched_entity *se)
@@ -348,15 +350,13 @@ calc_delta_weight(unsigned long delta, struct sched_entity *se)
 }
 
 /*
- * delta *= rw / w
+ * delta /= w
  */
 static inline unsigned long
 calc_delta_fair(unsigned long delta, struct sched_entity *se)
 {
-	for_each_sched_entity(se) {
-		delta = calc_delta_mine(delta,
-				cfs_rq_of(se)->load.weight, &se->load);
-	}
+	if (unlikely(se->load.weight != NICE_0_LOAD))
+		delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
 
 	return delta;
 }
@@ -386,26 +386,26 @@ static u64 __sched_period(unsigned long nr_running)
  * We calculate the wall-time slice from the period by taking a part
  * proportional to the weight.
  *
- * s = p*w/rw
+ * s = p*P[w/rw]
  */
 static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
+	unsigned long nr_running = cfs_rq->nr_running;
+
+	if (unlikely(!se->on_rq))
+		nr_running++;
+
+	return calc_delta_weight(__sched_period(nr_running), se);
 }
 
 /*
  * We calculate the vruntime slice of a to be inserted task
  *
- * vs = s*rw/w = p
+ * vs = s/w
  */
-static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	unsigned long nr_running = cfs_rq->nr_running;
-
-	if (!se->on_rq)
-		nr_running++;
-
-	return __sched_period(nr_running);
+	return calc_delta_fair(sched_slice(cfs_rq, se), se);
 }
 
 /*
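
A short worked example of the new comments (s = p*P[w/rw], vs = s/w), assuming a single runqueue with two tasks of weight 1024 and 2048, a 20 ms period, and NICE_0_LOAD = 1024 (illustrative numbers, not taken from this commit):

    rw  = 1024 + 2048 = 3072
    s_A = 20 ms * 1024/3072 ~ 6.7 ms,   vs_A = s_A * 1024/1024 ~ 6.7 ms
    s_B = 20 ms * 2048/3072 ~ 13.3 ms,  vs_B = s_B * 1024/2048 ~ 6.7 ms

Both entities advance their vruntime by the same amount per slice, which is exactly what the weight-scaled calc_delta_fair() above is meant to guarantee.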
@@ -628,7 +628,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	 * stays open at the end.
 	 */
 	if (initial && sched_feat(START_DEBIT))
-		vruntime += sched_vslice_add(cfs_rq, se);
+		vruntime += sched_vslice(cfs_rq, se);
 
 	if (!initial) {
 		/* sleeps upto a single latency don't count. */
@@ -748,7 +748,7 @@ pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	struct rq *rq = rq_of(cfs_rq);
 	u64 pair_slice = rq->clock - cfs_rq->pair_start;
 
-	if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) {
+	if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
 		cfs_rq->pair_start = rq->clock;
 		return se;
 	}
@@ -849,11 +849,31 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
 		hrtick_start(rq, delta);
 	}
 }
+
+/*
+ * called from enqueue/dequeue and updates the hrtick when the
+ * current task is from our class and nr_running is low enough
+ * to matter.
+ */
+static void hrtick_update(struct rq *rq)
+{
+	struct task_struct *curr = rq->curr;
+
+	if (curr->sched_class != &fair_sched_class)
+		return;
+
+	if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
+		hrtick_start_fair(rq, curr);
+}
 #else /* !CONFIG_SCHED_HRTICK */
 static inline void
 hrtick_start_fair(struct rq *rq, struct task_struct *p)
 {
 }
+
+static inline void hrtick_update(struct rq *rq)
+{
+}
 #endif
 
 /*
@@ -874,7 +894,7 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 		wakeup = 1;
 	}
 
-	hrtick_start_fair(rq, rq->curr);
+	hrtick_update(rq);
 }
 
 /*
@@ -896,7 +916,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
 		sleep = 1;
 	}
 
-	hrtick_start_fair(rq, rq->curr);
+	hrtick_update(rq);
 }
 
 /*
@@ -1002,8 +1022,6 @@ static inline int wake_idle(int cpu, struct task_struct *p)
 
 #ifdef CONFIG_SMP
 
-static const struct sched_class fair_sched_class;
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * effective_load() calculates the load change as seen from the root_task_group

+ 1 - 1
kernel/sched_features.h

@@ -5,7 +5,7 @@ SCHED_FEAT(START_DEBIT, 1)
 SCHED_FEAT(AFFINE_WAKEUPS, 1)
 SCHED_FEAT(CACHE_HOT_BUDDY, 1)
 SCHED_FEAT(SYNC_WAKEUPS, 1)
-SCHED_FEAT(HRTICK, 1)
+SCHED_FEAT(HRTICK, 0)
 SCHED_FEAT(DOUBLE_TICK, 0)
 SCHED_FEAT(ASYM_GRAN, 1)
 SCHED_FEAT(LB_BIAS, 1)

+ 1 - 1
kernel/sched_stats.h

@@ -9,7 +9,7 @@
 static int show_schedstat(struct seq_file *seq, void *v)
 {
 	int cpu;
-	int mask_len = NR_CPUS/32 * 9;
+	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
 	char *mask_str = kmalloc(mask_len, GFP_KERNEL);
 
 	if (mask_str == NULL)
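
Both "fix the wrong mask_len" entries in the commit message point at this line: each 32-bit word of the cpumask needs roughly 9 characters in the printed mask (8 hex digits plus a separator), so a truncating division under-sizes the buffer whenever NR_CPUS is not a multiple of 32. A small standalone sketch of the arithmetic, using the DIV_ROUND_UP definition from include/linux/kernel.h:

#include <stdio.h>

/* copied from include/linux/kernel.h */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	int nr_cpus = 255;	/* any value that is not a multiple of 32 */

	/* old: truncating division drops the partial last word */
	printf("old mask_len = %d\n", nr_cpus / 32 * 9);		/* 63 */
	/* new: round up so the last word still fits in the buffer */
	printf("new mask_len = %d\n", DIV_ROUND_UP(nr_cpus, 32) * 9);	/* 72 */
	return 0;
}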

+ 10 - 0
kernel/sysctl.c

@@ -274,6 +274,16 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_shares_thresh",
+		.data		= &sysctl_sched_shares_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_child_runs_first",
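
Once this entry is registered, the tunable shows up as /proc/sys/kernel/sched_shares_thresh, and proc_dointvec_minmax with .extra1 = &zero rejects negative writes. A small userspace sketch (not part of this commit) that reads the current value on a kernel carrying this patch:

#include <stdio.h>

int main(void)
{
	unsigned int thresh;
	FILE *f = fopen("/proc/sys/kernel/sched_shares_thresh", "r");

	if (!f)
		return 1;	/* kernel without this patch, or procfs not mounted */
	if (fscanf(f, "%u", &thresh) == 1)
		printf("sched_shares_thresh = %u\n", thresh);	/* 4 by default */
	fclose(f);
	return 0;
}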