@@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return se->parent;
 }
 
+/* return depth at which a sched entity is present in the hierarchy */
+static inline int depth_se(struct sched_entity *se)
+{
+	int depth = 0;
+
+	for_each_sched_entity(se)
+		depth++;
+
+	return depth;
+}
+
+static void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+	int se_depth, pse_depth;
+
+	/*
+	 * preemption test can be made between sibling entities who are in the
+	 * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
+	 * both tasks until we find their ancestors who are siblings of common
+	 * parent.
+	 */
+
+	/* First walk up until both entities are at same depth */
+	se_depth = depth_se(*se);
+	pse_depth = depth_se(*pse);
+
+	while (se_depth > pse_depth) {
+		se_depth--;
+		*se = parent_entity(*se);
+	}
+
+	while (pse_depth > se_depth) {
+		pse_depth--;
+		*pse = parent_entity(*pse);
+	}
+
+	while (!is_same_group(*se, *pse)) {
+		*se = parent_entity(*se);
+		*pse = parent_entity(*pse);
+	}
+}
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 
 static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
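
For illustration only, a minimal user-space sketch of the depth-matching walk that find_matching_se() performs, using a simplified node type that carries nothing but a parent pointer (toy_se, toy_depth and toy_match are hypothetical names, not part of the patch; is_same_group() is approximated by comparing parent pointers):

#include <assert.h>
#include <stddef.h>

/* toy stand-in for sched_entity: only the parent link matters here */
struct toy_se {
	struct toy_se *parent;
};

/* count the entity and its ancestors, analogous to depth_se() */
static int toy_depth(struct toy_se *se)
{
	int depth = 0;

	for (; se; se = se->parent)
		depth++;
	return depth;
}

/* lift both entities until they are siblings under a common parent,
 * analogous to find_matching_se(); is_same_group() is approximated
 * here by comparing parent pointers */
static void toy_match(struct toy_se **se, struct toy_se **pse)
{
	int se_depth = toy_depth(*se);
	int pse_depth = toy_depth(*pse);

	while (se_depth > pse_depth) {
		se_depth--;
		*se = (*se)->parent;
	}
	while (pse_depth > se_depth) {
		pse_depth--;
		*pse = (*pse)->parent;
	}
	while ((*se)->parent != (*pse)->parent) {
		*se = (*se)->parent;
		*pse = (*pse)->parent;
	}
}

int main(void)
{
	/* hierarchy: root <- a <- a1, root <- b */
	struct toy_se root = { NULL };
	struct toy_se a = { &root }, b = { &root };
	struct toy_se a1 = { &a };
	struct toy_se *x = &a1, *y = &b;

	toy_match(&x, &y);
	assert(x == &a && y == &b);	/* siblings on the same level */
	return 0;
}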
@@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return NULL;
 }
 
+static inline void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+}
+
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 
@@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return se->vruntime - cfs_rq->min_vruntime;
 }
 
+static void update_min_vruntime(struct cfs_rq *cfs_rq)
+{
+	u64 vruntime = cfs_rq->min_vruntime;
+
+	if (cfs_rq->curr)
+		vruntime = cfs_rq->curr->vruntime;
+
+	if (cfs_rq->rb_leftmost) {
+		struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
+						   struct sched_entity,
+						   run_node);
+
+		if (vruntime == cfs_rq->min_vruntime)
+			vruntime = se->vruntime;
+		else
+			vruntime = min_vruntime(vruntime, se->vruntime);
+	}
+
+	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+}
+
 /*
  * Enqueue an entity into the rb-tree:
  */
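
To illustrate the intent of update_min_vruntime() — follow the smaller of the current and leftmost entities' vruntime, but never let min_vruntime move backwards — here is a hedged, self-contained approximation on plain integers (toy_cfs_rq, min_vru and max_vru are stand-ins for the kernel's cfs_rq state and its wrap-safe min_vruntime()/max_vruntime() helpers):

#include <assert.h>
#include <stdint.h>

/* wrap-safe comparisons in the style of the kernel's min_vruntime()/max_vruntime() */
static uint64_t max_vru(uint64_t a, uint64_t b) { return (int64_t)(a - b) > 0 ? a : b; }
static uint64_t min_vru(uint64_t a, uint64_t b) { return (int64_t)(a - b) < 0 ? a : b; }

struct toy_cfs_rq {
	uint64_t min_vruntime;
	int has_curr, has_left;
	uint64_t curr_vruntime;		/* currently running entity */
	uint64_t left_vruntime;		/* leftmost queued entity */
};

/* mirrors the shape of update_min_vruntime(): prefer the smaller of curr and
 * the leftmost entity, then clamp so min_vruntime only ever increases */
static void toy_update_min_vruntime(struct toy_cfs_rq *q)
{
	uint64_t vruntime = q->min_vruntime;

	if (q->has_curr)
		vruntime = q->curr_vruntime;

	if (q->has_left) {
		if (vruntime == q->min_vruntime)
			vruntime = q->left_vruntime;
		else
			vruntime = min_vru(vruntime, q->left_vruntime);
	}

	q->min_vruntime = max_vru(q->min_vruntime, vruntime);
}

int main(void)
{
	struct toy_cfs_rq q = { .min_vruntime = 100, .has_curr = 1, .has_left = 1,
				.curr_vruntime = 150, .left_vruntime = 120 };

	toy_update_min_vruntime(&q);
	assert(q.min_vruntime == 120);	/* follows the leftmost waiter */

	q.left_vruntime = 90;		/* a waiter behind min_vruntime ... */
	toy_update_min_vruntime(&q);
	assert(q.min_vruntime == 120);	/* ... cannot drag min_vruntime backwards */
	return 0;
}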
@@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * Maintain a cache of leftmost tree entries (it is frequently
 	 * used):
 	 */
-	if (leftmost) {
+	if (leftmost)
 		cfs_rq->rb_leftmost = &se->run_node;
-		/*
-		 * maintain cfs_rq->min_vruntime to be a monotonic increasing
-		 * value tracking the leftmost vruntime in the tree.
-		 */
-		cfs_rq->min_vruntime =
-			max_vruntime(cfs_rq->min_vruntime, se->vruntime);
-	}
 
 	rb_link_node(&se->run_node, parent, link);
 	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -274,18 +336,9 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	if (cfs_rq->rb_leftmost == &se->run_node) {
 		struct rb_node *next_node;
-		struct sched_entity *next;
 
 		next_node = rb_next(&se->run_node);
 		cfs_rq->rb_leftmost = next_node;
-
-		if (next_node) {
-			next = rb_entry(next_node,
-					struct sched_entity, run_node);
-			cfs_rq->min_vruntime =
-				max_vruntime(cfs_rq->min_vruntime,
-					     next->vruntime);
-		}
 	}
 
 	if (cfs_rq->next == se)
@@ -424,6 +477,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	schedstat_add(cfs_rq, exec_clock, delta_exec);
 	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
 	curr->vruntime += delta_exec_weighted;
+	update_min_vruntime(cfs_rq);
 }
 
 static void update_curr(struct cfs_rq *cfs_rq)
@@ -613,13 +667,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
-	u64 vruntime;
-
-	if (first_fair(cfs_rq)) {
-		vruntime = min_vruntime(cfs_rq->min_vruntime,
-				__pick_next_entity(cfs_rq)->vruntime);
-	} else
-		vruntime = cfs_rq->min_vruntime;
+	u64 vruntime = cfs_rq->min_vruntime;
 
 	/*
 	 * The 'current' period is already promised to the current tasks,
@@ -696,6 +744,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
 	account_entity_dequeue(cfs_rq, se);
+	update_min_vruntime(cfs_rq);
 }
 
 /*
@@ -742,16 +791,14 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
+
 static struct sched_entity *
 pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	struct rq *rq = rq_of(cfs_rq);
-	u64 pair_slice = rq->clock - cfs_rq->pair_start;
-
-	if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
-		cfs_rq->pair_start = rq->clock;
+	if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1)
 		return se;
-	}
 
 	return cfs_rq->next;
 }
@@ -1122,10 +1169,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
 		return 0;
 
-	if (!sync && sched_feat(SYNC_WAKEUPS) &&
-	    curr->se.avg_overlap < sysctl_sched_migration_cost &&
-	    p->se.avg_overlap < sysctl_sched_migration_cost)
-		sync = 1;
+	if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
+			p->se.avg_overlap > sysctl_sched_migration_cost))
+		sync = 0;
 
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
@@ -1244,12 +1290,41 @@ static unsigned long wakeup_gran(struct sched_entity *se)
 	 * More easily preempt - nice tasks, while not making it harder for
 	 * + nice tasks.
 	 */
-	if (sched_feat(ASYM_GRAN))
-		gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
+	if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
+		gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
 
 	return gran;
 }
 
+/*
+ * Should 'se' preempt 'curr'.
+ *
+ *             |s1
+ *        |s2
+ *   |s3
+ *         g
+ *      |<--->|c
+ *
+ *  w(c, s1) = -1
+ *  w(c, s2) =  0
+ *  w(c, s3) =  1
+ *
+ */
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
+{
+	s64 gran, vdiff = curr->vruntime - se->vruntime;
+
+	if (vdiff <= 0)
+		return -1;
+
+	gran = wakeup_gran(curr);
+	if (vdiff > gran)
+		return 1;
+
+	return 0;
+}
+
 /*
  * Preempt the current task with a newly woken task if needed:
  */
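
To make the w(c, sN) diagram above concrete, a rough standalone rendering of the three-way decision with example numbers (toy_preempt and the literal granularity of 5 are illustrative; in the patch the granularity comes from wakeup_gran()):

#include <assert.h>
#include <stdint.h>

/* -1: se's vruntime is at or beyond curr's, no preemption;
 *  0: se is ahead of curr, but within the granularity;
 *  1: se is ahead by more than the granularity -> preempt */
static int toy_preempt(int64_t curr_vruntime, int64_t se_vruntime, int64_t gran)
{
	int64_t vdiff = curr_vruntime - se_vruntime;

	if (vdiff <= 0)
		return -1;
	if (vdiff > gran)
		return 1;
	return 0;
}

int main(void)
{
	int64_t gran = 5;	/* stands in for wakeup_gran(curr) */

	assert(toy_preempt(100, 103, gran) == -1);	/* s1: more vruntime than curr */
	assert(toy_preempt(100,  97, gran) ==  0);	/* s2: ahead, but within gran */
	assert(toy_preempt(100,  90, gran) ==  1);	/* s3: far enough ahead to preempt */
	return 0;
}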
@@ -1258,7 +1333,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 	struct task_struct *curr = rq->curr;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	struct sched_entity *se = &curr->se, *pse = &p->se;
-	s64 delta_exec;
 
 	if (unlikely(rt_prio(p->prio))) {
 		update_rq_clock(rq);
@@ -1296,9 +1370,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 		return;
 	}
 
-	delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
-	if (delta_exec > wakeup_gran(pse))
-		resched_task(curr);
+	find_matching_se(&se, &pse);
+
+	while (se) {
+		BUG_ON(!pse);
+
+		if (wakeup_preempt_entity(se, pse) == 1) {
+			resched_task(curr);
+			break;
+		}
+
+		se = parent_entity(se);
+		pse = parent_entity(pse);
+	}
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1594,9 +1678,6 @@ static const struct sched_class fair_sched_class = {
 	.enqueue_task		= enqueue_task_fair,
 	.dequeue_task		= dequeue_task_fair,
 	.yield_task		= yield_task_fair,
-#ifdef CONFIG_SMP
-	.select_task_rq		= select_task_rq_fair,
-#endif /* CONFIG_SMP */
 
 	.check_preempt_curr	= check_preempt_wakeup,
 
@@ -1604,6 +1685,8 @@ static const struct sched_class fair_sched_class = {
 	.put_prev_task		= put_prev_task_fair,
 
 #ifdef CONFIG_SMP
+	.select_task_rq		= select_task_rq_fair,
+
 	.load_balance		= load_balance_fair,
 	.move_one_task		= move_one_task_fair,
 #endif