@@ -877,6 +877,15 @@ static unsigned int task_scan_max(struct task_struct *p)
 	return max(smin, smax);
 }
 
+/*
+ * Once a preferred node is selected the scheduler balancer will prefer moving
+ * a task to that node for sysctl_numa_balancing_settle_count number of PTE
+ * scans. This will give the process the chance to accumulate more faults on
+ * the preferred node but still allow the scheduler to move the task again if
+ * the node's CPUs are overloaded.
+ */
+unsigned int sysctl_numa_balancing_settle_count __read_mostly = 3;
+
 static void task_numa_placement(struct task_struct *p)
 {
 	int seq, nid, max_nid = -1;
@@ -888,6 +897,7 @@ static void task_numa_placement(struct task_struct *p)
 	if (p->numa_scan_seq == seq)
 		return;
 	p->numa_scan_seq = seq;
+	p->numa_migrate_seq++;
 	p->numa_scan_period_max = task_scan_max(p);
 
 	/* Find the node with the highest number of faults */
@@ -907,8 +917,10 @@ static void task_numa_placement(struct task_struct *p)
 	}
 
 	/* Update the task's preferred node if necessary */
-	if (max_faults && max_nid != p->numa_preferred_nid)
+	if (max_faults && max_nid != p->numa_preferred_nid) {
 		p->numa_preferred_nid = max_nid;
+		p->numa_migrate_seq = 0;
+	}
 }
 
 /*
@@ -4071,6 +4083,38 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 	return delta < (s64)sysctl_sched_migration_cost;
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+/* Returns true if the destination node has incurred more faults */
+static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
+{
+	int src_nid, dst_nid;
+
+	if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults ||
+	    !(env->sd->flags & SD_NUMA)) {
+		return false;
+	}
+
+	src_nid = cpu_to_node(env->src_cpu);
+	dst_nid = cpu_to_node(env->dst_cpu);
+
+	if (src_nid == dst_nid ||
+	    p->numa_migrate_seq >= sysctl_numa_balancing_settle_count)
+		return false;
+
+	if (dst_nid == p->numa_preferred_nid ||
+	    p->numa_faults[dst_nid] > p->numa_faults[src_nid])
+		return true;
+
+	return false;
+}
+#else
+static inline bool migrate_improves_locality(struct task_struct *p,
+					     struct lb_env *env)
+{
+	return false;
+}
+#endif
+
 /*
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
@@ -4128,11 +4172,22 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 
 	/*
 	 * Aggressive migration if:
-	 * 1) task is cache cold, or
-	 * 2) too many balance attempts have failed.
+	 * 1) destination numa node is preferred
+	 * 2) task is cache cold, or
+	 * 3) too many balance attempts have failed.
 	 */
-
 	tsk_cache_hot = task_hot(p, rq_clock_task(env->src_rq), env->sd);
+
+	if (migrate_improves_locality(p, env)) {
+#ifdef CONFIG_SCHEDSTATS
+		if (tsk_cache_hot) {
+			schedstat_inc(env->sd, lb_hot_gained[env->idle]);
+			schedstat_inc(p, se.statistics.nr_forced_migrations);
+		}
+#endif
+		return 1;
+	}
+
 	if (!tsk_cache_hot ||
 	    env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
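
Taken together, the hunks above implement a simple hysteresis: numa_migrate_seq counts PTE scans since the preferred node last changed, and migrate_improves_locality() only favours a locality-improving migration while that count is below sysctl_numa_balancing_settle_count. The standalone C sketch below illustrates the same gating outside the kernel. It is a simplified stand-in, not kernel code: struct task, nr_faults, settle_count, improves_locality() and placement_scan() are hypothetical analogues of task_struct, numa_faults, sysctl_numa_balancing_settle_count and the patched functions, and the sched_feat(NUMA_FAVOUR_HIGHER)/SD_NUMA checks are elided.

#include <stdbool.h>
#include <stdio.h>

#define NR_NODES 2

static unsigned int settle_count = 3;	/* sysctl_numa_balancing_settle_count */

struct task {
	unsigned long nr_faults[NR_NODES];	/* plays the role of p->numa_faults */
	int preferred_nid;			/* p->numa_preferred_nid */
	unsigned int migrate_seq;		/* PTE scans since preference changed */
};

/* Analogue of migrate_improves_locality(): may the balancer pull p to dst_nid? */
static bool improves_locality(struct task *p, int src_nid, int dst_nid)
{
	/* Task has settled: stop favouring locality, let plain load balancing win. */
	if (src_nid == dst_nid || p->migrate_seq >= settle_count)
		return false;

	/* Destination is the preferred node, or has incurred more faults. */
	return dst_nid == p->preferred_nid ||
	       p->nr_faults[dst_nid] > p->nr_faults[src_nid];
}

/* Analogue of the task_numa_placement() changes: called once per PTE scan. */
static void placement_scan(struct task *p)
{
	int nid, max_nid = p->preferred_nid;
	unsigned long max_faults = 0;

	p->migrate_seq++;
	for (nid = 0; nid < NR_NODES; nid++) {
		if (p->nr_faults[nid] > max_faults) {
			max_faults = p->nr_faults[nid];
			max_nid = nid;
		}
	}
	if (max_faults && max_nid != p->preferred_nid) {
		p->preferred_nid = max_nid;
		p->migrate_seq = 0;	/* new preference: settle again */
	}
}

int main(void)
{
	struct task p = { .nr_faults = { 10, 40 }, .preferred_nid = -1 };
	int scan;

	for (scan = 0; scan < 5; scan++) {
		placement_scan(&p);
		printf("scan %d: preferred=%d migrate_seq=%u pull 0->1? %s\n",
		       scan, p.preferred_nid, p.migrate_seq,
		       improves_locality(&p, 0, 1) ? "yes" : "no");
	}
	return 0;
}

Run, the sketch reports "yes" for the first settle_count scans after node 1 becomes preferred and "no" afterwards, mirroring the window during which can_migrate_task() above treats even a cache-hot task as aggressively migratable towards its preferred node.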