@@ -886,6 +886,31 @@ static unsigned int task_scan_max(struct task_struct *p)
  */
 unsigned int sysctl_numa_balancing_settle_count __read_mostly = 3;
 
+static unsigned long weighted_cpuload(const int cpu);
+
+
+static int
+find_idlest_cpu_node(int this_cpu, int nid)
+{
+	unsigned long load, min_load = ULONG_MAX;
+	int i, idlest_cpu = this_cpu;
+
+	BUG_ON(cpu_to_node(this_cpu) == nid);
+
+	rcu_read_lock();
+	for_each_cpu(i, cpumask_of_node(nid)) {
+		load = weighted_cpuload(i);
+
+		if (load < min_load) {
+			min_load = load;
+			idlest_cpu = i;
+		}
+	}
+	rcu_read_unlock();
+
+	return idlest_cpu;
+}
+
 static void task_numa_placement(struct task_struct *p)
 {
 	int seq, nid, max_nid = -1;
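For illustration, a self-contained user-space sketch of the selection loop
that find_idlest_cpu_node() performs. The loads[] table and node_cpus[]
array are hypothetical stand-ins for weighted_cpuload() and
cpumask_of_node(); the kernel version additionally holds rcu_read_lock()
across the scan.

/*
 * Stand-alone sketch of the min-load scan in find_idlest_cpu_node().
 * Build: cc -o idlest idlest.c && ./idlest
 */
#include <limits.h>
#include <stdio.h>

#define NR_CPUS 8

/* Hypothetical per-CPU loads, standing in for weighted_cpuload() */
static const unsigned long loads[NR_CPUS] = { 90, 10, 75, 30, 55, 5, 60, 40 };

/* CPUs 4-7 make up the target node, standing in for cpumask_of_node() */
static const int node_cpus[] = { 4, 5, 6, 7 };

static int find_idlest_cpu(int fallback_cpu)
{
	unsigned long load, min_load = ULONG_MAX;
	int idlest_cpu = fallback_cpu;
	size_t i;

	/* Same walk as the patch: keep the CPU with the lowest load */
	for (i = 0; i < sizeof(node_cpus) / sizeof(node_cpus[0]); i++) {
		load = loads[node_cpus[i]];
		if (load < min_load) {
			min_load = load;
			idlest_cpu = node_cpus[i];
		}
	}

	return idlest_cpu;
}

int main(void)
{
	/* Prints 5: CPU 5 carries the lowest stand-in load on the node */
	printf("idlest cpu: %d\n", find_idlest_cpu(0));
	return 0;
}

The second hunk wires the helper into task_numa_placement():
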
@@ -916,10 +941,29 @@ static void task_numa_placement(struct task_struct *p)
 		}
 	}
 
-	/* Update the tasks preferred node if necessary */
+	/*
+	 * Record the preferred node as the node with the most faults,
+	 * requeue the task to be running on the idlest CPU on the
+	 * preferred node and reset the scanning rate to recheck
+	 * the working set placement.
+	 */
 	if (max_faults && max_nid != p->numa_preferred_nid) {
+		int preferred_cpu;
+
+		/*
+		 * If the task is not on the preferred node then find the most
+		 * idle CPU to migrate to.
+		 */
+		preferred_cpu = task_cpu(p);
+		if (cpu_to_node(preferred_cpu) != max_nid) {
+			preferred_cpu = find_idlest_cpu_node(preferred_cpu,
+							     max_nid);
+		}
+
+		/* Update the preferred nid and migrate task if possible */
 		p->numa_preferred_nid = max_nid;
 		p->numa_migrate_seq = 0;
+		migrate_task_to(p, preferred_cpu);
 	}
 }
 
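And a similar sketch of the decision this hunk adds: once the node with
the most faults differs from the current preference, a destination CPU on
that node is chosen (searching only when the task is not already there)
and the task is moved. faults[], cpu_node[] and idlest_cpu_on() are
made-up stand-ins for p->numa_faults[], cpu_to_node() and
find_idlest_cpu_node(); in the kernel the actual move is done by
migrate_task_to().

/* Stand-alone sketch of the placement decision in task_numa_placement() */
#include <stdio.h>

#define NR_NODES 2
#define NR_CPUS  8

/* Hypothetical per-node fault counts, standing in for p->numa_faults[] */
static const unsigned long faults[NR_NODES] = { 12, 48 };

/* Maps each CPU to its node, standing in for cpu_to_node() */
static const int cpu_node[NR_CPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };

/* Trivial replacement for find_idlest_cpu_node(): first CPU on nid */
static int idlest_cpu_on(int nid)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_node[cpu] == nid)
			return cpu;
	return -1;
}

int main(void)
{
	int nid, max_nid = -1;
	int preferred_nid = 0;	/* current preference: node 0 */
	int cur_cpu = 2;	/* task runs on CPU 2, node 0 */
	unsigned long max_faults = 0;

	/* Pick the node that accumulated the most NUMA faults */
	for (nid = 0; nid < NR_NODES; nid++) {
		if (faults[nid] > max_faults) {
			max_faults = faults[nid];
			max_nid = nid;
		}
	}

	/* Same guard as the patch: act only when the preference changes */
	if (max_faults && max_nid != preferred_nid) {
		int dest_cpu = cur_cpu;

		/* Search for a destination only if not already on max_nid */
		if (cpu_node[dest_cpu] != max_nid)
			dest_cpu = idlest_cpu_on(max_nid);

		preferred_nid = max_nid;
		printf("preferred node %d, migrate to cpu %d\n",
		       preferred_nid, dest_cpu);
	}

	return 0;
}

Running it prints "preferred node 1, migrate to cpu 4": node 1 has the
most faults, the task sits on node 0, so a CPU on node 1 is picked as the
destination.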