@@ -1000,7 +1000,7 @@ struct numa_stats {
  */
 static void update_numa_stats(struct numa_stats *ns, int nid)
 {
-	int cpu;
+	int cpu, cpus = 0;
 
 	memset(ns, 0, sizeof(*ns));
 	for_each_cpu(cpu, cpumask_of_node(nid)) {
@@ -1009,8 +1009,21 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
 		ns->nr_running += rq->nr_running;
 		ns->load += weighted_cpuload(cpu);
 		ns->power += power_of(cpu);
+
+		cpus++;
 	}
 
+	/*
+	 * If we raced with hotplug and there are no CPUs left in our mask
+	 * the @ns structure is NULL'ed and task_numa_compare() will
+	 * not find this node attractive.
+	 *
+	 * We'll either bail at !has_capacity, or we'll detect a huge imbalance
+	 * and bail there.
+	 */
+	if (!cpus)
+		return;
+
 	ns->load = (ns->load * SCHED_POWER_SCALE) / ns->power;
 	ns->capacity = DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE);
 	ns->has_capacity = (ns->nr_running < ns->capacity);
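The cpus counter above exists because a CPU-hotplug race can leave cpumask_of_node(nid) empty; ns->power then stays zero and the ns->load scaling right after the guard would divide by zero. Below is a minimal user-space sketch of the same count-and-bail pattern; every name in it (node_stats, fake_update_stats, NR_CPUS_IN_NODE, the constant per-CPU figures) is a hypothetical stand-in for the kernel helpers, not kernel API.

/*
 * Sketch: accumulate per-CPU figures, count contributors, and bail out
 * before dividing when nothing contributed.
 */
#include <stdio.h>

#define NR_CPUS_IN_NODE 4	/* pretend-size of the node's CPU mask */

struct node_stats {
	unsigned long load;
	unsigned long power;
};

/* Returns 0 on success, -1 if no CPU contributed (all "hot-unplugged"). */
static int fake_update_stats(struct node_stats *ns, const int *online)
{
	int cpu, cpus = 0;

	ns->load = ns->power = 0;
	for (cpu = 0; cpu < NR_CPUS_IN_NODE; cpu++) {
		if (!online[cpu])
			continue;	/* mimics a CPU vanishing from the mask */
		ns->load += 100;	/* stand-in for weighted_cpuload() */
		ns->power += 1024;	/* stand-in for power_of() */
		cpus++;
	}

	if (!cpus)			/* without this, the division below is 0/0 */
		return -1;

	ns->load = (ns->load * 1024) / ns->power;
	return 0;
}

int main(void)
{
	struct node_stats ns;
	int none_online[NR_CPUS_IN_NODE] = { 0, 0, 0, 0 };

	if (fake_update_stats(&ns, none_online))
		printf("empty node: skipped the division safely\n");
	return 0;
}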
@@ -1201,9 +1214,21 @@ static int task_numa_migrate(struct task_struct *p)
 	 */
 	rcu_read_lock();
 	sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
-	env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
+	if (sd)
+		env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
 	rcu_read_unlock();
 
+	/*
+	 * Cpusets can break the scheduler domain tree into smaller
+	 * balance domains, some of which do not cross NUMA boundaries.
+	 * Tasks that are "trapped" in such domains cannot be migrated
+	 * elsewhere, so there is no point in (re)trying.
+	 */
+	if (unlikely(!sd)) {
+		p->numa_preferred_nid = cpu_to_node(task_cpu(p));
+		return -EINVAL;
+	}
+
 	taskweight = task_weight(p, env.src_nid);
 	groupweight = group_weight(p, env.src_nid);
 	update_numa_stats(&env.src_stats, env.src_nid);
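This hunk handles rcu_dereference() returning NULL when cpusets have split the domain tree so that no NUMA domain spans env.src_cpu: the sd->imbalance_pct read becomes conditional, and the task's preferred node is pinned to its current node so the migration is not retried. The user-space sketch below mirrors that shape; lookup_numa_domain, fake_domain and task_ctx are made-up stand-ins, not kernel interfaces.

/*
 * Sketch: a lookup that may legitimately return NULL, a default that is
 * only overridden when the lookup succeeded, and an early bail that also
 * records "stop retrying".
 */
#include <stddef.h>
#include <stdio.h>

struct fake_domain {
	int imbalance_pct;
};

struct task_ctx {
	int preferred_nid;
	int current_nid;
};

/* Stand-in for the sd_numa lookup; pretend no NUMA-spanning domain exists. */
static struct fake_domain *lookup_numa_domain(int cpu)
{
	(void)cpu;
	return NULL;
}

static int try_migrate(struct task_ctx *t, int cpu)
{
	struct fake_domain *sd = lookup_numa_domain(cpu);
	int imbalance_pct = 100;		/* safe default */

	if (sd)					/* only dereference when present */
		imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;

	if (!sd) {
		/* Pin the preference to where we are so we do not retry. */
		t->preferred_nid = t->current_nid;
		return -1;
	}

	printf("would balance with imbalance_pct=%d\n", imbalance_pct);
	return 0;
}

int main(void)
{
	struct task_ctx t = { .preferred_nid = -1, .current_nid = 0 };

	if (try_migrate(&t, 0))
		printf("no NUMA domain: preferred_nid pinned to %d\n",
		       t.preferred_nid);
	return 0;
}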
@@ -2153,7 +2178,7 @@ static inline void __update_tg_runnable_avg(struct sched_avg *sa,
 	long contrib;
 
 	/* The fraction of a cpu used by this cfs_rq */
-	contrib = div_u64(sa->runnable_avg_sum << NICE_0_SHIFT,
+	contrib = div_u64((u64)sa->runnable_avg_sum << NICE_0_SHIFT,
 			  sa->runnable_avg_period + 1);
 	contrib -= cfs_rq->tg_runnable_contrib;
 
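The last hunk only adds a (u64) cast, but it matters because sa->runnable_avg_sum is a 32-bit field and shifting it by NICE_0_SHIFT can wrap before div_u64() ever sees the value; widening first keeps all the bits. The sketch below demonstrates the wrap with a shift of 20 (assumed here to match NICE_0_SHIFT when the scheduler's extra load resolution is enabled); the concrete numbers are illustrative only.

/*
 * Sketch: shifting a 32-bit value in 32-bit arithmetic silently drops the
 * high bits; casting to 64 bits before the shift does not.
 */
#include <stdint.h>
#include <stdio.h>

#define FAKE_NICE_0_SHIFT 20	/* assumed shift, for illustration */

int main(void)
{
	uint32_t runnable_avg_sum = 47742;	/* near the series' ceiling */
	uint32_t period = 47742;

	/* Shift performed in 32 bits: the product exceeds 2^32 and wraps. */
	uint64_t wrong = (uint64_t)(runnable_avg_sum << FAKE_NICE_0_SHIFT) /
			 (period + 1);

	/* Widen first, then shift: no truncation. */
	uint64_t right = ((uint64_t)runnable_avg_sum << FAKE_NICE_0_SHIFT) /
			 (period + 1);

	printf("without cast: %llu\n", (unsigned long long)wrong);
	printf("with cast:    %llu\n", (unsigned long long)right);
	return 0;
}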