|
@@ -5681,15 +5681,39 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
|
|
|
/* Earliest time when we have to do rebalance again */
|
|
|
unsigned long next_balance = jiffies + 60*HZ;
|
|
|
int update_next_balance = 0;
|
|
|
- int need_serialize;
|
|
|
+ int need_serialize, need_decay = 0;
|
|
|
+ u64 max_cost = 0;
|
|
|
|
|
|
update_blocked_averages(cpu);
|
|
|
|
|
|
rcu_read_lock();
|
|
|
for_each_domain(cpu, sd) {
|
|
|
+ /*
|
|
|
+ * Decay the newidle max times here because this is a regular
|
|
|
+ * visit to all the domains. Decay ~1% per second.
|
|
|
+ */
|
|
|
+ if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
|
|
|
+ sd->max_newidle_lb_cost =
|
|
|
+ (sd->max_newidle_lb_cost * 253) / 256;
|
|
|
+ sd->next_decay_max_lb_cost = jiffies + HZ;
|
|
|
+ need_decay = 1;
|
|
|
+ }
|
|
|
+ max_cost += sd->max_newidle_lb_cost;
|
|
|
+
|
|
|
if (!(sd->flags & SD_LOAD_BALANCE))
|
|
|
continue;
|
|
|
|
|
|
+ /*
|
|
|
+ * Stop the load balance at this level. There is another
|
|
|
+ * CPU in our sched group which is doing load balancing more
|
|
|
+ * actively.
|
|
|
+ */
|
|
|
+ if (!continue_balancing) {
|
|
|
+ if (need_decay)
|
|
|
+ continue;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
interval = sd->balance_interval;
|
|
|
if (idle != CPU_IDLE)
|
|
|
interval *= sd->busy_factor;
|
|
@@ -5723,14 +5747,14 @@ out:
|
|
|
next_balance = sd->last_balance + interval;
|
|
|
update_next_balance = 1;
|
|
|
}
|
|
|
-
|
|
|
+ }
|
|
|
+ if (need_decay) {
|
|
|
/*
|
|
|
- * Stop the load balance at this level. There is another
|
|
|
- * CPU in our sched group which is doing load balancing more
|
|
|
- * actively.
|
|
|
+ * Ensure the rq-wide value also decays but keep it at a
|
|
|
+ * reasonable floor to avoid funnies with rq->avg_idle.
|
|
|
*/
|
|
|
- if (!continue_balancing)
|
|
|
- break;
|
|
|
+ rq->max_idle_balance_cost =
|
|
|
+ max((u64)sysctl_sched_migration_cost, max_cost);
|
|
|
}
|
|
|
rcu_read_unlock();
|
|
|
|