@@ -1439,6 +1439,84 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	raw_spin_unlock(&cfs_b->lock);
 }
 
+static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+{
+	struct rq *rq = rq_of(cfs_rq);
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+	struct sched_entity *se;
+	int enqueue = 1;
+	long task_delta;
+
+	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
+
+	cfs_rq->throttled = 0;
+	raw_spin_lock(&cfs_b->lock);
+	list_del_rcu(&cfs_rq->throttled_list);
+	raw_spin_unlock(&cfs_b->lock);
+
+	if (!cfs_rq->load.weight)
+		return;
+
+	task_delta = cfs_rq->h_nr_running;
+	for_each_sched_entity(se) {
+		if (se->on_rq)
+			enqueue = 0;
+
+		cfs_rq = cfs_rq_of(se);
+		if (enqueue)
+			enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+		cfs_rq->h_nr_running += task_delta;
+
+		if (cfs_rq_throttled(cfs_rq))
+			break;
+	}
+
+	if (!se)
+		rq->nr_running += task_delta;
+
+	/* determine whether we need to wake up potentially idle cpu */
+	if (rq->curr == rq->idle && rq->cfs.nr_running)
+		resched_task(rq->curr);
+}
+
+static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
+		u64 remaining, u64 expires)
+{
+	struct cfs_rq *cfs_rq;
+	u64 runtime = remaining;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
+				throttled_list) {
+		struct rq *rq = rq_of(cfs_rq);
+
+		raw_spin_lock(&rq->lock);
+		if (!cfs_rq_throttled(cfs_rq))
+			goto next;
+
+		runtime = -cfs_rq->runtime_remaining + 1;
+		if (runtime > remaining)
+			runtime = remaining;
+		remaining -= runtime;
+
+		cfs_rq->runtime_remaining += runtime;
+		cfs_rq->runtime_expires = expires;
+
+		/* we check whether we're throttled above */
+		if (cfs_rq->runtime_remaining > 0)
+			unthrottle_cfs_rq(cfs_rq);
+
+next:
+		raw_spin_unlock(&rq->lock);
+
+		if (!remaining)
+			break;
+	}
+	rcu_read_unlock();
+
+	return remaining;
+}
+
 /*
  * Responsible for refilling a task_group's bandwidth and unthrottling its
  * cfs_rqs as appropriate. If there has been no activity within the last
@@ -1447,23 +1525,64 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
  */
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 {
-	int idle = 1;
+	u64 runtime, runtime_expires;
+	int idle = 1, throttled;
 
 	raw_spin_lock(&cfs_b->lock);
 	/* no need to continue the timer with no bandwidth constraint */
 	if (cfs_b->quota == RUNTIME_INF)
 		goto out_unlock;
 
-	idle = cfs_b->idle;
+	throttled = !list_empty(&cfs_b->throttled_cfs_rq);
+	/* idle depends on !throttled (for the case of a large deficit) */
+	idle = cfs_b->idle && !throttled;
+
 	/* if we're going inactive then everything else can be deferred */
 	if (idle)
 		goto out_unlock;
 
 	__refill_cfs_bandwidth_runtime(cfs_b);
+	if (!throttled) {
+		/* mark as potentially idle for the upcoming period */
+		cfs_b->idle = 1;
+		goto out_unlock;
+	}
+
+	/*
+	 * There are throttled entities so we must first use the new bandwidth
+	 * to unthrottle them before making it generally available. This
+	 * ensures that all existing debts will be paid before a new cfs_rq is
+	 * allowed to run.
+	 */
+	runtime = cfs_b->runtime;
+	runtime_expires = cfs_b->runtime_expires;
+	cfs_b->runtime = 0;
+
+	/*
+	 * This check is repeated as we are holding onto the new bandwidth
+	 * while we unthrottle. This can potentially race with an unthrottled
+	 * group trying to acquire new bandwidth from the global pool.
+	 */
+	while (throttled && runtime > 0) {
+		raw_spin_unlock(&cfs_b->lock);
+		/* we can't nest cfs_b->lock while distributing bandwidth */
+		runtime = distribute_cfs_runtime(cfs_b, runtime,
+						 runtime_expires);
+		raw_spin_lock(&cfs_b->lock);
+
+		throttled = !list_empty(&cfs_b->throttled_cfs_rq);
+	}
 
-	/* mark as potentially idle for the upcoming period */
-	cfs_b->idle = 1;
+	/* return (any) remaining runtime */
+	cfs_b->runtime = runtime;
+	/*
+	 * While we are ensured activity in the period following an
+	 * unthrottle, this also covers the case in which the new bandwidth is
+	 * insufficient to cover the existing bandwidth deficit. (Forcing the
+	 * timer to remain active while there are any throttled entities.)
+	 */
+	cfs_b->idle = 0;
 out_unlock:
 	if (idle)
 		cfs_b->timer_active = 0;
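
The distribution step above amounts to a greedy top-up of each throttled runqueue's deficit from the freshly refilled global pool: each cfs_rq is handed just enough runtime (-runtime_remaining + 1) to go positive and be unthrottled, until the pool is exhausted, with any leftover returned to cfs_b->runtime. The standalone userspace sketch below models only that arithmetic; the distribute() helper and the deficit array are hypothetical and none of the kernel's locking, RCU, or data structures are involved.

/* Illustrative model of distribute_cfs_runtime()'s arithmetic only. */
#include <stdio.h>

/* Give each throttled entry (runtime_remaining <= 0) deficit + 1, capped by
 * what is left in the pool; return the undistributed remainder. */
static unsigned long long distribute(long long *runtime_remaining, int n,
				     unsigned long long remaining)
{
	int i;

	for (i = 0; i < n && remaining; i++) {
		unsigned long long want;

		if (runtime_remaining[i] > 0)	/* not throttled */
			continue;

		want = -runtime_remaining[i] + 1;
		if (want > remaining)
			want = remaining;
		remaining -= want;
		runtime_remaining[i] += want;
	}
	return remaining;
}

int main(void)
{
	/* hypothetical per-cfs_rq deficits, in ns */
	long long deficits[] = { -3000, -500, -10000 };
	unsigned long long left = distribute(deficits, 3, 5000);

	/* prints: leftover 0, remaining: 1 1 -8502 */
	printf("leftover %llu, remaining: %lld %lld %lld\n",
	       left, deficits[0], deficits[1], deficits[2]);
	return 0;
}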