@@ -1272,11 +1272,30 @@ static inline u64 sched_cfs_bandwidth_slice(void)
 	return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC;
 }
 
+/*
+ * Replenish runtime according to assigned quota and update expiration time.
+ * We use sched_clock_cpu directly instead of rq->clock to avoid additional
+ * synchronization around rq->lock.
+ *
+ * requires cfs_b->lock
+ */
+static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
+{
+	u64 now;
+
+	if (cfs_b->quota == RUNTIME_INF)
+		return;
+
+	now = sched_clock_cpu(smp_processor_id());
+	cfs_b->runtime = cfs_b->quota;
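+	/*
+	 * The deadline is one full period out on this CPU's sched_clock;
+	 * per-cpu drift against it is reconciled at consumption time by
+	 * expire_cfs_rq_runtime().
+	 */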
+	cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
+}
+
 static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
 	struct task_group *tg = cfs_rq->tg;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
-	u64 amount = 0, min_amount;
+	u64 amount = 0, min_amount, expires;
 
 	/* note: this is a positive sum as runtime_remaining <= 0 */
 	min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
@@ -1285,9 +1304,16 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	if (cfs_b->quota == RUNTIME_INF)
 		amount = min_amount;
 	else {
-		/* ensure bandwidth timer remains active under consumption */
-		if (!cfs_b->timer_active)
+		/*
+		 * If the bandwidth pool has become inactive, then at least one
+		 * period must have elapsed since the last consumption.
+		 * Refresh the global state and ensure bandwidth timer becomes
+		 * active.
+		 */
+		if (!cfs_b->timer_active) {
+			__refill_cfs_bandwidth_runtime(cfs_b);
 			__start_cfs_bandwidth(cfs_b);
+		}
 
 		if (cfs_b->runtime > 0) {
 			amount = min(cfs_b->runtime, min_amount);
@@ -1295,19 +1321,61 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 			cfs_b->idle = 0;
 		}
 	}
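+	/* snapshot the global deadline while cfs_b->lock is still held */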
+	expires = cfs_b->runtime_expires;
 	raw_spin_unlock(&cfs_b->lock);
 
 	cfs_rq->runtime_remaining += amount;
+	/*
+	 * we may have advanced our local expiration to account for allowed
+	 * spread between our sched_clock and the one on which runtime was
+	 * issued.
+	 */
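+	/* only move the local deadline forward ((s64) cast is wrap-safe) */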
+	if ((s64)(expires - cfs_rq->runtime_expires) > 0)
+		cfs_rq->runtime_expires = expires;
 }
 
-static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
-		unsigned long delta_exec)
+/*
+ * Note: This depends on the synchronization provided by sched_clock and the
+ * fact that rq->clock snapshots this value.
+ */
+static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
-	if (!cfs_rq->runtime_enabled)
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+	struct rq *rq = rq_of(cfs_rq);
+
+	/* if the deadline is ahead of our clock, nothing to do */
+	if (likely((s64)(rq->clock - cfs_rq->runtime_expires) < 0))
+		return;
+
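+	/* nothing to expire if local runtime is already exhausted */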
+	if (cfs_rq->runtime_remaining < 0)
 		return;
 
+	/*
+	 * If the local deadline has passed we have to consider the
+	 * possibility that our sched_clock is 'fast' and the global deadline
+	 * has not truly expired.
+	 *
+	 * Fortunately we can determine whether this is the case by checking
+	 * whether the global deadline has advanced.
+	 */
+
+	if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) {
+		/* extend local deadline, drift is bounded above by 2 ticks */
+		cfs_rq->runtime_expires += TICK_NSEC;
+	} else {
+		/* global deadline is ahead, expiration has passed */
+		cfs_rq->runtime_remaining = 0;
+	}
+}
+
+static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
+				     unsigned long delta_exec)
+{
+	/* dock delta_exec before expiring quota (as it could span periods) */
 	cfs_rq->runtime_remaining -= delta_exec;
-	if (cfs_rq->runtime_remaining > 0)
+	expire_cfs_rq_runtime(cfs_rq);
+
+	if (likely(cfs_rq->runtime_remaining > 0))
 		return;
 
 	assign_cfs_rq_runtime(cfs_rq);
@@ -1338,7 +1406,12 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 		goto out_unlock;
 
 	idle = cfs_b->idle;
-	cfs_b->runtime = cfs_b->quota;
+	/* if we're going inactive then everything else can be deferred */
+	if (idle)
+		goto out_unlock;
+
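+	/* otherwise refill the global pool for the period now beginning */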
+	__refill_cfs_bandwidth_runtime(cfs_b);
+
 
 	/* mark as potentially idle for the upcoming period */
 	cfs_b->idle = 1;
@@ -1557,7 +1630,6 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 
 	return wl;
 }
-
 #else
 
 static inline unsigned long effective_load(struct task_group *tg, int cpu,