|
@@ -3119,6 +3119,15 @@ static long calc_load_fold_active(struct rq *this_rq)
|
|
|
return delta;
|
|
|
}
|
|
|
|
|
|
+static unsigned long
|
|
|
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
|
|
|
+{
|
|
|
+ load *= exp;
|
|
|
+ load += active * (FIXED_1 - exp);
|
|
|
+ load += 1UL << (FSHIFT - 1);
|
|
|
+ return load >> FSHIFT;
|
|
|
+}
|
|
|
+
|
|
|
#ifdef CONFIG_NO_HZ
|
|
|
/*
|
|
|
* For NO_HZ we delay the active fold to the next LOAD_FREQ update.
|
|
@@ -3148,6 +3157,128 @@ static long calc_load_fold_idle(void)
|
|
|
|
|
|
return delta;
|
|
|
}
|
|
|
+
|
|
|
+/**
|
|
|
+ * fixed_power_int - compute: x^n, in O(log n) time
|
|
|
+ *
|
|
|
+ * @x: base of the power
|
|
|
+ * @frac_bits: fractional bits of @x
|
|
|
+ * @n: power to raise @x to.
|
|
|
+ *
|
|
|
+ * By exploiting the relation between the definition of the natural power
|
|
|
+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and
|
|
|
+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
|
|
|
+ * (where: n_i \elem {0, 1}, the binary vector representing n),
|
|
|
+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
|
|
|
+ * of course trivially computable in O(log_2 n), the length of our binary
|
|
|
+ * vector.
|
|
|
+ */
|
|
|
+static unsigned long
|
|
|
+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
|
|
|
+{
|
|
|
+ unsigned long result = 1UL << frac_bits;
|
|
|
+
|
|
|
+ if (n) for (;;) {
|
|
|
+ if (n & 1) {
|
|
|
+ result *= x;
|
|
|
+ result += 1UL << (frac_bits - 1);
|
|
|
+ result >>= frac_bits;
|
|
|
+ }
|
|
|
+ n >>= 1;
|
|
|
+ if (!n)
|
|
|
+ break;
|
|
|
+ x *= x;
|
|
|
+ x += 1UL << (frac_bits - 1);
|
|
|
+ x >>= frac_bits;
|
|
|
+ }
|
|
|
+
|
|
|
+ return result;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * a1 = a0 * e + a * (1 - e)
|
|
|
+ *
|
|
|
+ * a2 = a1 * e + a * (1 - e)
|
|
|
+ * = (a0 * e + a * (1 - e)) * e + a * (1 - e)
|
|
|
+ * = a0 * e^2 + a * (1 - e) * (1 + e)
|
|
|
+ *
|
|
|
+ * a3 = a2 * e + a * (1 - e)
|
|
|
+ * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
|
|
|
+ * = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
|
|
|
+ *
|
|
|
+ * ...
|
|
|
+ *
|
|
|
+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^(n-1)) [1]
|
|
|
+ * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
|
|
|
+ * = a0 * e^n + a * (1 - e^n)
|
|
|
+ *
|
|
|
+ * [1] application of the geometric series:
|
|
|
+ *
|
|
|
+ *              n         1 - x^(n+1)
|
|
|
+ *     S_n := \Sum x^i = -------------
|
|
|
+ *             i=0          1 - x
|
|
|
+ */
|
|
|
+static unsigned long
|
|
|
+calc_load_n(unsigned long load, unsigned long exp,
|
|
|
+ unsigned long active, unsigned int n)
|
|
|
+{
|
|
|
+
|
|
|
+ return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * NO_HZ can leave us missing all per-cpu ticks calling
|
|
|
+ * calc_load_account_active(), but since an idle CPU folds its delta into
|
|
|
+ * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
|
|
|
+ * in the pending idle delta if our idle period crossed a load cycle boundary.
|
|
|
+ *
|
|
|
+ * Once we've updated the global active value, we need to apply the exponential
|
|
|
+ * weights adjusted to the number of cycles missed.
|
|
|
+ */
|
|
|
+static void calc_global_nohz(unsigned long ticks)
|
|
|
+{
|
|
|
+ long delta, active, n;
|
|
|
+
|
|
|
+ if (time_before(jiffies, calc_load_update))
|
|
|
+ return;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If we crossed a calc_load_update boundary, make sure to fold
|
|
|
+ * any pending idle changes, the respective CPUs might have
|
|
|
+ * missed the tick driven calc_load_account_active() update
|
|
|
+ * due to NO_HZ.
|
|
|
+ */
|
|
|
+ delta = calc_load_fold_idle();
|
|
|
+ if (delta)
|
|
|
+ atomic_long_add(delta, &calc_load_tasks);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If we were idle for multiple load cycles, apply them.
|
|
|
+ */
|
|
|
+ if (ticks >= LOAD_FREQ) {
|
|
|
+ n = ticks / LOAD_FREQ;
|
|
|
+
|
|
|
+ active = atomic_long_read(&calc_load_tasks);
|
|
|
+ active = active > 0 ? active * FIXED_1 : 0;
|
|
|
+
|
|
|
+ avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
|
|
|
+ avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
|
|
|
+ avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
|
|
|
+
|
|
|
+ calc_load_update += n * LOAD_FREQ;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * It's possible the remainder of the above division also crosses
|
|
|
+ * a LOAD_FREQ period, the regular check in calc_global_load()
|
|
|
+ * which comes after this will take care of that.
|
|
|
+ *
|
|
|
+ * Consider us being 11 ticks before a cycle completion, and us
|
|
|
+ * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
|
|
|
+ * age us 4 cycles, and the test in calc_global_load() will
|
|
|
+ * pick up the final one.
|
|
|
+ */
|
|
|
+}
|
|
|
#else
|
|
|
static void calc_load_account_idle(struct rq *this_rq)
|
|
|
{
|
|
@@ -3157,6 +3288,10 @@ static inline long calc_load_fold_idle(void)
|
|
|
{
|
|
|
return 0;
|
|
|
}
|
|
|
+
|
|
|
+static void calc_global_nohz(unsigned long ticks)
|
|
|
+{
|
|
|
+}
|
|
|
#endif
|
|
|
|
|
|
/**
|
|
@@ -3174,24 +3309,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
|
|
|
loads[2] = (avenrun[2] + offset) << shift;
|
|
|
}
|
|
|
|
|
|
-static unsigned long
|
|
|
-calc_load(unsigned long load, unsigned long exp, unsigned long active)
|
|
|
-{
|
|
|
- load *= exp;
|
|
|
- load += active * (FIXED_1 - exp);
|
|
|
- return load >> FSHIFT;
|
|
|
-}
|
|
|
-
|
|
|
/*
|
|
|
* calc_load - update the avenrun load estimates 10 ticks after the
|
|
|
* CPUs have updated calc_load_tasks.
|
|
|
*/
|
|
|
-void calc_global_load(void)
|
|
|
+void calc_global_load(unsigned long ticks)
|
|
|
{
|
|
|
- unsigned long upd = calc_load_update + 10;
|
|
|
long active;
|
|
|
|
|
|
- if (time_before(jiffies, upd))
|
|
|
+ calc_global_nohz(ticks);
|
|
|
+
|
|
|
+ if (time_before(jiffies, calc_load_update + 10))
|
|
|
return;
|
|
|
|
|
|
active = atomic_long_read(&calc_load_tasks);
|