14 years ago · 0f004f5a69
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -143,7 +143,7 @@ extern unsigned long nr_iowait_cpu(int cpu);
 
				 extern unsigned long this_cpu_load(void);
			
 
				 
			
 
				 
			
 
				-extern void calc_global_load(void);
			
 
				+extern void calc_global_load(unsigned long ticks);
			
 
				 
			
 
				 extern unsigned long get_parent_ip(unsigned long addr);
			
 
				 
			
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3119,6 +3119,15 @@ static long calc_load_fold_active(struct rq *this_rq)
 
				 	return delta;
			
 
				 }
			
 
				 
			
 
				+static unsigned long
			
 
				+calc_load(unsigned long load, unsigned long exp, unsigned long active)
			
 
				+{
			
 
				+	load *= exp;
			
 
				+	load += active * (FIXED_1 - exp);
			
 
				+	load += 1UL << (FSHIFT - 1);
			
 
				+	return load >> FSHIFT;
			
 
				+}
			
 
				+
			
 
				 #ifdef CONFIG_NO_HZ
			
 
				 /*
			
 
				  * For NO_HZ we delay the active fold to the next LOAD_FREQ update.
			
@@ -3148,6 +3157,128 @@ static long calc_load_fold_idle(void)
 
				 
			
 
				 	return delta;
			
 
				 }
			
 
				+
			
 
				+/**
			
 
				+ * fixed_power_int - compute: x^n, in O(log n) time
			
 
				+ *
			
 
				+ * @x:         base of the power
			
 
				+ * @frac_bits: fractional bits of @x
			
 
				+ * @n:         power to raise @x to.
			
 
				+ *
			
 
				+ * By exploiting the relation between the definition of the natural power
			
 
				+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and
			
 
				+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
			
 
				+ * (where: n_i \elem {0, 1}, the binary vector representing n),
			
 
				+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
			
 
				+ * of course trivially computable in O(log_2 n), the length of our binary
			
 
				+ * vector.
			
 
				+ */
			
 
				+static unsigned long
			
 
				+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
			
 
				+{
			
 
				+	unsigned long result = 1UL << frac_bits;
			
 
				+
			
 
				+	if (n) for (;;) {
			
 
				+		if (n & 1) {
			
 
				+			result *= x;
			
 
				+			result += 1UL << (frac_bits - 1);
			
 
				+			result >>= frac_bits;
			
 
				+		}
			
 
				+		n >>= 1;
			
 
				+		if (!n)
			
 
				+			break;
			
 
				+		x *= x;
			
 
				+		x += 1UL << (frac_bits - 1);
			
 
				+		x >>= frac_bits;
			
 
				+	}
			
 
				+
			
 
				+	return result;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * a1 = a0 * e + a * (1 - e)
			
 
				+ *
			
 
				+ * a2 = a1 * e + a * (1 - e)
			
 
				+ *    = (a0 * e + a * (1 - e)) * e + a * (1 - e)
			
 
				+ *    = a0 * e^2 + a * (1 - e) * (1 + e)
			
 
				+ *
			
 
				+ * a3 = a2 * e + a * (1 - e)
			
 
				+ *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
			
 
				+ *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
			
 
				+ *
			
 
				+ *  ...
			
 
				+ *
			
 
				+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^(n-1)) [1]
			
 
				+ *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
			
 
				+ *    = a0 * e^n + a * (1 - e^n)
			
 
				+ *
			
 
				+ * [1] application of the geometric series:
			
 
				+ *
			
 
				+ *              n         1 - x^(n+1)
			
 
				+ *     S_n := \Sum x^i = -------------
			
 
				+ *             i=0          1 - x
			
 
				+ */
			
 
				+static unsigned long
			
 
				+calc_load_n(unsigned long load, unsigned long exp,
			
 
				+	    unsigned long active, unsigned int n)
			
 
				+{
			
 
				+
			
 
				+	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * NO_HZ can leave us missing all per-cpu ticks calling
			
 
				+ * calc_load_account_active(), but since an idle CPU folds its delta into
			
 
				+ * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
			
 
				+ * in the pending idle delta if our idle period crossed a load cycle boundary.
			
 
				+ *
			
 
				+ * Once we've updated the global active value, we need to apply the exponential
			
 
				+ * weights adjusted to the number of cycles missed.
			
 
				+ */
			
 
				+static void calc_global_nohz(unsigned long ticks)
			
 
				+{
			
 
				+	long delta, active, n;
			
 
				+
			
 
				+	if (time_before(jiffies, calc_load_update))
			
 
				+		return;
			
 
				+
			
 
				+	/*
			
 
				+	 * If we crossed a calc_load_update boundary, make sure to fold
			
 
				+	 * any pending idle changes, the respective CPUs might have
			
 
				+	 * missed the tick driven calc_load_account_active() update
			
 
				+	 * due to NO_HZ.
			
 
				+	 */
			
 
				+	delta = calc_load_fold_idle();
			
 
				+	if (delta)
			
 
				+		atomic_long_add(delta, &calc_load_tasks);
			
 
				+
			
 
				+	/*
			
 
				+	 * If we were idle for multiple load cycles, apply them.
			
 
				+	 */
			
 
				+	if (ticks >= LOAD_FREQ) {
			
 
				+		n = ticks / LOAD_FREQ;
			
 
				+
			
 
				+		active = atomic_long_read(&calc_load_tasks);
			
 
				+		active = active > 0 ? active * FIXED_1 : 0;
			
 
				+
			
 
				+		avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
			
 
				+		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
			
 
				+		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
			
 
				+
			
 
				+		calc_load_update += n * LOAD_FREQ;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * It's possible the remainder of the above division also crosses
			
 
				+	 * a LOAD_FREQ period, the regular check in calc_global_load()
			
 
				+	 * which comes after this will take care of that.
			
 
				+	 *
			
 
				+	 * Consider us being 11 ticks before a cycle completion, and us
			
 
				+	 * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
			
 
				+	 * age us 4 cycles, and the test in calc_global_load() will
			
 
				+	 * pick up the final one.
			
 
				+	 */
			
 
				+}
			
 
				 #else
			
 
				 static void calc_load_account_idle(struct rq *this_rq)
			
 
				 {
			
@@ -3157,6 +3288,10 @@ static inline long calc_load_fold_idle(void)
 
				 {
			
 
				 	return 0;
			
 
				 }
			
 
				+
			
 
				+static void calc_global_nohz(unsigned long ticks)
			
 
				+{
			
 
				+}
			
 
				 #endif
			
 
				 
			
 
				 /**
			
@@ -3174,24 +3309,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
 
				 	loads[2] = (avenrun[2] + offset) << shift;
			
 
				 }
			
 
				 
			
 
				-static unsigned long
			
 
				-calc_load(unsigned long load, unsigned long exp, unsigned long active)
			
 
				-{
			
 
				-	load *= exp;
			
 
				-	load += active * (FIXED_1 - exp);
			
 
				-	return load >> FSHIFT;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * calc_load - update the avenrun load estimates 10 ticks after the
			
 
				  * CPUs have updated calc_load_tasks.
			
 
				  */
			
 
				-void calc_global_load(void)
			
 
				+void calc_global_load(unsigned long ticks)
			
 
				 {
			
 
				-	unsigned long upd = calc_load_update + 10;
			
 
				 	long active;
			
 
				 
			
 
				-	if (time_before(jiffies, upd))
			
 
				+	calc_global_nohz(ticks);
			
 
				+
			
 
				+	if (time_before(jiffies, calc_load_update + 10))
			
 
				 		return;
			
 
				 
			
 
				 	active = atomic_long_read(&calc_load_tasks);
			
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1319,7 +1319,7 @@ void do_timer(unsigned long ticks)
 
				 {
			
 
				 	jiffies_64 += ticks;
			
 
				 	update_wall_time();
			
 
				-	calc_global_load();
			
 
				+	calc_global_load(ticks);
			
 
				 }
			
 
				 
			
 
				 #ifdef __ARCH_WANT_SYS_ALARM