瀏覽代碼

powerpc: Improve decrementer accuracy

I have been looking at sources of OS jitter and notice that after a long
NO_HZ idle period we wakeup too early:

relative time (us)    event
                      timer irq exit
    999946.405        timer irq entry
         4.835        timer irq exit
        21.685        timer irq entry
         3.540          timer (tick_sched_timer) entry

Here we slept for just under a second then took a timer interrupt that did
nothing. 21.685 us later we wake up again and do the work.

We set a rather low shift value of 16 for the decrementer clockevent, which I
think is causing this issue. On this box we have a 207MHz decrementer and see:

clockevent: decrementer mult[3501] shift[16] cpu[0]

For calculations of large intervals this mult/shift combination could be
off by a significant amount. I notice the sparc code has a loop that iterates
to find a mult/shift combination that maximises the shift value while
keeping mult under 32bit. With the patch below we get:

clockevent: decrementer mult[35015c20] shift[32] cpu[15]

And we no longer see the spurious wakeups.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Anton Blanchard 16 年之前
父節點
當前提交
8d165db107
共有 1 個文件被更改,包括 18 次插入3 次删除
  1. 18 3
      arch/powerpc/kernel/time.c

+ 18 - 3
arch/powerpc/kernel/time.c

@@ -109,7 +109,7 @@ static void decrementer_set_mode(enum clock_event_mode mode,
 static struct clock_event_device decrementer_clockevent = {
 static struct clock_event_device decrementer_clockevent = {
        .name           = "decrementer",
        .name           = "decrementer",
        .rating         = 200,
        .rating         = 200,
-       .shift          = 16,
+       .shift          = 0,	/* To be filled in */
        .mult           = 0,	/* To be filled in */
        .mult           = 0,	/* To be filled in */
        .irq            = 0,
        .irq            = 0,
        .set_next_event = decrementer_set_next_event,
        .set_next_event = decrementer_set_next_event,
@@ -843,6 +843,22 @@ static void decrementer_set_mode(enum clock_event_mode mode,
 		decrementer_set_next_event(DECREMENTER_MAX, dev);
 		decrementer_set_next_event(DECREMENTER_MAX, dev);
 }
 }
 
 
+static void __init setup_clockevent_multiplier(unsigned long hz)
+{
+	u64 mult, shift = 32;
+
+	while (1) {
+		mult = div_sc(hz, NSEC_PER_SEC, shift);
+		if (mult && (mult >> 32UL) == 0UL)
+			break;
+
+		shift--;
+	}
+
+	decrementer_clockevent.shift = shift;
+	decrementer_clockevent.mult = mult;
+}
+
 static void register_decrementer_clockevent(int cpu)
 static void register_decrementer_clockevent(int cpu)
 {
 {
 	struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
 	struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
@@ -860,8 +876,7 @@ static void __init init_decrementer_clockevent(void)
 {
 {
 	int cpu = smp_processor_id();
 	int cpu = smp_processor_id();
 
 
-	decrementer_clockevent.mult = div_sc(ppc_tb_freq, NSEC_PER_SEC,
-					     decrementer_clockevent.shift);
+	setup_clockevent_multiplier(ppc_tb_freq);
 	decrementer_clockevent.max_delta_ns =
 	decrementer_clockevent.max_delta_ns =
 		clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
 		clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
 	decrementer_clockevent.min_delta_ns =
 	decrementer_clockevent.min_delta_ns =