@@ -11,6 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/kernel_stat.h>
 
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
@@ -25,6 +26,7 @@
 
 /* Xen may fire a timer up to this many ns early */
 #define TIMER_SLOP 100000
+#define NS_PER_TICK (1000000000LL / HZ)
 
 /* These are periodically updated in shared_info, and then copied here. */
 struct shadow_time_info {
@@ -37,6 +39,139 @@ struct shadow_time_info {
 
 static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
 
+/* runstate info updated by Xen */
+static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+
+/* snapshots of runstate info */
+static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot);
+
+/* unused ns of stolen and blocked time */
+static DEFINE_PER_CPU(u64, residual_stolen);
+static DEFINE_PER_CPU(u64, residual_blocked);
+
+/* return a consistent snapshot of 64-bit time/counter value */
+static u64 get64(const u64 *p)
+{
+	u64 ret;
+
+	if (BITS_PER_LONG < 64) {
+		u32 *p32 = (u32 *)p;
+		u32 h, l;
+
+		/*
+		 * Read high then low, and then make sure high is
+		 * still the same; this will only loop if low wraps
+		 * and carries into high.
+		 * XXX some clean way to make this endian-proof?
+		 */
+		do {
+			h = p32[1];
+			barrier();
+			l = p32[0];
+			barrier();
+		} while (p32[1] != h);
+
+		ret = (((u64)h) << 32) | l;
+	} else
+		ret = *p;
+
+	return ret;
+}
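The high/low/high read above is the standard lock-free pattern for sampling a 64-bit counter that another context keeps updating on a 32-bit build: the retry only triggers when the low word wraps and carries into the high word between the two high-word reads. A minimal standalone sketch of the same idea (hypothetical helper; volatile stands in for the explicit barrier() calls):

	/* Illustrative only: torn-read-safe fetch of a 64-bit value whose
	 * halves are published by another context (low word first, then high). */
	static u64 read_u64_torn_safe(const volatile u32 *lo, const volatile u32 *hi)
	{
		u32 h, l;

		do {
			h = *hi;	/* sample high word */
			l = *lo;	/* then low word */
		} while (*hi != h);	/* retry if high changed under us */

		return ((u64)h << 32) | l;
	}

For a nanosecond counter the low word wraps only about once every 4.3 seconds, so the loop virtually never iterates twice.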
+
+/*
+ * Runstate accounting
+ */
+static void get_runstate_snapshot(struct vcpu_runstate_info *res)
+{
+	u64 state_time;
+	struct vcpu_runstate_info *state;
+
+	preempt_disable();
+
+	state = &__get_cpu_var(runstate);
+
+	/*
+	 * The runstate info is always updated by the hypervisor on
+	 * the current CPU, so there's no need to use anything
+	 * stronger than a compiler barrier when fetching it.
+	 */
+	do {
+		state_time = get64(&state->state_entry_time);
+		barrier();
+		*res = *state;
+		barrier();
+	} while (get64(&state->state_entry_time) != state_time);
+
+	preempt_enable();
+}
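state_entry_time effectively serves as a sequence counter: Xen rewrites it whenever it updates the runstate area, so sampling it before and after the copy detects a concurrent update. The reader side has the same shape as the kernel's seqcount primitives; a sketch of the equivalent, assuming a Linux-owned writer (which is exactly what we don't have here — Xen writes the area, hence the open-coded retry):

	#include <linux/seqlock.h>

	/* Hypothetical: the same snapshot loop expressed with a seqcount,
	 * for a structure whose writer runs in the kernel rather than Xen. */
	static seqcount_t demo_seq;
	static struct vcpu_runstate_info demo_state;

	static void demo_snapshot(struct vcpu_runstate_info *res)
	{
		unsigned seq;

		do {
			seq = read_seqcount_begin(&demo_seq);
			*res = demo_state;
		} while (read_seqcount_retry(&demo_seq, seq));
	}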
+
+static void setup_runstate_info(int cpu)
+{
+	struct vcpu_register_runstate_memory_area area;
+
+	area.addr.v = &per_cpu(runstate, cpu);
+
+	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
+			       cpu, &area))
+		BUG();
+}
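The hypercall hands Xen a guest virtual address that it will keep updated from then on; registration is per VCPU. A hedged usage sketch (hypothetical function; the patch itself calls setup_runstate_info() once per CPU from xen_setup_timer() below):

	/* Illustrative only: register a runstate area for every online CPU.
	 * Assumes the per-cpu 'runstate' variables are already allocated. */
	static void register_all_runstate_areas(void)
	{
		int cpu;

		for_each_online_cpu(cpu)
			setup_runstate_info(cpu);
	}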
+
+static void do_stolen_accounting(void)
+{
+	struct vcpu_runstate_info state;
+	struct vcpu_runstate_info *snap;
+	s64 blocked, runnable, offline, stolen;
+	cputime_t ticks;
+
+	get_runstate_snapshot(&state);
+
+	WARN_ON(state.state != RUNSTATE_running);
+
+	snap = &__get_cpu_var(runstate_snapshot);
+
+	/* work out how much time the VCPU has not been runn*ing* */
+	blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
+	runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
+	offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
+
+	*snap = state;
+
+	/* Add the appropriate number of ticks of stolen time,
+	   including any left-overs from last time.  Passing NULL to
+	   account_steal_time accounts the time as stolen. */
+	stolen = runnable + offline + __get_cpu_var(residual_stolen);
+
+	if (stolen < 0)
+		stolen = 0;
+
+	ticks = 0;
+	while (stolen >= NS_PER_TICK) {
+		ticks++;
+		stolen -= NS_PER_TICK;
+	}
+	__get_cpu_var(residual_stolen) = stolen;
+	account_steal_time(NULL, ticks);
+
+	/* Add the appropriate number of ticks of blocked time,
+	   including any left-overs from last time.  Passing idle to
+	   account_steal_time accounts the time as idle/wait. */
+	blocked += __get_cpu_var(residual_blocked);
+
+	if (blocked < 0)
+		blocked = 0;
+
+	ticks = 0;
+	while (blocked >= NS_PER_TICK) {
+		ticks++;
+		blocked -= NS_PER_TICK;
+	}
+	__get_cpu_var(residual_blocked) = blocked;
+	account_steal_time(idle_task(smp_processor_id()), ticks);
+}
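To make the tick bookkeeping concrete: with HZ=250, NS_PER_TICK is 4,000,000. If the runnable+offline delta plus the previous residual comes to 9,500,000 ns, the loop accounts 2 stolen ticks and carries 1,500,000 ns forward in residual_stolen. The subtraction loop likely stands in for a 64-bit division, which is awkward on 32-bit kernels; a hypothetical helper showing the same computation:

	/* Hypothetical equivalent of the two tick loops above: convert
	 * accumulated ns into whole ticks, leaving the remainder behind. */
	static unsigned long ns_to_ticks(s64 ns, s64 *residual)
	{
		unsigned long ticks = 0;

		if (ns < 0)
			ns = 0;
		while (ns >= NS_PER_TICK) {	/* e.g. 9,500,000 ns at HZ=250  */
			ticks++;		/*   -> 2 ticks ...             */
			ns -= NS_PER_TICK;	/*   ... with 1,500,000 ns left */
		}
		*residual = ns;
		return ticks;
	}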
+
+
+
+/* Get the CPU speed from Xen */
 unsigned long xen_cpu_khz(void)
 {
 	u64 cpu_khz = 1000000ULL << 32;
@@ -56,13 +191,11 @@ unsigned long xen_cpu_khz(void)
  * Reads a consistent set of time-base values from Xen, into a shadow data
  * area.
  */
-static void get_time_values_from_xen(void)
+static unsigned get_time_values_from_xen(void)
 {
 	struct vcpu_time_info *src;
 	struct shadow_time_info *dst;
 
-	preempt_disable();
-
 	/* src is shared memory with the hypervisor, so we need to
 	   make sure we get a consistent snapshot, even in the face of
 	   being preempted. */
@@ -79,7 +212,7 @@ static void get_time_values_from_xen(void)
 		rmb(); /* test version after fetching data */
 	} while ((src->version & 1) | (dst->version ^ src->version));
 
-	preempt_enable();
+	return dst->version;
 }
 
 /*
@@ -123,7 +256,7 @@ static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
 static u64 get_nsec_offset(struct shadow_time_info *shadow)
 {
 	u64 now, delta;
-	rdtscll(now);
+	now = native_read_tsc();
 	delta = now - shadow->tsc_timestamp;
 	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
 }
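For reference, scale_delta() (unchanged by this patch) converts a TSC delta to nanoseconds with the mul/shift pair Xen publishes per VCPU: shift the delta by tsc_shift, multiply by the 32.32 fixed-point tsc_to_nsec_mul, and keep bits [95:32] of the 96-bit product. A non-optimized sketch, assuming a compiler with __int128 (the in-tree routine open-codes the widening multiply per architecture):

	/* Illustrative only: reference version of the mul/shift scaling. */
	static inline u64 scale_delta_ref(u64 delta, u32 mul_frac, int shift)
	{
		if (shift < 0)
			delta >>= -shift;
		else
			delta <<= shift;

		/* 64x32 -> 96-bit product; keep bits [95:32] */
		return ((unsigned __int128)delta * mul_frac) >> 32;
	}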
@@ -132,10 +265,14 @@ cycle_t xen_clocksource_read(void)
 {
 	struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
 	cycle_t ret;
+	unsigned version;
 
-	get_time_values_from_xen();
-
-	ret = shadow->system_timestamp + get_nsec_offset(shadow);
+	do {
+		version = get_time_values_from_xen();
+		barrier();
+		ret = shadow->system_timestamp + get_nsec_offset(shadow);
+		barrier();
+	} while (version != __get_cpu_var(xen_vcpu)->time.version);
 
 	put_cpu_var(shadow_time);
 
@@ -352,6 +489,8 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
 		ret = IRQ_HANDLED;
 	}
 
+	do_stolen_accounting();
+
 	return ret;
 }
 
@@ -378,6 +517,8 @@ static void xen_setup_timer(int cpu)
 	evt->irq = irq;
 	clockevents_register_device(evt);
 
+	setup_runstate_info(cpu);
+
 	put_cpu_var(xen_clock_events);
 }
 
@@ -390,7 +531,7 @@ __init void xen_time_init(void)
 	clocksource_register(&xen_clocksource);
 
 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
-		/* Successfully turned off 100hz tick, so we have the
+		/* Successfully turned off 100Hz tick, so we have the
 		   vcpuop-based timer interface */
 		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
 		xen_clockevent = &xen_vcpuop_clockevent;
|