@@ -18,6 +18,7 @@
 
 #include <linux/clocksource.h>
 #include <linux/kvm_para.h>
+#include <asm/pvclock.h>
 #include <asm/arch_hooks.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
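Note: the new <asm/pvclock.h> include brings in the Xen-style shared-memory ABI that replaces the private kvm_vcpu_time_info/kvm_wall_clock structures deleted below. As a rough sketch of what the guest and host now share (layout as introduced by this series; it is ABI, hence packed):

struct pvclock_vcpu_time_info {
	u32   version;		/* odd while the host is mid-update */
	u32   pad0;
	u64   tsc_timestamp;	/* TSC value at the time of update */
	u64   system_time;	/* guest system time at tsc_timestamp */
	u32   tsc_to_system_mul;
	s8    tsc_shift;
	u8    pad[3];
} __attribute__((__packed__));

struct pvclock_wall_clock {
	u32   version;
	u32   sec;		/* wallclock at boot, seconds */
	u32   nsec;
} __attribute__((__packed__));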
@@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg)
 early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_wall_clock wall_clock;
 
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
-	int cpu = smp_processor_id();
-	u64 delta = native_read_tsc() - last_tsc;
-	return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
-
-static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void);
  */
 static unsigned long kvm_get_wallclock(void)
 {
-	u32 wc_sec, wc_nsec;
-	u64 delta;
+	struct pvclock_vcpu_time_info *vcpu_time;
 	struct timespec ts;
-	int version, nsec;
 	int low, high;
 
 	low = (int)__pa(&wall_clock);
 	high = ((u64)__pa(&wall_clock) >> 32);
+	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
 
-	delta = kvm_clock_read();
+	vcpu_time = &get_cpu_var(hv_clock);
+	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+	put_cpu_var(hv_clock);
 
-	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
-	do {
-		version = wall_clock.wc_version;
-		rmb();
-		wc_sec = wall_clock.wc_sec;
-		wc_nsec = wall_clock.wc_nsec;
-		rmb();
-	} while ((wall_clock.wc_version != version) || (version & 1));
-
-	delta = kvm_clock_read() - delta;
-	delta += wc_nsec;
-	nsec = do_div(delta, NSEC_PER_SEC);
-	set_normalized_timespec(&ts, wc_sec + delta, nsec);
-	/*
-	 * Of all mechanisms of time adjustment I've tested, this one
-	 * was the champion!
-	 */
-	return ts.tv_sec + 1;
+	return ts.tv_sec;
 }
 
 static int kvm_set_wallclock(unsigned long now)
 {
-	return 0;
+	return -1;
 }
 
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
 static cycle_t kvm_clock_read(void)
 {
-	u64 last_tsc, now;
-	int cpu;
+	struct pvclock_vcpu_time_info *src;
+	cycle_t ret;
 
-	preempt_disable();
-	cpu = smp_processor_id();
-
-	last_tsc = get_clock(cpu, tsc_timestamp);
-	now = get_clock(cpu, system_time);
-
-	now += kvm_get_delta(last_tsc);
-	preempt_enable();
-
-	return now;
+	src = &get_cpu_var(hv_clock);
+	ret = pvclock_clocksource_read(src);
+	put_cpu_var(hv_clock);
+	return ret;
 }
+
 static struct clocksource kvm_clock = {
 	.name = "kvm-clock",
 	.read = kvm_clock_read,
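Note: pvclock_clocksource_read() absorbs both kvm_get_delta() and the open-coded version loops deleted above; the concern in the removed comment (a vcpu with an older system_time probing time later and seeing it run backwards) is handled by the same TSC-delta adjustment, now in one shared place. A minimal sketch of the protocol, assuming the field names shown earlier and ignoring tsc_shift scaling for brevity:

/* sketch only: retry while the host is mid-update (odd version)
 * or the version changed while we were reading */
static u64 pvclock_read_sketch(struct pvclock_vcpu_time_info *src)
{
	u32 version;
	u64 delta, ret;

	do {
		version = src->version;
		rmb();
		delta = native_read_tsc() - src->tsc_timestamp;
		ret = src->system_time +
			((delta * src->tsc_to_system_mul) >> 32);
		rmb();
	} while ((src->version & 1) || (version != src->version));

	return ret;
}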
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = {
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static int kvm_register_clock(void)
+static int kvm_register_clock(char *txt)
 {
 	int cpu = smp_processor_id();
 	int low, high;
 	low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
 	high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
-
+	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
+	       cpu, high, low, txt);
 	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
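Note: with the printk added here, each registration becomes visible in the guest log, tagged by call site. On a hypothetical two-vcpu guest the output would look something like the following (the msr values are the guest-physical addresses of the per-cpu areas with the enable bit 0 set, made up here for illustration):

kvm-clock: cpu 0, msr 0:1f41f01, boot clock
kvm-clock: cpu 0, msr 0:1f41f01, primary cpu clock
kvm-clock: cpu 1, msr 0:1f4a101, secondary cpu clock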
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void)
 	 * Now that the first cpu already had this clocksource initialized,
 	 * we shouldn't fail.
 	 */
-	WARN_ON(kvm_register_clock());
+	WARN_ON(kvm_register_clock("secondary cpu clock"));
 	/* ok, done with our trickery, call native */
 	setup_secondary_APIC_clock();
 }
 #endif
 
+#ifdef CONFIG_SMP
+void __init kvm_smp_prepare_boot_cpu(void)
+{
+	WARN_ON(kvm_register_clock("primary cpu clock"));
+	native_smp_prepare_boot_cpu();
+}
+#endif
+
 /*
  * After the clock is registered, the host will keep writing to the
  * registered memory location. If the guest happens to shutdown, this memory
@@ -174,13 +148,16 @@ void __init kvmclock_init(void)
 		return;
 
 	if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
-		if (kvm_register_clock())
+		if (kvm_register_clock("boot clock"))
			return;
 		pv_time_ops.get_wallclock = kvm_get_wallclock;
 		pv_time_ops.set_wallclock = kvm_set_wallclock;
 		pv_time_ops.sched_clock = kvm_clock_read;
 #ifdef CONFIG_X86_LOCAL_APIC
 		pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+#endif
+#ifdef CONFIG_SMP
+		smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 #endif
 		machine_ops.shutdown = kvm_shutdown;
 #ifdef CONFIG_KEXEC
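Note: the shutdown/kexec hooks installed in the last hunk matter because, per the comment above, the host keeps writing to the registered page after registration. The handler they point at (untouched by this patch, elsewhere in this file) is essentially the following: writing 0 to the MSR disarms the clock so the host stops dirtying guest memory that may be reused after shutdown.

static void kvm_shutdown(void)
{
	/* writing 0 tells the host to stop updating the pvclock area */
	native_write_msr(MSR_KVM_SYSTEM_TIME, 0, 0);
	native_machine_shutdown();
}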