@@ -18,6 +18,7 @@
 
 #include <linux/clocksource.h>
 #include <linux/kvm_para.h>
+#include <asm/pvclock.h>
 #include <asm/arch_hooks.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
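Note: the new <asm/pvclock.h> include brings in the Xen-style shared-memory ABI that replaces the private kvm_vcpu_time_info/kvm_wall_clock structures deleted below. As a rough sketch of what the guest and host now share (layout as introduced by this series; it is ABI, hence packed):

struct pvclock_vcpu_time_info {
	u32   version;		/* odd while the host is mid-update */
	u32   pad0;
	u64   tsc_timestamp;	/* TSC value at the time of update */
	u64   system_time;	/* guest system time at tsc_timestamp */
	u32   tsc_to_system_mul;
	s8    tsc_shift;
	u8    pad[3];
} __attribute__((__packed__));

struct pvclock_wall_clock {
	u32   version;
	u32   sec;		/* wallclock at boot, seconds */
	u32   nsec;
} __attribute__((__packed__));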
@@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg)
 early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_wall_clock wall_clock;
 
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
-	int cpu = smp_processor_id();
-	u64 delta = native_read_tsc() - last_tsc;
-	return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
-
-static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void);
  */
 static unsigned long kvm_get_wallclock(void)
 {
-	u32 wc_sec, wc_nsec;
-	u64 delta;
+	struct pvclock_vcpu_time_info *vcpu_time;
 	struct timespec ts;
-	int version, nsec;
 	int low, high;
 
 	low = (int)__pa(&wall_clock);
 	high = ((u64)__pa(&wall_clock) >> 32);
+	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
 
-	delta = kvm_clock_read();
+	vcpu_time = &get_cpu_var(hv_clock);
+	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+	put_cpu_var(hv_clock);
 
-	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
-	do {
-		version = wall_clock.wc_version;
-		rmb();
-		wc_sec = wall_clock.wc_sec;
-		wc_nsec = wall_clock.wc_nsec;
-		rmb();
-	} while ((wall_clock.wc_version != version) || (version & 1));
-
-	delta = kvm_clock_read() - delta;
-	delta += wc_nsec;
-	nsec = do_div(delta, NSEC_PER_SEC);
-	set_normalized_timespec(&ts, wc_sec + delta, nsec);
-	/*
-	 * Of all mechanisms of time adjustment I've tested, this one
-	 * was the champion!
-	 */
-	return ts.tv_sec + 1;
+	return ts.tv_sec;
 }
 
 static int kvm_set_wallclock(unsigned long now)
 {
-	return 0;
+	return -1;
 }
 
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
 static cycle_t kvm_clock_read(void)
 {
-	u64 last_tsc, now;
-	int cpu;
+	struct pvclock_vcpu_time_info *src;
+	cycle_t ret;
 
-	preempt_disable();
-	cpu = smp_processor_id();
-
-	last_tsc = get_clock(cpu, tsc_timestamp);
-	now = get_clock(cpu, system_time);
-
-	now += kvm_get_delta(last_tsc);
-	preempt_enable();
-
-	return now;
+	src = &get_cpu_var(hv_clock);
+	ret = pvclock_clocksource_read(src);
+	put_cpu_var(hv_clock);
+	return ret;
 }
+
 static struct clocksource kvm_clock = {
 	.name = "kvm-clock",
 	.read = kvm_clock_read,
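Note: pvclock_clocksource_read() absorbs both kvm_get_delta() and the open-coded version loops deleted above; the concern in the removed comment (a vcpu with an older system_time probing time later and seeing it run backwards) is handled by the same TSC-delta adjustment, now in one shared place. A minimal sketch of the protocol, assuming the field names shown earlier and ignoring tsc_shift scaling for brevity:

/* sketch only: retry while the host is mid-update (odd version)
 * or the version changed while we were reading */
static u64 pvclock_read_sketch(struct pvclock_vcpu_time_info *src)
{
	u32 version;
	u64 delta, ret;

	do {
		version = src->version;
		rmb();
		delta = native_read_tsc() - src->tsc_timestamp;
		ret = src->system_time +
			((delta * src->tsc_to_system_mul) >> 32);
		rmb();
	} while ((src->version & 1) || (version != src->version));

	return ret;
}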
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = {
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static int kvm_register_clock(void)
+static int kvm_register_clock(char *txt)
 {
 	int cpu = smp_processor_id();
 	int low, high;
 	low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
 	high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
-
+	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
+	       cpu, high, low, txt);
 	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
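Note: with the printk added here, each registration becomes visible in the guest log, tagged by call site. On a hypothetical two-vcpu guest the output would look something like the following (the msr values are the guest-physical addresses of the per-cpu areas with the enable bit 0 set, made up here for illustration):

kvm-clock: cpu 0, msr 0:1f41f01, boot clock
kvm-clock: cpu 0, msr 0:1f41f01, primary cpu clock
kvm-clock: cpu 1, msr 0:1f4a101, secondary cpu clock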
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void)
 	 * Now that the first cpu already had this clocksource initialized,
 	 * we shouldn't fail.
 	 */
-	WARN_ON(kvm_register_clock());
+	WARN_ON(kvm_register_clock("secondary cpu clock"));
 	/* ok, done with our trickery, call native */
 	setup_secondary_APIC_clock();
 }
 #endif
 
+#ifdef CONFIG_SMP
+void __init kvm_smp_prepare_boot_cpu(void)
+{
+	WARN_ON(kvm_register_clock("primary cpu clock"));
+	native_smp_prepare_boot_cpu();
+}
+#endif
+
 /*
  * After the clock is registered, the host will keep writing to the
  * registered memory location. If the guest happens to shutdown, this memory
@@ -174,13 +148,16 @@ void __init kvmclock_init(void)
 		return;
 
 	if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
-		if (kvm_register_clock())
+		if (kvm_register_clock("boot clock"))
			return;
 		pv_time_ops.get_wallclock = kvm_get_wallclock;
 		pv_time_ops.set_wallclock = kvm_set_wallclock;
 		pv_time_ops.sched_clock = kvm_clock_read;
 #ifdef CONFIG_X86_LOCAL_APIC
 		pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+#endif
+#ifdef CONFIG_SMP
+		smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 #endif
 		machine_ops.shutdown = kvm_shutdown;
 #ifdef CONFIG_KEXEC
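Note: the shutdown/kexec hooks installed in the last hunk matter because, per the comment above, the host keeps writing to the registered page after registration. The handler they point at (untouched by this patch, elsewhere in this file) is essentially the following: writing 0 to the MSR disarms the clock so the host stops dirtying guest memory that may be reused after shutdown.

static void kvm_shutdown(void)
{
	/* writing 0 tells the host to stop updating the pvclock area */
	native_write_msr(MSR_KVM_SYSTEM_TIME, 0, 0);
	native_machine_shutdown();
}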