@@ -767,6 +767,7 @@ static cycle_t read_tsc(struct clocksource *cs)
 static cycle_t __vsyscall_fn vread_tsc(void)
 {
 	cycle_t ret;
+	u64 last;
 
 	/*
 	 * Empirically, a fence (of type that depends on the CPU)
@@ -778,8 +779,21 @@ static cycle_t __vsyscall_fn vread_tsc(void)
 	rdtsc_barrier();
 	ret = (cycle_t)vget_cycles();
 
-	return ret >= VVAR(vsyscall_gtod_data).clock.cycle_last ?
-		ret : VVAR(vsyscall_gtod_data).clock.cycle_last;
+	last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+
+	if (likely(ret >= last))
+		return ret;
+
+	/*
+	 * GCC likes to generate cmov here, but this branch is extremely
+	 * predictable (it's just a function of time and the likely is
+	 * very likely) and there's a data dependence, so force GCC
+	 * to generate a branch instead. I don't barrier() because
+	 * we don't actually need a barrier, and if this function
+	 * ever gets inlined it will generate worse code.
+	 */
+	asm volatile ("");
+	return last;
 }
 #endif
 
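Note (not part of the patch): the same trick can be seen in a minimal
standalone sketch, assuming GCC and its gnu C dialect. The empty asm
statement on the unlikely path keeps the compiler from collapsing the
branch into a cmov, so the common case stays a well-predicted branch with
no data dependence on last. read_counter and counter_last below are
made-up stand-ins for vget_cycles()/cycle_last, not code from this patch.

#include <stdint.h>

#define likely(x) __builtin_expect(!!(x), 1)

static uint64_t counter_last;	/* hypothetical stand-in for cycle_last */

static uint64_t read_counter(uint64_t sample)
{
	uint64_t last = counter_last;

	if (likely(sample >= last))
		return sample;

	/* Same idea as the patch: keep this a real branch, not a cmov. */
	asm volatile ("");
	return last;
}

int main(void)
{
	counter_last = 100;

	/* Monotonic sample is returned as-is; a stale sample returns last. */
	return (read_counter(150) == 150 && read_counter(50) == 100) ? 0 : 1;
}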