vclock_gettime.c 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. /*
  2. * Copyright 2006 Andi Kleen, SUSE Labs.
  3. * Subject to the GNU Public License, v.2
  4. *
  5. * Fast user context implementation of clock_gettime, gettimeofday, and time.
  6. *
  7. * The code should have no internal unresolved relocations.
  8. * Check with readelf after changing.
  9. */
  10. /* Disable profiling for userspace code: */
  11. #define DISABLE_BRANCH_PROFILING
  12. #include <linux/kernel.h>
  13. #include <linux/posix-timers.h>
  14. #include <linux/time.h>
  15. #include <linux/string.h>
  16. #include <asm/vsyscall.h>
  17. #include <asm/fixmap.h>
  18. #include <asm/vgtod.h>
  19. #include <asm/timex.h>
  20. #include <asm/hpet.h>
  21. #include <asm/unistd.h>
  22. #include <asm/io.h>
  23. #include <asm/pvclock.h>
  24. #define gtod (&VVAR(vsyscall_gtod_data))
  25. notrace static cycle_t vread_tsc(void)
  26. {
  27. cycle_t ret;
  28. u64 last;
  29. /*
  30. * Empirically, a fence (of type that depends on the CPU)
  31. * before rdtsc is enough to ensure that rdtsc is ordered
  32. * with respect to loads. The various CPU manuals are unclear
  33. * as to whether rdtsc can be reordered with later loads,
  34. * but no one has ever seen it happen.
  35. */
  36. rdtsc_barrier();
  37. ret = (cycle_t)vget_cycles();
  38. last = VVAR(vsyscall_gtod_data).clock.cycle_last;
  39. if (likely(ret >= last))
  40. return ret;
  41. /*
  42. * GCC likes to generate cmov here, but this branch is extremely
  43. * predictable (it's just a funciton of time and the likely is
  44. * very likely) and there's a data dependence, so force GCC
  45. * to generate a branch instead. I don't barrier() because
  46. * we don't actually need a barrier, and if this function
  47. * ever gets inlined it will generate worse code.
  48. */
  49. asm volatile ("");
  50. return last;
  51. }
  52. static notrace cycle_t vread_hpet(void)
  53. {
  54. return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
  55. }
  56. #ifdef CONFIG_PARAVIRT_CLOCK
  57. static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
  58. {
  59. const struct pvclock_vsyscall_time_info *pvti_base;
  60. int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
  61. int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
  62. BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
  63. pvti_base = (struct pvclock_vsyscall_time_info *)
  64. __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
  65. return &pvti_base[offset];
  66. }
  67. static notrace cycle_t vread_pvclock(int *mode)
  68. {
  69. const struct pvclock_vsyscall_time_info *pvti;
  70. cycle_t ret;
  71. u64 last;
  72. u32 version;
  73. u8 flags;
  74. unsigned cpu, cpu1;
  75. /*
  76. * Note: hypervisor must guarantee that:
  77. * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
  78. * 2. that per-CPU pvclock time info is updated if the
  79. * underlying CPU changes.
  80. * 3. that version is increased whenever underlying CPU
  81. * changes.
  82. *
  83. */
  84. do {
  85. cpu = __getcpu() & VGETCPU_CPU_MASK;
  86. /* TODO: We can put vcpu id into higher bits of pvti.version.
  87. * This will save a couple of cycles by getting rid of
  88. * __getcpu() calls (Gleb).
  89. */
  90. pvti = get_pvti(cpu);
  91. version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
  92. /*
  93. * Test we're still on the cpu as well as the version.
  94. * We could have been migrated just after the first
  95. * vgetcpu but before fetching the version, so we
  96. * wouldn't notice a version change.
  97. */
  98. cpu1 = __getcpu() & VGETCPU_CPU_MASK;
  99. } while (unlikely(cpu != cpu1 ||
  100. (pvti->pvti.version & 1) ||
  101. pvti->pvti.version != version));
  102. if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
  103. *mode = VCLOCK_NONE;
  104. /* refer to tsc.c read_tsc() comment for rationale */
  105. last = VVAR(vsyscall_gtod_data).clock.cycle_last;
  106. if (likely(ret >= last))
  107. return ret;
  108. return last;
  109. }
  110. #endif
  111. notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
  112. {
  113. long ret;
  114. asm("syscall" : "=a" (ret) :
  115. "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory");
  116. return ret;
  117. }
  118. notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
  119. {
  120. long ret;
  121. asm("syscall" : "=a" (ret) :
  122. "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
  123. return ret;
  124. }
  125. notrace static inline u64 vgetsns(int *mode)
  126. {
  127. long v;
  128. cycles_t cycles;
  129. if (gtod->clock.vclock_mode == VCLOCK_TSC)
  130. cycles = vread_tsc();
  131. else if (gtod->clock.vclock_mode == VCLOCK_HPET)
  132. cycles = vread_hpet();
  133. #ifdef CONFIG_PARAVIRT_CLOCK
  134. else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
  135. cycles = vread_pvclock(mode);
  136. #endif
  137. else
  138. return 0;
  139. v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
  140. return v * gtod->clock.mult;
  141. }
  142. /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
  143. notrace static int __always_inline do_realtime(struct timespec *ts)
  144. {
  145. unsigned long seq;
  146. u64 ns;
  147. int mode;
  148. ts->tv_nsec = 0;
  149. do {
  150. seq = read_seqcount_begin(&gtod->seq);
  151. mode = gtod->clock.vclock_mode;
  152. ts->tv_sec = gtod->wall_time_sec;
  153. ns = gtod->wall_time_snsec;
  154. ns += vgetsns(&mode);
  155. ns >>= gtod->clock.shift;
  156. } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
  157. timespec_add_ns(ts, ns);
  158. return mode;
  159. }
  160. notrace static int do_monotonic(struct timespec *ts)
  161. {
  162. unsigned long seq;
  163. u64 ns;
  164. int mode;
  165. ts->tv_nsec = 0;
  166. do {
  167. seq = read_seqcount_begin(&gtod->seq);
  168. mode = gtod->clock.vclock_mode;
  169. ts->tv_sec = gtod->monotonic_time_sec;
  170. ns = gtod->monotonic_time_snsec;
  171. ns += vgetsns(&mode);
  172. ns >>= gtod->clock.shift;
  173. } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
  174. timespec_add_ns(ts, ns);
  175. return mode;
  176. }
  177. notrace static int do_realtime_coarse(struct timespec *ts)
  178. {
  179. unsigned long seq;
  180. do {
  181. seq = read_seqcount_begin(&gtod->seq);
  182. ts->tv_sec = gtod->wall_time_coarse.tv_sec;
  183. ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
  184. } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
  185. return 0;
  186. }
  187. notrace static int do_monotonic_coarse(struct timespec *ts)
  188. {
  189. unsigned long seq;
  190. do {
  191. seq = read_seqcount_begin(&gtod->seq);
  192. ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
  193. ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
  194. } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
  195. return 0;
  196. }
  197. notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
  198. {
  199. int ret = VCLOCK_NONE;
  200. switch (clock) {
  201. case CLOCK_REALTIME:
  202. ret = do_realtime(ts);
  203. break;
  204. case CLOCK_MONOTONIC:
  205. ret = do_monotonic(ts);
  206. break;
  207. case CLOCK_REALTIME_COARSE:
  208. return do_realtime_coarse(ts);
  209. case CLOCK_MONOTONIC_COARSE:
  210. return do_monotonic_coarse(ts);
  211. }
  212. if (ret == VCLOCK_NONE)
  213. return vdso_fallback_gettime(clock, ts);
  214. return 0;
  215. }
  216. int clock_gettime(clockid_t, struct timespec *)
  217. __attribute__((weak, alias("__vdso_clock_gettime")));
  218. notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
  219. {
  220. long ret = VCLOCK_NONE;
  221. if (likely(tv != NULL)) {
  222. BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
  223. offsetof(struct timespec, tv_nsec) ||
  224. sizeof(*tv) != sizeof(struct timespec));
  225. ret = do_realtime((struct timespec *)tv);
  226. tv->tv_usec /= 1000;
  227. }
  228. if (unlikely(tz != NULL)) {
  229. /* Avoid memcpy. Some old compilers fail to inline it */
  230. tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
  231. tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
  232. }
  233. if (ret == VCLOCK_NONE)
  234. return vdso_fallback_gtod(tv, tz);
  235. return 0;
  236. }
  237. int gettimeofday(struct timeval *, struct timezone *)
  238. __attribute__((weak, alias("__vdso_gettimeofday")));
  239. /*
  240. * This will break when the xtime seconds get inaccurate, but that is
  241. * unlikely
  242. */
  243. notrace time_t __vdso_time(time_t *t)
  244. {
  245. /* This is atomic on x86_64 so we don't need any locks. */
  246. time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
  247. if (t)
  248. *t = result;
  249. return result;
  250. }
  251. int time(time_t *t)
  252. __attribute__((weak, alias("__vdso_time")));