vsyscall.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. /*
  2. * linux/arch/x86_64/kernel/vsyscall.c
  3. *
  4. * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
  5. * Copyright 2003 Andi Kleen, SuSE Labs.
  6. *
  7. * Thanks to hpa@transmeta.com for some useful hint.
  8. * Special thanks to Ingo Molnar for his early experience with
  9. * a different vsyscall implementation for Linux/IA32 and for the name.
  10. *
  11. * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
  12. * at virtual address -10Mbyte+1024bytes etc... There are at max 4
  13. * vsyscalls. One vsyscall can reserve more than 1 slot to avoid
  14. * jumping out of line if necessary. We cannot add more with this
  15. * mechanism because older kernels won't return -ENOSYS.
  16. * If we want more than four we need a vDSO.
  17. *
  18. * Note: the concept clashes with user mode linux. If you use UML and
  19. * want per guest time just set the kernel.vsyscall64 sysctl to 0.
  20. */
  21. #include <linux/time.h>
  22. #include <linux/init.h>
  23. #include <linux/kernel.h>
  24. #include <linux/timer.h>
  25. #include <linux/seqlock.h>
  26. #include <linux/jiffies.h>
  27. #include <linux/sysctl.h>
  28. #include <asm/vsyscall.h>
  29. #include <asm/pgtable.h>
  30. #include <asm/page.h>
  31. #include <asm/fixmap.h>
  32. #include <asm/errno.h>
  33. #include <asm/io.h>
  /*
   * Tag a function for placement in the linker section ".vsyscall_<nr>".
   * The "unused" attribute silences warnings for functions that nothing in
   * this file calls directly.
   */
  34. #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
  /*
   * Switch tested by vgettimeofday() and vtime(): while it is non-zero they
   * answer from the __xtime/__sys_tz copies, when it is zero they issue the
   * real system call instead.  Starts out as 1.
   * NOTE(review): __section_sysctl_vsyscall is defined outside this file;
   * presumably it places the variable in a dedicated section -- confirm.
   */
  35. int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
  /*
   * Seqlock sampled by do_vgettimeofday() (read_seqbegin/read_seqretry)
   * around its reads of the time variables.
   */
  36. seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
  37. #include <asm/unistd.h>
  /*
   * Carry whole seconds that have accumulated in tv->tv_usec over into
   * tv->tv_sec, leaving only the sub-second remainder in tv->tv_usec.
   * Nothing is written when tv_usec holds less than one full second.
   */
  static __always_inline void timeval_normalize(struct timeval *tv)
  {
      const time_t carry = tv->tv_usec / 1000000;

      if (carry == 0)
          return;

      tv->tv_sec += carry;
      tv->tv_usec %= 1000000;
  }
  47. static __always_inline void do_vgettimeofday(struct timeval * tv)
  48. {
  49. long sequence, t;
  50. unsigned long sec, usec;
  51. do {
  52. sequence = read_seqbegin(&__xtime_lock);
  53. sec = __xtime.tv_sec;
  54. usec = (__xtime.tv_nsec / 1000) +
  55. (__jiffies - __wall_jiffies) * (1000000 / HZ);
  56. if (__vxtime.mode != VXTIME_HPET) {
  57. t = get_cycles_sync();
  58. if (t < __vxtime.last_tsc)
  59. t = __vxtime.last_tsc;
  60. usec += ((t - __vxtime.last_tsc) *
  61. __vxtime.tsc_quot) >> 32;
  62. /* See comment in x86_64 do_gettimeofday. */
  63. } else {
  64. usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
  65. __vxtime.last) * __vxtime.quot) >> 32;
  66. }
  67. } while (read_seqretry(&__xtime_lock, sequence));
  68. tv->tv_sec = sec + usec / 1000000;
  69. tv->tv_usec = usec % 1000000;
  70. }
  71. /* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
  72. static __always_inline void do_get_tz(struct timezone * tz)
  73. {
  74. *tz = __sys_tz;
  75. }
  76. static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
  77. {
  78. int ret;
  79. asm volatile("vsysc2: syscall"
  80. : "=a" (ret)
  81. : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
  82. return ret;
  83. }
  84. static __always_inline long time_syscall(long *t)
  85. {
  86. long secs;
  87. asm volatile("vsysc1: syscall"
  88. : "=a" (secs)
  89. : "0" (__NR_time),"D" (t) : __syscall_clobber);
  90. return secs;
  91. }
  92. int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
  93. {
  94. if (!__sysctl_vsyscall)
  95. return gettimeofday(tv,tz);
  96. if (tv)
  97. do_vgettimeofday(tv);
  98. if (tz)
  99. do_get_tz(tz);
  100. return 0;
  101. }
  102. /* This will break when the xtime seconds get inaccurate, but that is
  103. * unlikely */
  104. time_t __vsyscall(1) vtime(time_t *t)
  105. {
  106. if (!__sysctl_vsyscall)
  107. return time_syscall(t);
  108. else if (t)
  109. *t = __xtime.tv_sec;
  110. return __xtime.tv_sec;
  111. }
  112. long __vsyscall(2) venosys_0(void)
  113. {
  114. return -ENOSYS;
  115. }
  116. long __vsyscall(3) venosys_1(void)
  117. {
  118. return -ENOSYS;
  119. }
  120. #ifdef CONFIG_SYSCTL
  121. #define SYSCALL 0x050f
  122. #define NOP2 0x9090
  123. /*
  124. * NOP out syscall in vsyscall page when not needed.
  125. */
  126. static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
  127. void __user *buffer, size_t *lenp, loff_t *ppos)
  128. {
  129. extern u16 vsysc1, vsysc2;
  130. u16 *map1, *map2;
  131. int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
  132. if (!write)
  133. return ret;
  134. /* gcc has some trouble with __va(__pa()), so just do it this
  135. way. */
  136. map1 = ioremap(__pa_symbol(&vsysc1), 2);
  137. if (!map1)
  138. return -ENOMEM;
  139. map2 = ioremap(__pa_symbol(&vsysc2), 2);
  140. if (!map2) {
  141. ret = -ENOMEM;
  142. goto out;
  143. }
  144. if (!sysctl_vsyscall) {
  145. *map1 = SYSCALL;
  146. *map2 = SYSCALL;
  147. } else {
  148. *map1 = NOP2;
  149. *map2 = NOP2;
  150. }
  151. iounmap(map2);
  152. out:
  153. iounmap(map1);
  154. return ret;
  155. }
  156. static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
  157. void __user *oldval, size_t __user *oldlenp,
  158. void __user *newval, size_t newlen,
  159. void **context)
  160. {
  161. return -ENOSYS;
  162. }
  163. static ctl_table kernel_table2[] = {
  164. { .ctl_name = 99, .procname = "vsyscall64",
  165. .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644,
  166. .strategy = vsyscall_sysctl_nostrat,
  167. .proc_handler = vsyscall_sysctl_change },
  168. { 0, }
  169. };
  170. static ctl_table kernel_root_table2[] = {
  171. { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
  172. .child = kernel_table2 },
  173. { 0 },
  174. };
  175. #endif
  176. static void __init map_vsyscall(void)
  177. {
  178. extern char __vsyscall_0;
  179. unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
  180. __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
  181. }
  182. static int __init vsyscall_init(void)
  183. {
  184. BUG_ON(((unsigned long) &vgettimeofday !=
  185. VSYSCALL_ADDR(__NR_vgettimeofday)));
  186. BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
  187. BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
  188. map_vsyscall();
  189. #ifdef CONFIG_SYSCTL
  190. register_sysctl_table(kernel_root_table2, 0);
  191. #endif
  192. return 0;
  193. }
  194. __initcall(vsyscall_init);