vsyscall.c 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. /*
  2. * linux/arch/x86_64/kernel/vsyscall.c
  3. *
  4. * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
  5. * Copyright 2003 Andi Kleen, SuSE Labs.
  6. *
  7. * Thanks to hpa@transmeta.com for some useful hint.
  8. * Special thanks to Ingo Molnar for his early experience with
  9. * a different vsyscall implementation for Linux/IA32 and for the name.
  10. *
  11. * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
  12. * at virtual address -10Mbyte+1024bytes etc... There are at max 4
  13. * vsyscalls. One vsyscall can reserve more than 1 slot to avoid
  14. * jumping out of line if necessary. We cannot add more with this
  15. * mechanism because older kernels won't return -ENOSYS.
  16. * If we want more than four we need a vDSO.
  17. *
  18. * Note: the concept clashes with user mode linux. If you use UML and
  19. * want per guest time just set the kernel.vsyscall64 sysctl to 0.
  20. */
  21. #include <linux/time.h>
  22. #include <linux/init.h>
  23. #include <linux/kernel.h>
  24. #include <linux/timer.h>
  25. #include <linux/seqlock.h>
  26. #include <linux/jiffies.h>
  27. #include <linux/sysctl.h>
  28. #include <asm/vsyscall.h>
  29. #include <asm/pgtable.h>
  30. #include <asm/page.h>
  31. #include <asm/fixmap.h>
  32. #include <asm/errno.h>
  33. #include <asm/io.h>
  34. #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
  35. #define force_inline __attribute__((always_inline)) inline
  36. int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
  37. seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
  38. #include <asm/unistd.h>
  39. static force_inline void timeval_normalize(struct timeval * tv)
  40. {
  41. time_t __sec;
  42. __sec = tv->tv_usec / 1000000;
  43. if (__sec) {
  44. tv->tv_usec %= 1000000;
  45. tv->tv_sec += __sec;
  46. }
  47. }
  48. static force_inline void do_vgettimeofday(struct timeval * tv)
  49. {
  50. long sequence, t;
  51. unsigned long sec, usec;
  52. do {
  53. sequence = read_seqbegin(&__xtime_lock);
  54. sec = __xtime.tv_sec;
  55. usec = (__xtime.tv_nsec / 1000) +
  56. (__jiffies - __wall_jiffies) * (1000000 / HZ);
  57. if (__vxtime.mode != VXTIME_HPET) {
  58. sync_core();
  59. rdtscll(t);
  60. if (t < __vxtime.last_tsc)
  61. t = __vxtime.last_tsc;
  62. usec += ((t - __vxtime.last_tsc) *
  63. __vxtime.tsc_quot) >> 32;
  64. /* See comment in x86_64 do_gettimeofday. */
  65. } else {
  66. usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
  67. __vxtime.last) * __vxtime.quot) >> 32;
  68. }
  69. } while (read_seqretry(&__xtime_lock, sequence));
  70. tv->tv_sec = sec + usec / 1000000;
  71. tv->tv_usec = usec % 1000000;
  72. }
  73. /* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
  74. static force_inline void do_get_tz(struct timezone * tz)
  75. {
  76. *tz = __sys_tz;
  77. }
  78. static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
  79. {
  80. int ret;
  81. asm volatile("vsysc2: syscall"
  82. : "=a" (ret)
  83. : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
  84. return ret;
  85. }
  86. static force_inline long time_syscall(long *t)
  87. {
  88. long secs;
  89. asm volatile("vsysc1: syscall"
  90. : "=a" (secs)
  91. : "0" (__NR_time),"D" (t) : __syscall_clobber);
  92. return secs;
  93. }
  94. static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
  95. {
  96. if (unlikely(!__sysctl_vsyscall))
  97. return gettimeofday(tv,tz);
  98. if (tv)
  99. do_vgettimeofday(tv);
  100. if (tz)
  101. do_get_tz(tz);
  102. return 0;
  103. }
  104. /* This will break when the xtime seconds get inaccurate, but that is
  105. * unlikely */
  106. static time_t __vsyscall(1) vtime(time_t *t)
  107. {
  108. if (unlikely(!__sysctl_vsyscall))
  109. return time_syscall(t);
  110. else if (t)
  111. *t = __xtime.tv_sec;
  112. return __xtime.tv_sec;
  113. }
  114. static long __vsyscall(2) venosys_0(void)
  115. {
  116. return -ENOSYS;
  117. }
  118. static long __vsyscall(3) venosys_1(void)
  119. {
  120. return -ENOSYS;
  121. }
  122. #ifdef CONFIG_SYSCTL
  123. #define SYSCALL 0x050f
  124. #define NOP2 0x9090
  125. /*
  126. * NOP out syscall in vsyscall page when not needed.
  127. */
  128. static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
  129. void __user *buffer, size_t *lenp, loff_t *ppos)
  130. {
  131. extern u16 vsysc1, vsysc2;
  132. u16 *map1, *map2;
  133. int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
  134. if (!write)
  135. return ret;
  136. /* gcc has some trouble with __va(__pa()), so just do it this
  137. way. */
  138. map1 = ioremap(__pa_symbol(&vsysc1), 2);
  139. if (!map1)
  140. return -ENOMEM;
  141. map2 = ioremap(__pa_symbol(&vsysc2), 2);
  142. if (!map2) {
  143. ret = -ENOMEM;
  144. goto out;
  145. }
  146. if (!sysctl_vsyscall) {
  147. *map1 = SYSCALL;
  148. *map2 = SYSCALL;
  149. } else {
  150. *map1 = NOP2;
  151. *map2 = NOP2;
  152. }
  153. iounmap(map2);
  154. out:
  155. iounmap(map1);
  156. return ret;
  157. }
  158. static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
  159. void __user *oldval, size_t __user *oldlenp,
  160. void __user *newval, size_t newlen,
  161. void **context)
  162. {
  163. return -ENOSYS;
  164. }
  165. static ctl_table kernel_table2[] = {
  166. { .ctl_name = 99, .procname = "vsyscall64",
  167. .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644,
  168. .strategy = vsyscall_sysctl_nostrat,
  169. .proc_handler = vsyscall_sysctl_change },
  170. { 0, }
  171. };
  172. static ctl_table kernel_root_table2[] = {
  173. { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
  174. .child = kernel_table2 },
  175. { 0 },
  176. };
  177. #endif
  178. static void __init map_vsyscall(void)
  179. {
  180. extern char __vsyscall_0;
  181. unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
  182. __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
  183. }
  184. static int __init vsyscall_init(void)
  185. {
  186. BUG_ON(((unsigned long) &vgettimeofday !=
  187. VSYSCALL_ADDR(__NR_vgettimeofday)));
  188. BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
  189. BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
  190. map_vsyscall();
  191. #ifdef CONFIG_SYSCTL
  192. register_sysctl_table(kernel_root_table2, 0);
  193. #endif
  194. return 0;
  195. }
  196. __initcall(vsyscall_init);