sysenter.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. /*
  2. * linux/arch/i386/kernel/sysenter.c
  3. *
  4. * (C) Copyright 2002 Linus Torvalds
  5. * Portions based on the vdso-randomization code from exec-shield:
  6. * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
  7. *
  8. * This file contains the needed initializations to support sysenter.
  9. */
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/thread_info.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/elf.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/module.h>

#include <asm/cpufeature.h>
#include <asm/msr.h>
#include <asm/pgtable.h>
#include <asm/unistd.h>
#include <asm/elf.h>
#include <asm/tlbflush.h>
/*
 * Values taken by vdso_enabled (settable via the "vdso=" boot
 * parameter parsed below).
 */
enum {
	VDSO_DISABLED = 0,	/* do not map a vDSO page */
	VDSO_ENABLED = 1,	/* map the vDSO at a dynamically chosen address */
	VDSO_COMPAT = 2,	/* map the vDSO at the fixed VDSO_HIGH_BASE fixmap */
};

/* Kernels built with CONFIG_COMPAT_VDSO default to the fixed mapping. */
#ifdef CONFIG_COMPAT_VDSO
#define VDSO_DEFAULT	VDSO_COMPAT
#else
#define VDSO_DEFAULT	VDSO_ENABLED
#endif
/*
 * Should the kernel map a VDSO page into processes and pass its
 * address down to glibc upon exec()?
 */
unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;

EXPORT_SYMBOL_GPL(vdso_enabled);

/*
 * Parse the "vdso=" boot parameter (e.g. "vdso=0"); the value is
 * stored verbatim into vdso_enabled, see the enum above for meanings.
 */
static int __init vdso_setup(char *s)
{
	vdso_enabled = simple_strtoul(s, NULL, 0);

	return 1;	/* parameter handled */
}

__setup("vdso=", vdso_setup);
  47. extern asmlinkage void sysenter_entry(void);
  48. static __init void reloc_symtab(Elf32_Ehdr *ehdr,
  49. unsigned offset, unsigned size)
  50. {
  51. Elf32_Sym *sym = (void *)ehdr + offset;
  52. unsigned nsym = size / sizeof(*sym);
  53. unsigned i;
  54. for(i = 0; i < nsym; i++, sym++) {
  55. if (sym->st_shndx == SHN_UNDEF ||
  56. sym->st_shndx == SHN_ABS)
  57. continue; /* skip */
  58. if (sym->st_shndx > SHN_LORESERVE) {
  59. printk(KERN_INFO "VDSO: unexpected st_shndx %x\n",
  60. sym->st_shndx);
  61. continue;
  62. }
  63. switch(ELF_ST_TYPE(sym->st_info)) {
  64. case STT_OBJECT:
  65. case STT_FUNC:
  66. case STT_SECTION:
  67. case STT_FILE:
  68. sym->st_value += VDSO_HIGH_BASE;
  69. }
  70. }
  71. }
/*
 * Rebase the pointer-valued entries of the vDSO's PT_DYNAMIC table by
 * VDSO_HIGH_BASE.  Which d_un values are pointers (d_ptr) versus plain
 * values (d_val) depends on the tag, so classify each tag explicitly.
 *
 * @ehdr:   the in-place vDSO ELF image
 * @offset: byte offset of the dynamic section within the image
 */
static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
{
	Elf32_Dyn *dyn = (void *)ehdr + offset;

	/* the dynamic section is terminated by a DT_NULL entry */
	for(; dyn->d_tag != DT_NULL; dyn++)
		switch(dyn->d_tag) {
		case DT_PLTGOT:
		case DT_HASH:
		case DT_STRTAB:
		case DT_SYMTAB:
		case DT_RELA:
		case DT_INIT:
		case DT_FINI:
		case DT_REL:
		case DT_DEBUG:
		case DT_JMPREL:
		case DT_VERSYM:
		case DT_VERDEF:
		case DT_VERNEED:
		case DT_ADDRRNGLO ... DT_ADDRRNGHI:
			/* definitely pointers needing relocation */
			dyn->d_un.d_ptr += VDSO_HIGH_BASE;
			break;

		case DT_ENCODING ... OLD_DT_LOOS-1:
		case DT_LOOS ... DT_HIOS-1:
			/* Tags above DT_ENCODING are pointers if
			   they're even (per the ELF d_tag encoding
			   convention: even = d_ptr, odd = d_val) */
			if (dyn->d_tag >= DT_ENCODING &&
			    (dyn->d_tag & 1) == 0)
				dyn->d_un.d_ptr += VDSO_HIGH_BASE;
			break;

		case DT_VERDEFNUM:
		case DT_VERNEEDNUM:
		case DT_FLAGS_1:
		case DT_RELACOUNT:
		case DT_RELCOUNT:
		case DT_VALRNGLO ... DT_VALRNGHI:
			/* definitely not pointers */
			break;

		case OLD_DT_LOOS ... DT_LOOS-1:
		case DT_HIOS ... DT_VALRNGLO-1:
		default:
			/* unknown tags in the encoded range: warn, leave alone */
			if (dyn->d_tag > DT_ENCODING)
				printk(KERN_INFO "VDSO: unexpected DT_tag %x\n",
				       dyn->d_tag);
			break;
		}
}
  119. static __init void relocate_vdso(Elf32_Ehdr *ehdr)
  120. {
  121. Elf32_Phdr *phdr;
  122. Elf32_Shdr *shdr;
  123. int i;
  124. BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
  125. !elf_check_arch(ehdr) ||
  126. ehdr->e_type != ET_DYN);
  127. ehdr->e_entry += VDSO_HIGH_BASE;
  128. /* rebase phdrs */
  129. phdr = (void *)ehdr + ehdr->e_phoff;
  130. for (i = 0; i < ehdr->e_phnum; i++) {
  131. phdr[i].p_vaddr += VDSO_HIGH_BASE;
  132. /* relocate dynamic stuff */
  133. if (phdr[i].p_type == PT_DYNAMIC)
  134. reloc_dyn(ehdr, phdr[i].p_offset);
  135. }
  136. /* rebase sections */
  137. shdr = (void *)ehdr + ehdr->e_shoff;
  138. for(i = 0; i < ehdr->e_shnum; i++) {
  139. if (!(shdr[i].sh_flags & SHF_ALLOC))
  140. continue;
  141. shdr[i].sh_addr += VDSO_HIGH_BASE;
  142. if (shdr[i].sh_type == SHT_SYMTAB ||
  143. shdr[i].sh_type == SHT_DYNSYM)
  144. reloc_symtab(ehdr, shdr[i].sh_offset,
  145. shdr[i].sh_size);
  146. }
  147. }
/*
 * Program the per-CPU SYSENTER MSRs so that a "sysenter" instruction
 * from user space enters the kernel at sysenter_entry with the stack
 * pointing just past this CPU's TSS.  Does nothing on CPUs without
 * the SEP feature.
 */
void enable_sep_cpu(void)
{
	int cpu = get_cpu();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	if (!boot_cpu_has(X86_FEATURE_SEP)) {
		put_cpu();
		return;
	}

	/* esp1 = one byte past the end of this CPU's TSS structure */
	tss->x86_tss.ss1 = __KERNEL_CS;
	tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
	wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
	wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0);
	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
	put_cpu();
}
  163. static struct vm_area_struct gate_vma;
  164. static int __init gate_vma_init(void)
  165. {
  166. gate_vma.vm_mm = NULL;
  167. gate_vma.vm_start = FIXADDR_USER_START;
  168. gate_vma.vm_end = FIXADDR_USER_END;
  169. gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
  170. gate_vma.vm_page_prot = __P101;
  171. /*
  172. * Make sure the vDSO gets into every core dump.
  173. * Dumping its contents makes post-mortem fully interpretable later
  174. * without matching up the same kernel and hardware config to see
  175. * what PC values meant.
  176. */
  177. gate_vma.vm_flags |= VM_ALWAYSDUMP;
  178. return 0;
  179. }
  180. /*
  181. * These symbols are defined by vsyscall.o to mark the bounds
  182. * of the ELF DSO images included therein.
  183. */
  184. extern const char vsyscall_int80_start, vsyscall_int80_end;
  185. extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
  186. static struct page *syscall_pages[1];
/*
 * Map or unmap the compat vDSO at its fixmap slot (FIX_VDSO).
 * Tracks the current state so repeated calls with the same value are
 * cheap no-ops; a state change rewrites the fixmap PTE and flushes
 * all TLBs.
 *
 * NOTE(review): the static state makes this caller-serialized only if
 * callers already hold a lock (arch_setup_additional_pages calls it
 * under mmap_sem, which is per-mm) — confirm no concurrent callers.
 */
static void map_compat_vdso(int map)
{
	static int vdso_mapped;

	if (map == vdso_mapped)
		return;

	vdso_mapped = map;

	__set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT,
		     map ? PAGE_READONLY_EXEC : PAGE_NONE);

	/* flush stray tlbs */
	flush_tlb_all();
}
  198. int __init sysenter_setup(void)
  199. {
  200. void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
  201. const void *vsyscall;
  202. size_t vsyscall_len;
  203. syscall_pages[0] = virt_to_page(syscall_page);
  204. gate_vma_init();
  205. printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
  206. if (!boot_cpu_has(X86_FEATURE_SEP)) {
  207. vsyscall = &vsyscall_int80_start;
  208. vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start;
  209. } else {
  210. vsyscall = &vsyscall_sysenter_start;
  211. vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start;
  212. }
  213. memcpy(syscall_page, vsyscall, vsyscall_len);
  214. relocate_vdso(syscall_page);
  215. return 0;
  216. }
/* Defined in vsyscall-sysenter.S */
extern void SYSENTER_RETURN;

/*
 * Setup a VMA at program startup for the vsyscall page.
 *
 * In compat mode the page lives at the fixed fixmap address
 * VDSO_HIGH_BASE (no per-mm VMA is installed); otherwise a one-page
 * special mapping is installed at an address chosen by
 * get_unmapped_area().  The resulting address is recorded in
 * mm->context.vdso and the sysenter return trampoline address in the
 * thread_info.
 *
 * Returns 0 on success or a negative errno.
 */
int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret = 0;
	bool compat;

	down_write(&mm->mmap_sem);

	/* Test compat mode once here, in case someone
	   changes it via sysctl */
	compat = (vdso_enabled == VDSO_COMPAT);

	map_compat_vdso(compat);

	if (compat)
		addr = VDSO_HIGH_BASE;
	else {
		addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
		if (IS_ERR_VALUE(addr)) {
			ret = addr;	/* negative errno from get_unmapped_area */
			goto up_fail;
		}

		/*
		 * MAYWRITE to allow gdb to COW and set breakpoints
		 *
		 * Make sure the vDSO gets into every core dump.
		 * Dumping its contents makes post-mortem fully
		 * interpretable later without matching up the same
		 * kernel and hardware config to see what PC values
		 * meant.
		 */
		ret = install_special_mapping(mm, addr, PAGE_SIZE,
					      VM_READ|VM_EXEC|
					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
					      VM_ALWAYSDUMP,
					      syscall_pages);

		if (ret)
			goto up_fail;
	}

	/* record where the vDSO landed; also consulted by get_gate_vma() */
	current->mm->context.vdso = (void *)addr;
	current_thread_info()->sysenter_return =
		(void *)VDSO_SYM(&SYSENTER_RETURN);

  up_fail:
	up_write(&mm->mmap_sem);

	return ret;
}
  263. const char *arch_vma_name(struct vm_area_struct *vma)
  264. {
  265. if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
  266. return "[vdso]";
  267. return NULL;
  268. }
  269. struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
  270. {
  271. struct mm_struct *mm = tsk->mm;
  272. /* Check to see if this task was created in compat vdso mode */
  273. if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
  274. return &gate_vma;
  275. return NULL;
  276. }
/*
 * On i386 the compat vDSO is exposed via get_gate_vma() above, so no
 * address is ever considered to lie in a separate gate area.
 */
int in_gate_area(struct task_struct *task, unsigned long addr)
{
	return 0;
}

int in_gate_area_no_task(unsigned long addr)
{
	return 0;
}