/*
 * linux/arch/i386/kernel/sysenter.c
 *
 * (C) Copyright 2002 Linus Torvalds
 * Portions based on the vdso-randomization code from exec-shield:
 * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
 *
 * This file contains the needed initializations to support sysenter.
 */

#include <linux/init.h>
#include <linux/smp.h>
#include <linux/thread_info.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>

#include <asm/cpufeature.h>
#include <asm/msr.h>
#include <asm/pgtable.h>
#include <asm/unistd.h>
#include <asm/elf.h>
#include <asm/tlbflush.h>

enum {
	VDSO_DISABLED = 0,
	VDSO_ENABLED = 1,
	VDSO_COMPAT = 2,
};

#ifdef CONFIG_COMPAT_VDSO
#define VDSO_DEFAULT	VDSO_COMPAT
#else
#define VDSO_DEFAULT	VDSO_ENABLED
#endif

/*
 * Should the kernel map a VDSO page into processes and pass its
 * address down to glibc upon exec()?
 */
unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;

EXPORT_SYMBOL_GPL(vdso_enabled);
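
/*
 * Parse the "vdso=" command-line option: the value selects
 * VDSO_DISABLED, VDSO_ENABLED or VDSO_COMPAT as defined above.
 */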
static int __init vdso_setup(char *s)
{
	vdso_enabled = simple_strtoul(s, NULL, 0);

	return 1;
}

__setup("vdso=", vdso_setup);

extern asmlinkage void sysenter_entry(void);
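
/*
 * Rebase every defined symbol in one of the image's symbol tables by
 * VDSO_HIGH_BASE so the symbols describe the fixmap (compat) mapping.
 */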
static __init void reloc_symtab(Elf32_Ehdr *ehdr,
				unsigned offset, unsigned size)
{
	Elf32_Sym *sym = (void *)ehdr + offset;
	unsigned nsym = size / sizeof(*sym);
	unsigned i;

	for(i = 0; i < nsym; i++, sym++) {
		if (sym->st_shndx == SHN_UNDEF ||
		    sym->st_shndx == SHN_ABS)
			continue;	/* skip */

		if (sym->st_shndx > SHN_LORESERVE) {
			printk(KERN_INFO "VDSO: unexpected st_shndx %x\n",
			       sym->st_shndx);
			continue;
		}

		switch(ELF_ST_TYPE(sym->st_info)) {
		case STT_OBJECT:
		case STT_FUNC:
		case STT_SECTION:
		case STT_FILE:
			sym->st_value += VDSO_HIGH_BASE;
		}
	}
}
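
/*
 * Walk the dynamic section and add VDSO_HIGH_BASE to every entry whose
 * d_un field holds an address rather than a plain value.
 */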
static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
{
	Elf32_Dyn *dyn = (void *)ehdr + offset;

	for(; dyn->d_tag != DT_NULL; dyn++)
		switch(dyn->d_tag) {
		case DT_PLTGOT:
		case DT_HASH:
		case DT_STRTAB:
		case DT_SYMTAB:
		case DT_RELA:
		case DT_INIT:
		case DT_FINI:
		case DT_REL:
		case DT_DEBUG:
		case DT_JMPREL:
		case DT_VERSYM:
		case DT_VERDEF:
		case DT_VERNEED:
		case DT_ADDRRNGLO ... DT_ADDRRNGHI:
			/* definitely pointers needing relocation */
			dyn->d_un.d_ptr += VDSO_HIGH_BASE;
			break;

		case DT_ENCODING ... OLD_DT_LOOS-1:
		case DT_LOOS ... DT_HIOS-1:
			/* Tags above DT_ENCODING are pointers if
			   they're even */
			if (dyn->d_tag >= DT_ENCODING &&
			    (dyn->d_tag & 1) == 0)
				dyn->d_un.d_ptr += VDSO_HIGH_BASE;
			break;

		case DT_VERDEFNUM:
		case DT_VERNEEDNUM:
		case DT_FLAGS_1:
		case DT_RELACOUNT:
		case DT_RELCOUNT:
		case DT_VALRNGLO ... DT_VALRNGHI:
			/* definitely not pointers */
			break;

		case OLD_DT_LOOS ... DT_LOOS-1:
		case DT_HIOS ... DT_VALRNGLO-1:
		default:
			if (dyn->d_tag > DT_ENCODING)
				printk(KERN_INFO "VDSO: unexpected DT_tag %x\n",
				       dyn->d_tag);
			break;
		}
}
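
/*
 * Relocate the vDSO image in place for the fixmap mapping: rebase the
 * entry point, program headers, allocated sections, the dynamic section
 * and the symbol tables by VDSO_HIGH_BASE.
 */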
static __init void relocate_vdso(Elf32_Ehdr *ehdr)
{
	Elf32_Phdr *phdr;
	Elf32_Shdr *shdr;
	int i;

	BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
	       !elf_check_arch(ehdr) ||
	       ehdr->e_type != ET_DYN);

	ehdr->e_entry += VDSO_HIGH_BASE;

	/* rebase phdrs */
	phdr = (void *)ehdr + ehdr->e_phoff;
	for (i = 0; i < ehdr->e_phnum; i++) {
		phdr[i].p_vaddr += VDSO_HIGH_BASE;

		/* relocate dynamic stuff */
		if (phdr[i].p_type == PT_DYNAMIC)
			reloc_dyn(ehdr, phdr[i].p_offset);
	}

	/* rebase sections */
	shdr = (void *)ehdr + ehdr->e_shoff;
	for(i = 0; i < ehdr->e_shnum; i++) {
		if (!(shdr[i].sh_flags & SHF_ALLOC))
			continue;

		shdr[i].sh_addr += VDSO_HIGH_BASE;

		if (shdr[i].sh_type == SHT_SYMTAB ||
		    shdr[i].sh_type == SHT_DYNSYM)
			reloc_symtab(ehdr, shdr[i].sh_offset,
				     shdr[i].sh_size);
	}
}
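
/*
 * Program the SYSENTER MSRs on this CPU so that the sysenter instruction
 * enters the kernel at sysenter_entry, with the stack set up from the
 * per-CPU TSS.  Does nothing on CPUs without the SEP feature.
 */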
void enable_sep_cpu(void)
{
	int cpu = get_cpu();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	if (!boot_cpu_has(X86_FEATURE_SEP)) {
		put_cpu();
		return;
	}

	tss->x86_tss.ss1 = __KERNEL_CS;
	tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
	wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
	wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0);
	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
	put_cpu();
}
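
/*
 * The compat (fixmap) vDSO has no vm_area_struct in the process address
 * space, so this synthetic "gate" VMA describes it to the rest of the
 * VM code (core dumps, /proc and friends).
 */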
static struct vm_area_struct gate_vma;

static int __init gate_vma_init(void)
{
	gate_vma.vm_mm = NULL;
	gate_vma.vm_start = FIXADDR_USER_START;
	gate_vma.vm_end = FIXADDR_USER_END;
	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
	gate_vma.vm_page_prot = __P101;
	/*
	 * Make sure the vDSO gets into every core dump.
	 * Dumping its contents makes post-mortem fully interpretable later
	 * without matching up the same kernel and hardware config to see
	 * what PC values meant.
	 */
	gate_vma.vm_flags |= VM_ALWAYSDUMP;
	return 0;
}

/*
 * These symbols are defined by vsyscall.o to mark the bounds
 * of the ELF DSO images included therein.
 */
extern const char vsyscall_int80_start, vsyscall_int80_end;
extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
static struct page *syscall_pages[1];
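
/*
 * Map or unmap the vDSO page at the FIX_VDSO fixmap address.  The fixmap
 * is shared by all processes, so the static flag makes this a no-op
 * unless the compat setting actually changed.
 */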
static void map_compat_vdso(int map)
{
	static int vdso_mapped;

	if (map == vdso_mapped)
		return;

	vdso_mapped = map;

	__set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT,
		     map ? PAGE_READONLY_EXEC : PAGE_NONE);

	/* flush stray tlbs */
	flush_tlb_all();
}
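
/*
 * Boot-time setup of the vDSO page: pick the int80 or sysenter variant
 * depending on CPU support, copy it into a freshly allocated page and
 * relocate it for the compat (fixmap) address.
 */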
int __init sysenter_setup(void)
{
	void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
	const void *vsyscall;
	size_t vsyscall_len;

	syscall_pages[0] = virt_to_page(syscall_page);

	gate_vma_init();

	printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));

	if (!boot_cpu_has(X86_FEATURE_SEP)) {
		vsyscall = &vsyscall_int80_start;
		vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start;
	} else {
		vsyscall = &vsyscall_sysenter_start;
		vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start;
	}

	memcpy(syscall_page, vsyscall, vsyscall_len);
	relocate_vdso(syscall_page);

	return 0;
}

/* Defined in vsyscall-sysenter.S */
extern void SYSENTER_RETURN;

/* Setup a VMA at program startup for the vsyscall page */
int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret = 0;
	bool compat;

	down_write(&mm->mmap_sem);

	/* Test compat mode once here, in case someone
	   changes it via sysctl */
	compat = (vdso_enabled == VDSO_COMPAT);

	map_compat_vdso(compat);

	if (compat)
		addr = VDSO_HIGH_BASE;
	else {
		addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
		if (IS_ERR_VALUE(addr)) {
			ret = addr;
			goto up_fail;
		}

		/*
		 * MAYWRITE to allow gdb to COW and set breakpoints
		 *
		 * Make sure the vDSO gets into every core dump.
		 * Dumping its contents makes post-mortem fully
		 * interpretable later without matching up the same
		 * kernel and hardware config to see what PC values
		 * meant.
		 */
		ret = install_special_mapping(mm, addr, PAGE_SIZE,
					      VM_READ|VM_EXEC|
					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
					      VM_ALWAYSDUMP,
					      syscall_pages);
		if (ret)
			goto up_fail;
	}

	current->mm->context.vdso = (void *)addr;
	current_thread_info()->sysenter_return =
		(void *)VDSO_SYM(&SYSENTER_RETURN);

up_fail:
	up_write(&mm->mmap_sem);

	return ret;
}
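
/* Report the vDSO mapping as "[vdso]", e.g. in /proc/<pid>/maps. */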
const char *arch_vma_name(struct vm_area_struct *vma)
{
	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
		return "[vdso]";
	return NULL;
}
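
/*
 * Return the gate VMA for tasks whose vDSO was mapped at the fixed
 * VDSO_HIGH_BASE address, i.e. tasks exec'd in compat vDSO mode.
 */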
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
	struct mm_struct *mm = tsk->mm;

	/* Check to see if this task was created in compat vdso mode */
	if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
		return &gate_vma;

	return NULL;
}
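
/* True if @addr lies inside the task's gate area, if it has one. */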
int in_gate_area(struct task_struct *task, unsigned long addr)
{
	const struct vm_area_struct *vma = get_gate_vma(task);

	return vma && addr >= vma->vm_start && addr < vma->vm_end;
}
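
/*
 * With no task to consult we cannot tell whether an address belongs to a
 * compat vDSO mapping, so report that it does not.
 */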
int in_gate_area_no_task(unsigned long addr)
{
	return 0;
}