sysenter_32.c

/*
 * (C) Copyright 2002 Linus Torvalds
 * Portions based on the vdso-randomization code from exec-shield:
 * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
 *
 * This file contains the needed initializations to support sysenter.
 */

#include <linux/init.h>
#include <linux/smp.h>
#include <linux/thread_info.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>

#include <asm/cpufeature.h>
#include <asm/msr.h>
#include <asm/pgtable.h>
#include <asm/unistd.h>
#include <asm/elf.h>
#include <asm/tlbflush.h>

enum {
	VDSO_DISABLED = 0,
	VDSO_ENABLED = 1,
	VDSO_COMPAT = 2,
};
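/*
 * These are the values the "vdso=" boot parameter (parsed by vdso_setup()
 * below) stores in vdso_enabled: 0 disables the vDSO entirely, 1 maps it at
 * an address chosen at exec() time, and 2 maps it at the legacy fixed
 * VDSO_HIGH_BASE address for userspace that expects the old location.
 */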
#ifdef CONFIG_COMPAT_VDSO
#define VDSO_DEFAULT	VDSO_COMPAT
#else
#define VDSO_DEFAULT	VDSO_ENABLED
#endif

/*
 * Should the kernel map a VDSO page into processes and pass its
 * address down to glibc upon exec()?
 */
unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;

EXPORT_SYMBOL_GPL(vdso_enabled);

static int __init vdso_setup(char *s)
{
	vdso_enabled = simple_strtoul(s, NULL, 0);

	return 1;
}

__setup("vdso=", vdso_setup);

extern asmlinkage void sysenter_entry(void);
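/*
 * In compat mode the vDSO is mapped at the fixed VDSO_HIGH_BASE fixmap
 * address, but the ELF image built into the kernel is linked with a load
 * address of zero.  The helpers below walk that image once at boot and add
 * VDSO_HIGH_BASE to every address-valued field (symbol values, dynamic
 * section pointers, section and program header addresses) so debuggers and
 * the dynamic linker see consistent addresses.
 */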
static __init void reloc_symtab(Elf32_Ehdr *ehdr,
				unsigned offset, unsigned size)
{
	Elf32_Sym *sym = (void *)ehdr + offset;
	unsigned nsym = size / sizeof(*sym);
	unsigned i;

	for(i = 0; i < nsym; i++, sym++) {
		if (sym->st_shndx == SHN_UNDEF ||
		    sym->st_shndx == SHN_ABS)
			continue;  /* skip */

		if (sym->st_shndx > SHN_LORESERVE) {
			printk(KERN_INFO "VDSO: unexpected st_shndx %x\n",
			       sym->st_shndx);
			continue;
		}

		switch(ELF_ST_TYPE(sym->st_info)) {
		case STT_OBJECT:
		case STT_FUNC:
		case STT_SECTION:
		case STT_FILE:
			sym->st_value += VDSO_HIGH_BASE;
		}
	}
}

static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
{
	Elf32_Dyn *dyn = (void *)ehdr + offset;

	for(; dyn->d_tag != DT_NULL; dyn++)
		switch(dyn->d_tag) {
		case DT_PLTGOT:
		case DT_HASH:
		case DT_STRTAB:
		case DT_SYMTAB:
		case DT_RELA:
		case DT_INIT:
		case DT_FINI:
		case DT_REL:
		case DT_DEBUG:
		case DT_JMPREL:
		case DT_VERSYM:
		case DT_VERDEF:
		case DT_VERNEED:
		case DT_ADDRRNGLO ... DT_ADDRRNGHI:
			/* definitely pointers needing relocation */
			dyn->d_un.d_ptr += VDSO_HIGH_BASE;
			break;

		case DT_ENCODING ... OLD_DT_LOOS-1:
		case DT_LOOS ... DT_HIOS-1:
			/* Tags above DT_ENCODING are pointers if
			   they're even */
			if (dyn->d_tag >= DT_ENCODING &&
			    (dyn->d_tag & 1) == 0)
				dyn->d_un.d_ptr += VDSO_HIGH_BASE;
			break;

		case DT_VERDEFNUM:
		case DT_VERNEEDNUM:
		case DT_FLAGS_1:
		case DT_RELACOUNT:
		case DT_RELCOUNT:
		case DT_VALRNGLO ... DT_VALRNGHI:
			/* definitely not pointers */
			break;

		case OLD_DT_LOOS ... DT_LOOS-1:
		case DT_HIOS ... DT_VALRNGLO-1:

		default:
			if (dyn->d_tag > DT_ENCODING)
				printk(KERN_INFO "VDSO: unexpected DT_tag %x\n",
				       dyn->d_tag);
			break;
		}
}
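/*
 * Rebase the whole in-kernel vDSO image: the ELF entry point, every program
 * header (recursing into the PT_DYNAMIC segment via reloc_dyn()), and every
 * allocated section, fixing up symbol tables via reloc_symtab() on the way.
 */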
static __init void relocate_vdso(Elf32_Ehdr *ehdr)
{
	Elf32_Phdr *phdr;
	Elf32_Shdr *shdr;
	int i;

	BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
	       !elf_check_arch(ehdr) ||
	       ehdr->e_type != ET_DYN);

	ehdr->e_entry += VDSO_HIGH_BASE;

	/* rebase phdrs */
	phdr = (void *)ehdr + ehdr->e_phoff;
	for (i = 0; i < ehdr->e_phnum; i++) {
		phdr[i].p_vaddr += VDSO_HIGH_BASE;

		/* relocate dynamic stuff */
		if (phdr[i].p_type == PT_DYNAMIC)
			reloc_dyn(ehdr, phdr[i].p_offset);
	}

	/* rebase sections */
	shdr = (void *)ehdr + ehdr->e_shoff;
	for(i = 0; i < ehdr->e_shnum; i++) {
		if (!(shdr[i].sh_flags & SHF_ALLOC))
			continue;

		shdr[i].sh_addr += VDSO_HIGH_BASE;

		if (shdr[i].sh_type == SHT_SYMTAB ||
		    shdr[i].sh_type == SHT_DYNSYM)
			reloc_symtab(ehdr, shdr[i].sh_offset,
				     shdr[i].sh_size);
	}
}
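/*
 * Program the three SYSENTER MSRs on this CPU: MSR_IA32_SYSENTER_CS selects
 * the kernel code segment, MSR_IA32_SYSENTER_ESP points just past this
 * CPU's TSS (the ring-1 ss1/esp1 TSS fields are reused to remember that
 * value), and MSR_IA32_SYSENTER_EIP is the kernel entry point
 * (sysenter_entry).  CPUs without the SEP feature are left untouched and
 * keep entering the kernel via int $0x80.
 */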
void enable_sep_cpu(void)
{
	int cpu = get_cpu();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	if (!boot_cpu_has(X86_FEATURE_SEP)) {
		put_cpu();
		return;
	}

	tss->x86_tss.ss1 = __KERNEL_CS;
	tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
	wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
	wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0);
	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
	put_cpu();
}
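/*
 * The compat vDSO lives in the fixmap, outside any region covered by a real
 * VMA.  This synthetic "gate" VMA describes that range so core dumps,
 * ptrace and /proc can still treat it like an ordinary read/exec mapping
 * (see get_gate_vma() below).
 */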
static struct vm_area_struct gate_vma;

static int __init gate_vma_init(void)
{
	gate_vma.vm_mm = NULL;
	gate_vma.vm_start = FIXADDR_USER_START;
	gate_vma.vm_end = FIXADDR_USER_END;
	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
	gate_vma.vm_page_prot = __P101;
	/*
	 * Make sure the vDSO gets into every core dump.
	 * Dumping its contents makes post-mortem fully interpretable later
	 * without matching up the same kernel and hardware config to see
	 * what PC values meant.
	 */
	gate_vma.vm_flags |= VM_ALWAYSDUMP;
	return 0;
}

/*
 * These symbols are defined by vsyscall.o to mark the bounds
 * of the ELF DSO images included therein.
 */
extern const char vsyscall_int80_start, vsyscall_int80_end;
extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;

static struct page *syscall_pages[1];
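/*
 * Map or unmap the syscall page at the fixed FIX_VDSO fixmap slot.  Since
 * vdso_enabled can be changed at runtime via sysctl, this is re-evaluated
 * on every exec(); the static flag avoids redundant fixmap updates and TLB
 * flushes when the mode has not changed.
 */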
static void map_compat_vdso(int map)
{
	static int vdso_mapped;

	if (map == vdso_mapped)
		return;

	vdso_mapped = map;

	__set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT,
		     map ? PAGE_READONLY_EXEC : PAGE_NONE);

	/* flush stray tlbs */
	flush_tlb_all();
}
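/*
 * Boot-time setup: allocate the single vDSO page, copy in whichever DSO
 * image this CPU can use (the sysenter variant if SEP is available, the
 * int $0x80 variant otherwise) and relocate its ELF metadata for the
 * compat (VDSO_HIGH_BASE) mapping.
 */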
int __init sysenter_setup(void)
{
	void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
	const void *vsyscall;
	size_t vsyscall_len;

	syscall_pages[0] = virt_to_page(syscall_page);

	gate_vma_init();

	printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));

	if (!boot_cpu_has(X86_FEATURE_SEP)) {
		vsyscall = &vsyscall_int80_start;
		vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start;
	} else {
		vsyscall = &vsyscall_sysenter_start;
		vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start;
	}

	memcpy(syscall_page, vsyscall, vsyscall_len);
	relocate_vdso(syscall_page);

	return 0;
}

/* Defined in vsyscall-sysenter.S */
extern void SYSENTER_RETURN;

/* Setup a VMA at program startup for the vsyscall page */
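/*
 * Called from the ELF loader at exec() time.  In compat mode the page is
 * already visible at the fixed VDSO_HIGH_BASE fixmap address; otherwise a
 * fresh address is picked with get_unmapped_area() and the page installed
 * there as a special mapping.  Either way the chosen address is recorded in
 * mm->context.vdso (this is what glibc receives via the ELF auxiliary
 * vector), and the sysexit landing point is stashed in
 * thread_info->sysenter_return.
 */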
int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret = 0;
	bool compat;

	down_write(&mm->mmap_sem);

	/* Test compat mode once here, in case someone
	   changes it via sysctl */
	compat = (vdso_enabled == VDSO_COMPAT);

	map_compat_vdso(compat);

	if (compat)
		addr = VDSO_HIGH_BASE;
	else {
		addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
		if (IS_ERR_VALUE(addr)) {
			ret = addr;
			goto up_fail;
		}

		/*
		 * MAYWRITE to allow gdb to COW and set breakpoints
		 *
		 * Make sure the vDSO gets into every core dump.
		 * Dumping its contents makes post-mortem fully
		 * interpretable later without matching up the same
		 * kernel and hardware config to see what PC values
		 * meant.
		 */
		ret = install_special_mapping(mm, addr, PAGE_SIZE,
					      VM_READ|VM_EXEC|
					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
					      VM_ALWAYSDUMP,
					      syscall_pages);
		if (ret)
			goto up_fail;
	}

	current->mm->context.vdso = (void *)addr;
	current_thread_info()->sysenter_return =
				    (void *)VDSO_SYM(&SYSENTER_RETURN);

up_fail:
	up_write(&mm->mmap_sem);

	return ret;
}
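/*
 * The helpers below let the rest of the kernel identify the vDSO mapping:
 * arch_vma_name() labels it "[vdso]" in /proc/<pid>/maps, while
 * get_gate_vma()/in_gate_area() cover the compat case, where the page sits
 * in the fixmap and therefore has no VMA of its own in the mm.
 */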
const char *arch_vma_name(struct vm_area_struct *vma)
{
	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
		return "[vdso]";
	return NULL;
}

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
	struct mm_struct *mm = tsk->mm;

	/* Check to see if this task was created in compat vdso mode */
	if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
		return &gate_vma;

	return NULL;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
	const struct vm_area_struct *vma = get_gate_vma(task);

	return vma && addr >= vma->vm_start && addr < vma->vm_end;
}

int in_gate_area_no_task(unsigned long addr)
{
	return 0;
}