fault.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. /*
  2. * linux/arch/cris/mm/fault.c
  3. *
  4. * Copyright (C) 2000, 2001 Axis Communications AB
  5. *
  6. * Authors: Bjorn Wesen
  7. *
  8. * $Log: fault.c,v $
  9. * Revision 1.20 2005/03/04 08:16:18 starvik
  10. * Merge of Linux 2.6.11.
  11. *
  12. * Revision 1.19 2005/01/14 10:07:59 starvik
  13. * Fixed warning.
  14. *
  15. * Revision 1.18 2005/01/12 08:10:14 starvik
  16. * Readded the change of frametype when handling kernel page fault fixup
  17. * for v10. This is necessary to avoid that the CPU remakes the faulting
  18. * access.
  19. *
  20. * Revision 1.17 2005/01/11 13:53:05 starvik
  21. * Use raw_printk.
  22. *
  23. * Revision 1.16 2004/12/17 11:39:41 starvik
  24. * SMP support.
  25. *
  26. * Revision 1.15 2004/11/23 18:36:18 starvik
  27. * Stack is now non-executable.
  28. * Signal handler trampolines are placed in a reserved page mapped into all
  29. * processes.
  30. *
  31. * Revision 1.14 2004/11/23 07:10:21 starvik
  32. * Moved find_fixup_code to generic code.
  33. *
  34. * Revision 1.13 2004/11/23 07:00:54 starvik
  35. * Actually use the execute permission bit in the MMU. This makes it possible
  36. * to prevent e.g. attacks where executable code is put on the stack.
  37. *
  38. * Revision 1.12 2004/09/29 06:16:04 starvik
  39. * Use instruction_pointer
  40. *
  41. * Revision 1.11 2004/05/14 07:58:05 starvik
  42. * Merge of changes from 2.4
  43. *
  44. * Revision 1.10 2003/10/27 14:51:24 starvik
  45. * Removed debugcode
  46. *
  47. * Revision 1.9 2003/10/27 14:50:42 starvik
  48. * Changed do_page_fault signature
  49. *
  50. * Revision 1.8 2003/07/04 13:02:48 tobiasa
  51. * Moved code snippet from arch/cris/mm/fault.c that searches for fixup code
  52. * to separate function in arch-specific files.
  53. *
  54. * Revision 1.7 2003/01/22 06:48:38 starvik
  55. * Fixed warnings issued by GCC 3.2.1
  56. *
  57. * Revision 1.6 2003/01/09 14:42:52 starvik
  58. * Merge of Linux 2.5.55
  59. *
  60. * Revision 1.5 2002/12/11 14:44:48 starvik
  61. * Extracted v10 (ETRAX 100LX) specific stuff to arch/cris/arch-v10/mm
  62. *
  63. * Revision 1.4 2002/11/13 15:10:28 starvik
  64. * pte_offset has been renamed to pte_offset_kernel
  65. *
  66. * Revision 1.3 2002/11/05 06:45:13 starvik
  67. * Merge of Linux 2.5.45
  68. *
  69. * Revision 1.2 2001/12/18 13:35:22 bjornw
  70. * Applied the 2.4.13->2.4.16 CRIS patch to 2.5.1 (is a copy of 2.4.15).
  71. *
  72. * Revision 1.20 2001/11/22 13:34:06 bjornw
  73. * * Bug workaround (LX TR89): force a rerun of the whole of an interrupted
  74. * unaligned write, because the second half of the write will be corrupted
  75. * otherwise. Affected unaligned writes spanning not-yet mapped pages.
  76. * * Optimization: use the wr_rd bit in R_MMU_CAUSE to know whether a miss
  77. * was due to a read or a write (before we didn't know this until the next
  78. * restart of the interrupted instruction, thus wasting one fault-irq)
  79. *
  80. * Revision 1.19 2001/11/12 19:02:10 pkj
  81. * Fixed compiler warnings.
  82. *
  83. * Revision 1.18 2001/07/18 22:14:32 bjornw
  84. * Enable interrupts in the bulk of do_page_fault
  85. *
  86. * Revision 1.17 2001/07/18 13:07:23 bjornw
  87. * * Detect non-existent PTE's in vmalloc pmd synchronization
  88. * * Remove comment about fast-paths for VMALLOC_START etc, because all that
  89. * was totally bogus anyway it turned out :)
  90. * * Fix detection of vmalloc-area synchronization
  91. * * Add some comments
  92. *
  93. * Revision 1.16 2001/06/13 00:06:08 bjornw
  94. * current_pgd should be volatile
  95. *
  96. * Revision 1.15 2001/06/13 00:02:23 bjornw
  97. * Use a separate variable to store the current pgd to avoid races in schedule
  98. *
  99. * Revision 1.14 2001/05/16 17:41:07 hp
  100. * Last comment tweak further tweaked.
  101. *
  102. * Revision 1.13 2001/05/15 00:58:44 hp
  103. * Expand a bit on the comment why we compare address >= TASK_SIZE rather
  104. * than >= VMALLOC_START.
  105. *
  106. * Revision 1.12 2001/04/04 10:51:14 bjornw
  107. * mmap_sem is grabbed for reading
  108. *
  109. * Revision 1.11 2001/03/23 07:36:07 starvik
  110. * Corrected according to review remarks
  111. *
  112. * Revision 1.10 2001/03/21 16:10:11 bjornw
  113. * CRIS_FRAME_FIXUP not needed anymore, use FRAME_NORMAL
  114. *
  115. * Revision 1.9 2001/03/05 13:22:20 bjornw
  116. * Spell-fix and fix in vmalloc_fault handling
  117. *
  118. * Revision 1.8 2000/11/22 14:45:31 bjornw
  119. * * 2.4.0-test10 removed the set_pgdir instantaneous kernel global mapping
  120. * into all processes. Instead we fill in the missing PTE entries on demand.
  121. *
  122. * Revision 1.7 2000/11/21 16:39:09 bjornw
  123. * fixup switches frametype
  124. *
  125. * Revision 1.6 2000/11/17 16:54:08 bjornw
  126. * More detailed siginfo reporting
  127. *
  128. *
  129. */
  130. #include <linux/mm.h>
  131. #include <linux/interrupt.h>
  132. #include <linux/module.h>
  133. #include <asm/uaccess.h>
  134. extern int find_fixup_code(struct pt_regs *);
  135. extern void die_if_kernel(const char *, struct pt_regs *, long);
  136. extern int raw_printk(const char *fmt, ...);
  137. /* debug of low-level TLB reload */
  138. #undef DEBUG
  139. #ifdef DEBUG
  140. #define D(x) x
  141. #else
  142. #define D(x)
  143. #endif
  144. /* debug of higher-level faults */
  145. #define DPG(x)
  146. /* current active page directory */
  147. volatile DEFINE_PER_CPU(pgd_t *,current_pgd);
  148. unsigned long cris_signal_return_page;
  149. /*
  150. * This routine handles page faults. It determines the address,
  151. * and the problem, and then passes it off to one of the appropriate
  152. * routines.
  153. *
  154. * Notice that the address we're given is aligned to the page the fault
  155. * occurred in, since we only get the PFN in R_MMU_CAUSE not the complete
  156. * address.
  157. *
  158. * error_code:
  159. * bit 0 == 0 means no page found, 1 means protection fault
  160. * bit 1 == 0 means read, 1 means write
  161. *
  162. * If this routine detects a bad access, it returns 1, otherwise it
  163. * returns 0.
  164. */
  165. asmlinkage void
  166. do_page_fault(unsigned long address, struct pt_regs *regs,
  167. int protection, int writeaccess)
  168. {
  169. struct task_struct *tsk;
  170. struct mm_struct *mm;
  171. struct vm_area_struct * vma;
  172. siginfo_t info;
  173. D(printk("Page fault for %lX on %X at %lX, prot %d write %d\n",
  174. address, smp_processor_id(), instruction_pointer(regs),
  175. protection, writeaccess));
  176. tsk = current;
  177. /*
  178. * We fault-in kernel-space virtual memory on-demand. The
  179. * 'reference' page table is init_mm.pgd.
  180. *
  181. * NOTE! We MUST NOT take any locks for this case. We may
  182. * be in an interrupt or a critical region, and should
  183. * only copy the information from the master page table,
  184. * nothing more.
  185. *
  186. * NOTE2: This is done so that, when updating the vmalloc
  187. * mappings we don't have to walk all processes pgdirs and
  188. * add the high mappings all at once. Instead we do it as they
  189. * are used. However vmalloc'ed page entries have the PAGE_GLOBAL
  190. * bit set so sometimes the TLB can use a lingering entry.
  191. *
  192. * This verifies that the fault happens in kernel space
  193. * and that the fault was not a protection error (error_code & 1).
  194. */
  195. if (address >= VMALLOC_START &&
  196. !protection &&
  197. !user_mode(regs))
  198. goto vmalloc_fault;
  199. /* When stack execution is not allowed we store the signal
  200. * trampolines in the reserved cris_signal_return_page.
  201. * Handle this in the exact same way as vmalloc (we know
  202. * that the mapping is there and is valid so no need to
  203. * call handle_mm_fault).
  204. */
  205. if (cris_signal_return_page &&
  206. address == cris_signal_return_page &&
  207. !protection && user_mode(regs))
  208. goto vmalloc_fault;
  209. /* we can and should enable interrupts at this point */
  210. local_irq_enable();
  211. mm = tsk->mm;
  212. info.si_code = SEGV_MAPERR;
  213. /*
  214. * If we're in an interrupt or have no user
  215. * context, we must not take the fault..
  216. */
  217. if (in_interrupt() || !mm)
  218. goto no_context;
  219. down_read(&mm->mmap_sem);
  220. vma = find_vma(mm, address);
  221. if (!vma)
  222. goto bad_area;
  223. if (vma->vm_start <= address)
  224. goto good_area;
  225. if (!(vma->vm_flags & VM_GROWSDOWN))
  226. goto bad_area;
  227. if (user_mode(regs)) {
  228. /*
  229. * accessing the stack below usp is always a bug.
  230. * we get page-aligned addresses so we can only check
  231. * if we're within a page from usp, but that might be
  232. * enough to catch brutal errors at least.
  233. */
  234. if (address + PAGE_SIZE < rdusp())
  235. goto bad_area;
  236. }
  237. if (expand_stack(vma, address))
  238. goto bad_area;
  239. /*
  240. * Ok, we have a good vm_area for this memory access, so
  241. * we can handle it..
  242. */
  243. good_area:
  244. info.si_code = SEGV_ACCERR;
  245. /* first do some preliminary protection checks */
  246. if (writeaccess == 2){
  247. if (!(vma->vm_flags & VM_EXEC))
  248. goto bad_area;
  249. } else if (writeaccess == 1) {
  250. if (!(vma->vm_flags & VM_WRITE))
  251. goto bad_area;
  252. } else {
  253. if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
  254. goto bad_area;
  255. }
  256. /*
  257. * If for any reason at all we couldn't handle the fault,
  258. * make sure we exit gracefully rather than endlessly redo
  259. * the fault.
  260. */
  261. switch (handle_mm_fault(mm, vma, address, writeaccess & 1)) {
  262. case VM_FAULT_MINOR:
  263. tsk->min_flt++;
  264. break;
  265. case VM_FAULT_MAJOR:
  266. tsk->maj_flt++;
  267. break;
  268. case VM_FAULT_SIGBUS:
  269. goto do_sigbus;
  270. default:
  271. goto out_of_memory;
  272. }
  273. up_read(&mm->mmap_sem);
  274. return;
  275. /*
  276. * Something tried to access memory that isn't in our memory map..
  277. * Fix it, but check if it's kernel or user first..
  278. */
  279. bad_area:
  280. up_read(&mm->mmap_sem);
  281. bad_area_nosemaphore:
  282. DPG(show_registers(regs));
  283. /* User mode accesses just cause a SIGSEGV */
  284. if (user_mode(regs)) {
  285. info.si_signo = SIGSEGV;
  286. info.si_errno = 0;
  287. /* info.si_code has been set above */
  288. info.si_addr = (void *)address;
  289. force_sig_info(SIGSEGV, &info, tsk);
  290. return;
  291. }
  292. no_context:
  293. /* Are we prepared to handle this kernel fault?
  294. *
  295. * (The kernel has valid exception-points in the source
  296. * when it acesses user-memory. When it fails in one
  297. * of those points, we find it in a table and do a jump
  298. * to some fixup code that loads an appropriate error
  299. * code)
  300. */
  301. if (find_fixup_code(regs))
  302. return;
  303. /*
  304. * Oops. The kernel tried to access some bad page. We'll have to
  305. * terminate things with extreme prejudice.
  306. */
  307. if ((unsigned long) (address) < PAGE_SIZE)
  308. raw_printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
  309. else
  310. raw_printk(KERN_ALERT "Unable to handle kernel access");
  311. raw_printk(" at virtual address %08lx\n",address);
  312. die_if_kernel("Oops", regs, (writeaccess << 1) | protection);
  313. do_exit(SIGKILL);
  314. /*
  315. * We ran out of memory, or some other thing happened to us that made
  316. * us unable to handle the page fault gracefully.
  317. */
  318. out_of_memory:
  319. up_read(&mm->mmap_sem);
  320. printk("VM: killing process %s\n", tsk->comm);
  321. if (user_mode(regs))
  322. do_exit(SIGKILL);
  323. goto no_context;
  324. do_sigbus:
  325. up_read(&mm->mmap_sem);
  326. /*
  327. * Send a sigbus, regardless of whether we were in kernel
  328. * or user mode.
  329. */
  330. info.si_signo = SIGBUS;
  331. info.si_errno = 0;
  332. info.si_code = BUS_ADRERR;
  333. info.si_addr = (void *)address;
  334. force_sig_info(SIGBUS, &info, tsk);
  335. /* Kernel mode? Handle exceptions or die */
  336. if (!user_mode(regs))
  337. goto no_context;
  338. return;
  339. vmalloc_fault:
  340. {
  341. /*
  342. * Synchronize this task's top level page-table
  343. * with the 'reference' page table.
  344. *
  345. * Use current_pgd instead of tsk->active_mm->pgd
  346. * since the latter might be unavailable if this
  347. * code is executed in a misfortunately run irq
  348. * (like inside schedule() between switch_mm and
  349. * switch_to...).
  350. */
  351. int offset = pgd_index(address);
  352. pgd_t *pgd, *pgd_k;
  353. pud_t *pud, *pud_k;
  354. pmd_t *pmd, *pmd_k;
  355. pte_t *pte_k;
  356. pgd = (pgd_t *)per_cpu(current_pgd, smp_processor_id()) + offset;
  357. pgd_k = init_mm.pgd + offset;
  358. /* Since we're two-level, we don't need to do both
  359. * set_pgd and set_pmd (they do the same thing). If
  360. * we go three-level at some point, do the right thing
  361. * with pgd_present and set_pgd here.
  362. *
  363. * Also, since the vmalloc area is global, we don't
  364. * need to copy individual PTE's, it is enough to
  365. * copy the pgd pointer into the pte page of the
  366. * root task. If that is there, we'll find our pte if
  367. * it exists.
  368. */
  369. pud = pud_offset(pgd, address);
  370. pud_k = pud_offset(pgd_k, address);
  371. if (!pud_present(*pud_k))
  372. goto no_context;
  373. pmd = pmd_offset(pud, address);
  374. pmd_k = pmd_offset(pud_k, address);
  375. if (!pmd_present(*pmd_k))
  376. goto bad_area_nosemaphore;
  377. set_pmd(pmd, *pmd_k);
  378. /* Make sure the actual PTE exists as well to
  379. * catch kernel vmalloc-area accesses to non-mapped
  380. * addresses. If we don't do this, this will just
  381. * silently loop forever.
  382. */
  383. pte_k = pte_offset_kernel(pmd_k, address);
  384. if (!pte_present(*pte_k))
  385. goto no_context;
  386. return;
  387. }
  388. }
  389. /* Find fixup code. */
  390. int
  391. find_fixup_code(struct pt_regs *regs)
  392. {
  393. const struct exception_table_entry *fixup;
  394. if ((fixup = search_exception_tables(instruction_pointer(regs))) != 0) {
  395. /* Adjust the instruction pointer in the stackframe. */
  396. instruction_pointer(regs) = fixup->fixup;
  397. arch_fixup(regs);
  398. return 1;
  399. }
  400. return 0;
  401. }