fault.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. /*
  2. * arch/ppc/mm/fault.c
  3. *
  4. * PowerPC version
  5. * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  6. *
  7. * Derived from "arch/i386/mm/fault.c"
  8. * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
  9. *
  10. * Modified by Cort Dougan and Paul Mackerras.
  11. *
  12. * This program is free software; you can redistribute it and/or
  13. * modify it under the terms of the GNU General Public License
  14. * as published by the Free Software Foundation; either version
  15. * 2 of the License, or (at your option) any later version.
  16. */
  17. #include <linux/config.h>
  18. #include <linux/signal.h>
  19. #include <linux/sched.h>
  20. #include <linux/kernel.h>
  21. #include <linux/errno.h>
  22. #include <linux/string.h>
  23. #include <linux/types.h>
  24. #include <linux/ptrace.h>
  25. #include <linux/mman.h>
  26. #include <linux/mm.h>
  27. #include <linux/interrupt.h>
  28. #include <linux/highmem.h>
  29. #include <linux/module.h>
  30. #include <asm/page.h>
  31. #include <asm/pgtable.h>
  32. #include <asm/mmu.h>
  33. #include <asm/mmu_context.h>
  34. #include <asm/system.h>
  35. #include <asm/uaccess.h>
  36. #include <asm/tlbflush.h>
  37. #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
  38. extern void (*debugger)(struct pt_regs *);
  39. extern void (*debugger_fault_handler)(struct pt_regs *);
  40. extern int (*debugger_dabr_match)(struct pt_regs *);
  41. int debugger_kernel_faults = 1;
  42. #endif
  43. unsigned long htab_reloads; /* updated by hashtable.S:hash_page() */
  44. unsigned long htab_evicts; /* updated by hashtable.S:hash_page() */
  45. unsigned long htab_preloads; /* updated by hashtable.S:add_hash_page() */
  46. unsigned long pte_misses; /* updated by do_page_fault() */
  47. unsigned long pte_errors; /* updated by do_page_fault() */
  48. unsigned int probingmem;
  49. /*
  50. * Check whether the instruction at regs->nip is a store using
  51. * an update addressing form which will update r1.
  52. */
  53. static int store_updates_sp(struct pt_regs *regs)
  54. {
  55. unsigned int inst;
  56. if (get_user(inst, (unsigned int __user *)regs->nip))
  57. return 0;
  58. /* check for 1 in the rA field */
  59. if (((inst >> 16) & 0x1f) != 1)
  60. return 0;
  61. /* check major opcode */
  62. switch (inst >> 26) {
  63. case 37: /* stwu */
  64. case 39: /* stbu */
  65. case 45: /* sthu */
  66. case 53: /* stfsu */
  67. case 55: /* stfdu */
  68. return 1;
  69. case 31:
  70. /* check minor opcode */
  71. switch ((inst >> 1) & 0x3ff) {
  72. case 183: /* stwux */
  73. case 247: /* stbux */
  74. case 439: /* sthux */
  75. case 695: /* stfsux */
  76. case 759: /* stfdux */
  77. return 1;
  78. }
  79. }
  80. return 0;
  81. }
  82. /*
  83. * For 600- and 800-family processors, the error_code parameter is DSISR
  84. * for a data fault, SRR1 for an instruction fault. For 400-family processors
  85. * the error_code parameter is ESR for a data fault, 0 for an instruction
  86. * fault.
  87. */
  88. int do_page_fault(struct pt_regs *regs, unsigned long address,
  89. unsigned long error_code)
  90. {
  91. struct vm_area_struct * vma;
  92. struct mm_struct *mm = current->mm;
  93. siginfo_t info;
  94. int code = SEGV_MAPERR;
  95. #if defined(CONFIG_4xx) || defined (CONFIG_BOOKE)
  96. int is_write = error_code & ESR_DST;
  97. #else
  98. int is_write = 0;
  99. /*
  100. * Fortunately the bit assignments in SRR1 for an instruction
  101. * fault and DSISR for a data fault are mostly the same for the
  102. * bits we are interested in. But there are some bits which
  103. * indicate errors in DSISR but can validly be set in SRR1.
  104. */
  105. if (TRAP(regs) == 0x400)
  106. error_code &= 0x48200000;
  107. else
  108. is_write = error_code & 0x02000000;
  109. #endif /* CONFIG_4xx || CONFIG_BOOKE */
  110. #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
  111. if (debugger_fault_handler && TRAP(regs) == 0x300) {
  112. debugger_fault_handler(regs);
  113. return 0;
  114. }
  115. #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
  116. if (error_code & 0x00400000) {
  117. /* DABR match */
  118. if (debugger_dabr_match(regs))
  119. return 0;
  120. }
  121. #endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
  122. #endif /* CONFIG_XMON || CONFIG_KGDB */
  123. if (in_atomic() || mm == NULL)
  124. return SIGSEGV;
  125. down_read(&mm->mmap_sem);
  126. vma = find_vma(mm, address);
  127. if (!vma)
  128. goto bad_area;
  129. if (vma->vm_start <= address)
  130. goto good_area;
  131. if (!(vma->vm_flags & VM_GROWSDOWN))
  132. goto bad_area;
  133. if (!is_write)
  134. goto bad_area;
  135. /*
  136. * N.B. The rs6000/xcoff ABI allows programs to access up to
  137. * a few hundred bytes below the stack pointer.
  138. * The kernel signal delivery code writes up to about 1.5kB
  139. * below the stack pointer (r1) before decrementing it.
  140. * The exec code can write slightly over 640kB to the stack
  141. * before setting the user r1. Thus we allow the stack to
  142. * expand to 1MB without further checks.
  143. */
  144. if (address + 0x100000 < vma->vm_end) {
  145. /* get user regs even if this fault is in kernel mode */
  146. struct pt_regs *uregs = current->thread.regs;
  147. if (uregs == NULL)
  148. goto bad_area;
  149. /*
  150. * A user-mode access to an address a long way below
  151. * the stack pointer is only valid if the instruction
  152. * is one which would update the stack pointer to the
  153. * address accessed if the instruction completed,
  154. * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
  155. * (or the byte, halfword, float or double forms).
  156. *
  157. * If we don't check this then any write to the area
  158. * between the last mapped region and the stack will
  159. * expand the stack rather than segfaulting.
  160. */
  161. if (address + 2048 < uregs->gpr[1]
  162. && (!user_mode(regs) || !store_updates_sp(regs)))
  163. goto bad_area;
  164. }
  165. if (expand_stack(vma, address))
  166. goto bad_area;
  167. good_area:
  168. code = SEGV_ACCERR;
  169. #if defined(CONFIG_6xx)
  170. if (error_code & 0x95700000)
  171. /* an error such as lwarx to I/O controller space,
  172. address matching DABR, eciwx, etc. */
  173. goto bad_area;
  174. #endif /* CONFIG_6xx */
  175. #if defined(CONFIG_8xx)
  176. /* The MPC8xx seems to always set 0x80000000, which is
  177. * "undefined". Of those that can be set, this is the only
  178. * one which seems bad.
  179. */
  180. if (error_code & 0x10000000)
  181. /* Guarded storage error. */
  182. goto bad_area;
  183. #endif /* CONFIG_8xx */
  184. /* a write */
  185. if (is_write) {
  186. if (!(vma->vm_flags & VM_WRITE))
  187. goto bad_area;
  188. #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
  189. /* an exec - 4xx/Book-E allows for per-page execute permission */
  190. } else if (TRAP(regs) == 0x400) {
  191. pte_t *ptep;
  192. #if 0
  193. /* It would be nice to actually enforce the VM execute
  194. permission on CPUs which can do so, but far too
  195. much stuff in userspace doesn't get the permissions
  196. right, so we let any page be executed for now. */
  197. if (! (vma->vm_flags & VM_EXEC))
  198. goto bad_area;
  199. #endif
  200. /* Since 4xx/Book-E supports per-page execute permission,
  201. * we lazily flush dcache to icache. */
  202. ptep = NULL;
  203. if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) {
  204. struct page *page = pte_page(*ptep);
  205. if (! test_bit(PG_arch_1, &page->flags)) {
  206. flush_dcache_icache_page(page);
  207. set_bit(PG_arch_1, &page->flags);
  208. }
  209. pte_update(ptep, 0, _PAGE_HWEXEC);
  210. _tlbie(address);
  211. pte_unmap(ptep);
  212. up_read(&mm->mmap_sem);
  213. return 0;
  214. }
  215. if (ptep != NULL)
  216. pte_unmap(ptep);
  217. #endif
  218. /* a read */
  219. } else {
  220. /* protection fault */
  221. if (error_code & 0x08000000)
  222. goto bad_area;
  223. if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
  224. goto bad_area;
  225. }
  226. /*
  227. * If for any reason at all we couldn't handle the fault,
  228. * make sure we exit gracefully rather than endlessly redo
  229. * the fault.
  230. */
  231. survive:
  232. switch (handle_mm_fault(mm, vma, address, is_write)) {
  233. case VM_FAULT_MINOR:
  234. current->min_flt++;
  235. break;
  236. case VM_FAULT_MAJOR:
  237. current->maj_flt++;
  238. break;
  239. case VM_FAULT_SIGBUS:
  240. goto do_sigbus;
  241. case VM_FAULT_OOM:
  242. goto out_of_memory;
  243. default:
  244. BUG();
  245. }
  246. up_read(&mm->mmap_sem);
  247. /*
  248. * keep track of tlb+htab misses that are good addrs but
  249. * just need pte's created via handle_mm_fault()
  250. * -- Cort
  251. */
  252. pte_misses++;
  253. return 0;
  254. bad_area:
  255. up_read(&mm->mmap_sem);
  256. pte_errors++;
  257. /* User mode accesses cause a SIGSEGV */
  258. if (user_mode(regs)) {
  259. info.si_signo = SIGSEGV;
  260. info.si_errno = 0;
  261. info.si_code = code;
  262. info.si_addr = (void __user *) address;
  263. force_sig_info(SIGSEGV, &info, current);
  264. return 0;
  265. }
  266. return SIGSEGV;
  267. /*
  268. * We ran out of memory, or some other thing happened to us that made
  269. * us unable to handle the page fault gracefully.
  270. */
  271. out_of_memory:
  272. up_read(&mm->mmap_sem);
  273. if (current->pid == 1) {
  274. yield();
  275. down_read(&mm->mmap_sem);
  276. goto survive;
  277. }
  278. printk("VM: killing process %s\n", current->comm);
  279. if (user_mode(regs))
  280. do_exit(SIGKILL);
  281. return SIGKILL;
  282. do_sigbus:
  283. up_read(&mm->mmap_sem);
  284. info.si_signo = SIGBUS;
  285. info.si_errno = 0;
  286. info.si_code = BUS_ADRERR;
  287. info.si_addr = (void __user *)address;
  288. force_sig_info (SIGBUS, &info, current);
  289. if (!user_mode(regs))
  290. return SIGBUS;
  291. return 0;
  292. }
  293. /*
  294. * bad_page_fault is called when we have a bad access from the kernel.
  295. * It is called from the DSI and ISI handlers in head.S and from some
  296. * of the procedures in traps.c.
  297. */
  298. void
  299. bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
  300. {
  301. const struct exception_table_entry *entry;
  302. /* Are we prepared to handle this fault? */
  303. if ((entry = search_exception_tables(regs->nip)) != NULL) {
  304. regs->nip = entry->fixup;
  305. return;
  306. }
  307. /* kernel has accessed a bad area */
  308. #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
  309. if (debugger_kernel_faults)
  310. debugger(regs);
  311. #endif
  312. die("kernel access of bad area", regs, sig);
  313. }
  314. #ifdef CONFIG_8xx
  315. /* The pgtable.h claims some functions generically exist, but I
  316. * can't find them......
  317. */
  318. pte_t *va_to_pte(unsigned long address)
  319. {
  320. pgd_t *dir;
  321. pmd_t *pmd;
  322. pte_t *pte;
  323. if (address < TASK_SIZE)
  324. return NULL;
  325. dir = pgd_offset(&init_mm, address);
  326. if (dir) {
  327. pmd = pmd_offset(dir, address & PAGE_MASK);
  328. if (pmd && pmd_present(*pmd)) {
  329. pte = pte_offset_kernel(pmd, address & PAGE_MASK);
  330. if (pte && pte_present(*pte))
  331. return(pte);
  332. }
  333. }
  334. return NULL;
  335. }
  336. unsigned long va_to_phys(unsigned long address)
  337. {
  338. pte_t *pte;
  339. pte = va_to_pte(address);
  340. if (pte)
  341. return(((unsigned long)(pte_val(*pte)) & PAGE_MASK) | (address & ~(PAGE_MASK)));
  342. return (0);
  343. }
  344. void
  345. print_8xx_pte(struct mm_struct *mm, unsigned long addr)
  346. {
  347. pgd_t * pgd;
  348. pmd_t * pmd;
  349. pte_t * pte;
  350. printk(" pte @ 0x%8lx: ", addr);
  351. pgd = pgd_offset(mm, addr & PAGE_MASK);
  352. if (pgd) {
  353. pmd = pmd_offset(pgd, addr & PAGE_MASK);
  354. if (pmd && pmd_present(*pmd)) {
  355. pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
  356. if (pte) {
  357. printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
  358. (long)pgd, (long)pte, (long)pte_val(*pte));
  359. #define pp ((long)pte_val(*pte))
  360. printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
  361. "CI: %lx v: %lx\n",
  362. pp>>12, /* rpn */
  363. (pp>>10)&3, /* pp */
  364. (pp>>3)&1, /* small */
  365. (pp>>2)&1, /* shared */
  366. (pp>>1)&1, /* cache inhibit */
  367. pp&1 /* valid */
  368. );
  369. #undef pp
  370. }
  371. else {
  372. printk("no pte\n");
  373. }
  374. }
  375. else {
  376. printk("no pmd\n");
  377. }
  378. }
  379. else {
  380. printk("no pgd\n");
  381. }
  382. }
  383. int
  384. get_8xx_pte(struct mm_struct *mm, unsigned long addr)
  385. {
  386. pgd_t * pgd;
  387. pmd_t * pmd;
  388. pte_t * pte;
  389. int retval = 0;
  390. pgd = pgd_offset(mm, addr & PAGE_MASK);
  391. if (pgd) {
  392. pmd = pmd_offset(pgd, addr & PAGE_MASK);
  393. if (pmd && pmd_present(*pmd)) {
  394. pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
  395. if (pte) {
  396. retval = (int)pte_val(*pte);
  397. }
  398. }
  399. }
  400. return(retval);
  401. }
  402. #endif /* CONFIG_8xx */