crash.c 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. /*
  2. * Architecture specific (PPC64) functions for kexec based crash dumps.
  3. *
  4. * Copyright (C) 2005, IBM Corp.
  5. *
  6. * Created by: Haren Myneni
  7. *
  8. * This source code is licensed under the GNU General Public License,
  9. * Version 2. See the file COPYING for more details.
  10. *
  11. */
  12. #undef DEBUG
  13. #include <linux/kernel.h>
  14. #include <linux/smp.h>
  15. #include <linux/reboot.h>
  16. #include <linux/kexec.h>
  17. #include <linux/bootmem.h>
  18. #include <linux/crash_dump.h>
  19. #include <linux/delay.h>
  20. #include <linux/elf.h>
  21. #include <linux/elfcore.h>
  22. #include <linux/init.h>
  23. #include <linux/types.h>
  24. #include <asm/processor.h>
  25. #include <asm/machdep.h>
  26. #include <asm/kdump.h>
  27. #include <asm/lmb.h>
  28. #include <asm/firmware.h>
  29. #include <asm/smp.h>
  30. #ifdef DEBUG
  31. #include <asm/udbg.h>
  32. #define DBG(fmt...) udbg_printf(fmt)
  33. #else
  34. #define DBG(fmt...)
  35. #endif
  /* Tracks which CPU is the crashing one; stays -1 until a crash shutdown records it. */
  37. int crashing_cpu = -1;
  38. static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
  39. size_t data_len)
  40. {
  41. struct elf_note note;
  42. note.n_namesz = strlen(name) + 1;
  43. note.n_descsz = data_len;
  44. note.n_type = type;
  45. memcpy(buf, &note, sizeof(note));
  46. buf += (sizeof(note) +3)/4;
  47. memcpy(buf, name, note.n_namesz);
  48. buf += (note.n_namesz + 3)/4;
  49. memcpy(buf, data, note.n_descsz);
  50. buf += (note.n_descsz + 3)/4;
  51. return buf;
  52. }
  53. static void final_note(u32 *buf)
  54. {
  55. struct elf_note note;
  56. note.n_namesz = 0;
  57. note.n_descsz = 0;
  58. note.n_type = 0;
  59. memcpy(buf, &note, sizeof(note));
  60. }
  61. static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
  62. {
  63. struct elf_prstatus prstatus;
  64. u32 *buf;
  65. if ((cpu < 0) || (cpu >= NR_CPUS))
  66. return;
  67. /* Using ELF notes here is opportunistic.
  68. * I need a well defined structure format
  69. * for the data I pass, and I need tags
  70. * on the data to indicate what information I have
  71. * squirrelled away. ELF notes happen to provide
  72. * all of that that no need to invent something new.
  73. */
  74. buf = &crash_notes[cpu][0];
  75. memset(&prstatus, 0, sizeof(prstatus));
  76. prstatus.pr_pid = current->pid;
  77. elf_core_copy_regs(&prstatus.pr_reg, regs);
  78. buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
  79. sizeof(prstatus));
  80. final_note(buf);
  81. }
  /* FIXME Merge this with xmon_save_regs ?? */
  /*
   * Snapshot the calling CPU's registers into *regs.
   *
   * The stores performed by the asm, as byte offsets from regs:
   *   0..248  general purpose registers r0..r31 (8 bytes each)
   *   256     address of local label "1:" obtained via bl/mflr, i.e. a
   *           program counter value inside this function
   *   264     MSR
   *   280     CTR
   *   288     LR as it was on entry to the asm
   *   296     XER
   * NOTE(review): these offsets presumably match the gpr[]/nip/msr/ctr/
   * link/xer members of the 64-bit struct pt_regs -- confirm against the
   * struct definition.
   *
   * The "bl 1f" overwrites LR, so the original LR is kept in %0 and put
   * back with mtlr before %0 is reused for XER.
   *
   * The asm writes to memory through an input pointer, so it carries a
   * "memory" clobber; without it the compiler is not told that *regs has
   * been modified.
   */
  static inline void crash_get_current_regs(struct pt_regs *regs)
  {
      unsigned long tmp1, tmp2;

      __asm__ __volatile__ (
          "std 0,0(%2)\n"
          "std 1,8(%2)\n"
          "std 2,16(%2)\n"
          "std 3,24(%2)\n"
          "std 4,32(%2)\n"
          "std 5,40(%2)\n"
          "std 6,48(%2)\n"
          "std 7,56(%2)\n"
          "std 8,64(%2)\n"
          "std 9,72(%2)\n"
          "std 10,80(%2)\n"
          "std 11,88(%2)\n"
          "std 12,96(%2)\n"
          "std 13,104(%2)\n"
          "std 14,112(%2)\n"
          "std 15,120(%2)\n"
          "std 16,128(%2)\n"
          "std 17,136(%2)\n"
          "std 18,144(%2)\n"
          "std 19,152(%2)\n"
          "std 20,160(%2)\n"
          "std 21,168(%2)\n"
          "std 22,176(%2)\n"
          "std 23,184(%2)\n"
          "std 24,192(%2)\n"
          "std 25,200(%2)\n"
          "std 26,208(%2)\n"
          "std 27,216(%2)\n"
          "std 28,224(%2)\n"
          "std 29,232(%2)\n"
          "std 30,240(%2)\n"
          "std 31,248(%2)\n"
          "mfmsr %0\n"
          "std %0, 264(%2)\n"
          "mfctr %0\n"
          "std %0, 280(%2)\n"
          "mflr %0\n"
          "std %0, 288(%2)\n"
          "bl 1f\n"
          "1: mflr %1\n"
          "std %1, 256(%2)\n"
          "mtlr %0\n"
          "mfxer %0\n"
          "std %0, 296(%2)\n"
          : "=&r" (tmp1), "=&r" (tmp2)
          : "b" (regs)
          : "memory");
  }
  134. /* We may have saved_regs from where the error came from
  135. * or it is NULL if via a direct panic().
  136. */
  137. static void crash_save_self(struct pt_regs *saved_regs)
  138. {
  139. struct pt_regs regs;
  140. int cpu;
  141. cpu = smp_processor_id();
  142. if (saved_regs)
  143. memcpy(&regs, saved_regs, sizeof(regs));
  144. else
  145. crash_get_current_regs(&regs);
  146. crash_save_this_cpu(&regs, cpu);
  147. }
  148. #ifdef CONFIG_SMP
  149. static atomic_t waiting_for_crash_ipi;
  150. void crash_ipi_callback(struct pt_regs *regs)
  151. {
  152. int cpu = smp_processor_id();
  153. if (cpu == crashing_cpu)
  154. return;
  155. if (!cpu_online(cpu))
  156. return;
  157. if (ppc_md.kexec_cpu_down)
  158. ppc_md.kexec_cpu_down(1, 1);
  159. local_irq_disable();
  160. crash_save_this_cpu(regs, cpu);
  161. atomic_dec(&waiting_for_crash_ipi);
  162. kexec_smp_wait();
  163. /* NOTREACHED */
  164. }
  165. static void crash_kexec_prepare_cpus(void)
  166. {
  167. unsigned int msecs;
  168. atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
  169. crash_send_ipi(crash_ipi_callback);
  170. smp_wmb();
  171. /*
  172. * FIXME: Until we will have the way to stop other CPUSs reliabally,
  173. * the crash CPU will send an IPI and wait for other CPUs to
  174. * respond. If not, proceed the kexec boot even though we failed to
  175. * capture other CPU states.
  176. */
  177. msecs = 1000000;
  178. while ((atomic_read(&waiting_for_crash_ipi) > 0) && (--msecs > 0)) {
  179. barrier();
  180. mdelay(1);
  181. }
  182. /* Would it be better to replace the trap vector here? */
  183. /*
  184. * FIXME: In case if we do not get all CPUs, one possibility: ask the
  185. * user to do soft reset such that we get all.
  186. * IPI handler is already set by the panic cpu initially. Therefore,
  187. * all cpus could invoke this handler from die() and the panic CPU
  188. * will call machine_kexec() directly from this handler to do
  189. * kexec boot.
  190. */
  191. if (atomic_read(&waiting_for_crash_ipi))
  192. printk(KERN_ALERT "done waiting: %d cpus not responding\n",
  193. atomic_read(&waiting_for_crash_ipi));
  194. /* Leave the IPI callback set */
  195. }
  196. #else
  /*
   * Non-SMP variant: there are no other CPUs to signal, so this only
   * releases the secondary CPUs.
   */
  static void crash_kexec_prepare_cpus(void)
  {
      /*
       * Move the secondaries to us so that the new kernel can be copied
       * over 0-0x100 safely.
       *
       * Open question from the original author: do this in setup.c when
       * kexec is in use?
       */
      smp_release_cpus();
  }
  207. #endif
  208. void default_machine_crash_shutdown(struct pt_regs *regs)
  209. {
  210. /*
  211. * This function is only called after the system
  212. * has paniced or is otherwise in a critical state.
  213. * The minimum amount of code to allow a kexec'd kernel
  214. * to run successfully needs to happen here.
  215. *
  216. * In practice this means stopping other cpus in
  217. * an SMP system.
  218. * The kernel is broken so disable interrupts.
  219. */
  220. local_irq_disable();
  221. if (ppc_md.kexec_cpu_down)
  222. ppc_md.kexec_cpu_down(1, 0);
  223. /*
  224. * Make a note of crashing cpu. Will be used in machine_kexec
  225. * such that another IPI will not be sent.
  226. */
  227. crashing_cpu = smp_processor_id();
  228. crash_kexec_prepare_cpus();
  229. crash_save_self(regs);
  230. }