ras.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. #define DEBUG
  2. #include <linux/types.h>
  3. #include <linux/kernel.h>
  4. #include <linux/smp.h>
  5. #include <linux/reboot.h>
  6. #include <asm/reg.h>
  7. #include <asm/io.h>
  8. #include <asm/prom.h>
  9. #include <asm/machdep.h>
  10. #include <asm/rtas.h>
  11. #include <asm/cell-regs.h>
  12. #include "ras.h"
  13. static void dump_fir(int cpu)
  14. {
  15. struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu);
  16. struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu);
  17. if (pregs == NULL)
  18. return;
  19. /* Todo: do some nicer parsing of bits and based on them go down
  20. * to other sub-units FIRs and not only IIC
  21. */
  22. printk(KERN_ERR "Global Checkstop FIR : 0x%016lx\n",
  23. in_be64(&pregs->checkstop_fir));
  24. printk(KERN_ERR "Global Recoverable FIR : 0x%016lx\n",
  25. in_be64(&pregs->checkstop_fir));
  26. printk(KERN_ERR "Global MachineCheck FIR : 0x%016lx\n",
  27. in_be64(&pregs->spec_att_mchk_fir));
  28. if (iregs == NULL)
  29. return;
  30. printk(KERN_ERR "IOC FIR : 0x%016lx\n",
  31. in_be64(&iregs->ioc_fir));
  32. }
  33. void cbe_system_error_exception(struct pt_regs *regs)
  34. {
  35. int cpu = smp_processor_id();
  36. printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu);
  37. dump_fir(cpu);
  38. dump_stack();
  39. }
  40. void cbe_maintenance_exception(struct pt_regs *regs)
  41. {
  42. int cpu = smp_processor_id();
  43. /*
  44. * Nothing implemented for the maintenance interrupt at this point
  45. */
  46. printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n", cpu);
  47. dump_stack();
  48. }
  49. void cbe_thermal_exception(struct pt_regs *regs)
  50. {
  51. int cpu = smp_processor_id();
  52. /*
  53. * Nothing implemented for the thermal interrupt at this point
  54. */
  55. printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n", cpu);
  56. dump_stack();
  57. }
  58. static int cbe_machine_check_handler(struct pt_regs *regs)
  59. {
  60. int cpu = smp_processor_id();
  61. printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", cpu);
  62. dump_fir(cpu);
  63. /* No recovery from this code now, lets continue */
  64. return 0;
  65. }
  66. struct ptcal_area {
  67. struct list_head list;
  68. int nid;
  69. int order;
  70. struct page *pages;
  71. };
  72. static LIST_HEAD(ptcal_list);
  73. static int ptcal_start_tok, ptcal_stop_tok;
  74. static int __init cbe_ptcal_enable_on_node(int nid, int order)
  75. {
  76. struct ptcal_area *area;
  77. int ret = -ENOMEM;
  78. unsigned long addr;
  79. #ifdef CONFIG_CRASH_DUMP
  80. rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
  81. #endif
  82. area = kmalloc(sizeof(*area), GFP_KERNEL);
  83. if (!area)
  84. goto out_err;
  85. area->nid = nid;
  86. area->order = order;
  87. area->pages = alloc_pages_node(area->nid, GFP_KERNEL, area->order);
  88. if (!area->pages)
  89. goto out_free_area;
  90. addr = __pa(page_address(area->pages));
  91. ret = -EIO;
  92. if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid,
  93. (unsigned int)(addr >> 32),
  94. (unsigned int)(addr & 0xffffffff))) {
  95. printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n",
  96. __FUNCTION__, nid);
  97. goto out_free_pages;
  98. }
  99. list_add(&area->list, &ptcal_list);
  100. return 0;
  101. out_free_pages:
  102. __free_pages(area->pages, area->order);
  103. out_free_area:
  104. kfree(area);
  105. out_err:
  106. return ret;
  107. }
  108. static int __init cbe_ptcal_enable(void)
  109. {
  110. const u32 *size;
  111. struct device_node *np;
  112. int order, found_mic = 0;
  113. np = of_find_node_by_path("/rtas");
  114. if (!np)
  115. return -ENODEV;
  116. size = of_get_property(np, "ibm,cbe-ptcal-size", NULL);
  117. if (!size)
  118. return -ENODEV;
  119. pr_debug("%s: enabling PTCAL, size = 0x%x\n", __FUNCTION__, *size);
  120. order = get_order(*size);
  121. of_node_put(np);
  122. /* support for malta device trees, with be@/mic@ nodes */
  123. for_each_node_by_type(np, "mic-tm") {
  124. cbe_ptcal_enable_on_node(of_node_to_nid(np), order);
  125. found_mic = 1;
  126. }
  127. if (found_mic)
  128. return 0;
  129. /* support for older device tree - use cpu nodes */
  130. for_each_node_by_type(np, "cpu") {
  131. const u32 *nid = of_get_property(np, "node-id", NULL);
  132. if (!nid) {
  133. printk(KERN_ERR "%s: node %s is missing node-id?\n",
  134. __FUNCTION__, np->full_name);
  135. continue;
  136. }
  137. cbe_ptcal_enable_on_node(*nid, order);
  138. found_mic = 1;
  139. }
  140. return found_mic ? 0 : -ENODEV;
  141. }
  142. static int cbe_ptcal_disable(void)
  143. {
  144. struct ptcal_area *area, *tmp;
  145. int ret = 0;
  146. pr_debug("%s: disabling PTCAL\n", __FUNCTION__);
  147. list_for_each_entry_safe(area, tmp, &ptcal_list, list) {
  148. /* disable ptcal on this node */
  149. if (rtas_call(ptcal_stop_tok, 1, 1, NULL, area->nid)) {
  150. printk(KERN_ERR "%s: error disabling PTCAL "
  151. "on node %d!\n", __FUNCTION__,
  152. area->nid);
  153. ret = -EIO;
  154. continue;
  155. }
  156. /* ensure we can access the PTCAL area */
  157. memset(page_address(area->pages), 0,
  158. 1 << (area->order + PAGE_SHIFT));
  159. /* clean up */
  160. list_del(&area->list);
  161. __free_pages(area->pages, area->order);
  162. kfree(area);
  163. }
  164. return ret;
  165. }
  /*
   * cbe_ptcal_notify_reboot - reboot notifier callback that turns PTCAL off.
   * @nb:   the notifier block this callback is attached to (unused).
   * @code: notifier event code (unused).
   * @data: notifier payload (unused).
   *
   * Returns whatever cbe_ptcal_disable() returns, i.e. 0 or -EIO.
   * NOTE(review): notifier callbacks conventionally return NOTIFY_*
   * codes - confirm that passing an errno straight through is intended.
   */
  static int cbe_ptcal_notify_reboot(struct notifier_block *nb,
                                     unsigned long code, void *data)
  {
      int status = cbe_ptcal_disable();

      return status;
  }
  171. static struct notifier_block cbe_ptcal_reboot_notifier = {
  172. .notifier_call = cbe_ptcal_notify_reboot
  173. };
  174. int __init cbe_ptcal_init(void)
  175. {
  176. int ret;
  177. ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal");
  178. ptcal_stop_tok = rtas_token("ibm,cbe-stop-ptcal");
  179. if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE
  180. || ptcal_stop_tok == RTAS_UNKNOWN_SERVICE)
  181. return -ENODEV;
  182. ret = register_reboot_notifier(&cbe_ptcal_reboot_notifier);
  183. if (ret) {
  184. printk(KERN_ERR "Can't disable PTCAL, so not enabling\n");
  185. return ret;
  186. }
  187. return cbe_ptcal_enable();
  188. }
  189. arch_initcall(cbe_ptcal_init);
  190. void __init cbe_ras_init(void)
  191. {
  192. unsigned long hid0;
  193. /*
  194. * Enable System Error & thermal interrupts and wakeup conditions
  195. */
  196. hid0 = mfspr(SPRN_HID0);
  197. hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP |
  198. HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP;
  199. mtspr(SPRN_HID0, hid0);
  200. mb();
  201. /*
  202. * Install machine check handler. Leave setting of precise mode to
  203. * what the firmware did for now
  204. */
  205. ppc_md.machine_check_exception = cbe_machine_check_handler;
  206. mb();
  207. /*
  208. * For now, we assume that IOC_FIR is already set to forward some
  209. * error conditions to the System Error handler. If that is not true
  210. * then it will have to be fixed up here.
  211. */
  212. }