ras.c 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. #define DEBUG
  2. #include <linux/types.h>
  3. #include <linux/kernel.h>
  4. #include <linux/smp.h>
  5. #include <asm/reg.h>
  6. #include <asm/io.h>
  7. #include <asm/prom.h>
  8. #include <asm/machdep.h>
  9. #include "ras.h"
  10. #include "cbe_regs.h"
  11. static void dump_fir(int cpu)
  12. {
  13. struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu);
  14. struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu);
  15. if (pregs == NULL)
  16. return;
  17. /* Todo: do some nicer parsing of bits and based on them go down
  18. * to other sub-units FIRs and not only IIC
  19. */
  20. printk(KERN_ERR "Global Checkstop FIR : 0x%016lx\n",
  21. in_be64(&pregs->checkstop_fir));
  22. printk(KERN_ERR "Global Recoverable FIR : 0x%016lx\n",
  23. in_be64(&pregs->checkstop_fir));
  24. printk(KERN_ERR "Global MachineCheck FIR : 0x%016lx\n",
  25. in_be64(&pregs->spec_att_mchk_fir));
  26. if (iregs == NULL)
  27. return;
  28. printk(KERN_ERR "IOC FIR : 0x%016lx\n",
  29. in_be64(&iregs->ioc_fir));
  30. }
  31. void cbe_system_error_exception(struct pt_regs *regs)
  32. {
  33. int cpu = smp_processor_id();
  34. printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu);
  35. dump_fir(cpu);
  36. dump_stack();
  37. }
  38. void cbe_maintenance_exception(struct pt_regs *regs)
  39. {
  40. int cpu = smp_processor_id();
  41. /*
  42. * Nothing implemented for the maintenance interrupt at this point
  43. */
  44. printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n", cpu);
  45. dump_stack();
  46. }
  47. void cbe_thermal_exception(struct pt_regs *regs)
  48. {
  49. int cpu = smp_processor_id();
  50. /*
  51. * Nothing implemented for the thermal interrupt at this point
  52. */
  53. printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n", cpu);
  54. dump_stack();
  55. }
  56. static int cbe_machine_check_handler(struct pt_regs *regs)
  57. {
  58. int cpu = smp_processor_id();
  59. printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", cpu);
  60. dump_fir(cpu);
  61. /* No recovery from this code now, lets continue */
  62. return 0;
  63. }
  64. void __init cbe_ras_init(void)
  65. {
  66. unsigned long hid0;
  67. /*
  68. * Enable System Error & thermal interrupts and wakeup conditions
  69. */
  70. hid0 = mfspr(SPRN_HID0);
  71. hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP |
  72. HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP;
  73. mtspr(SPRN_HID0, hid0);
  74. mb();
  75. /*
  76. * Install machine check handler. Leave setting of precise mode to
  77. * what the firmware did for now
  78. */
  79. ppc_md.machine_check_exception = cbe_machine_check_handler;
  80. mb();
  81. /*
  82. * For now, we assume that IOC_FIR is already set to forward some
  83. * error conditions to the System Error handler. If that is not true
  84. * then it will have to be fixed up here.
  85. */
  86. }