ras.c 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. #define DEBUG
  2. #include <linux/config.h>
  3. #include <linux/types.h>
  4. #include <linux/kernel.h>
  5. #include <linux/smp.h>
  6. #include <asm/reg.h>
  7. #include <asm/io.h>
  8. #include <asm/prom.h>
  9. #include <asm/machdep.h>
  10. #include "ras.h"
  11. #include "cbe_regs.h"
  12. static void dump_fir(int cpu)
  13. {
  14. struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu);
  15. struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu);
  16. if (pregs == NULL)
  17. return;
  18. /* Todo: do some nicer parsing of bits and based on them go down
  19. * to other sub-units FIRs and not only IIC
  20. */
  21. printk(KERN_ERR "Global Checkstop FIR : 0x%016lx\n",
  22. in_be64(&pregs->checkstop_fir));
  23. printk(KERN_ERR "Global Recoverable FIR : 0x%016lx\n",
  24. in_be64(&pregs->checkstop_fir));
  25. printk(KERN_ERR "Global MachineCheck FIR : 0x%016lx\n",
  26. in_be64(&pregs->spec_att_mchk_fir));
  27. if (iregs == NULL)
  28. return;
  29. printk(KERN_ERR "IOC FIR : 0x%016lx\n",
  30. in_be64(&iregs->ioc_fir));
  31. }
  32. void cbe_system_error_exception(struct pt_regs *regs)
  33. {
  34. int cpu = smp_processor_id();
  35. printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu);
  36. dump_fir(cpu);
  37. dump_stack();
  38. }
  39. void cbe_maintenance_exception(struct pt_regs *regs)
  40. {
  41. int cpu = smp_processor_id();
  42. /*
  43. * Nothing implemented for the maintenance interrupt at this point
  44. */
  45. printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n", cpu);
  46. dump_stack();
  47. }
  48. void cbe_thermal_exception(struct pt_regs *regs)
  49. {
  50. int cpu = smp_processor_id();
  51. /*
  52. * Nothing implemented for the thermal interrupt at this point
  53. */
  54. printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n", cpu);
  55. dump_stack();
  56. }
  57. static int cbe_machine_check_handler(struct pt_regs *regs)
  58. {
  59. int cpu = smp_processor_id();
  60. printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", cpu);
  61. dump_fir(cpu);
  62. /* No recovery from this code now, lets continue */
  63. return 0;
  64. }
  65. void __init cbe_ras_init(void)
  66. {
  67. unsigned long hid0;
  68. /*
  69. * Enable System Error & thermal interrupts and wakeup conditions
  70. */
  71. hid0 = mfspr(SPRN_HID0);
  72. hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP |
  73. HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP;
  74. mtspr(SPRN_HID0, hid0);
  75. mb();
  76. /*
  77. * Install machine check handler. Leave setting of precise mode to
  78. * what the firmware did for now
  79. */
  80. ppc_md.machine_check_exception = cbe_machine_check_handler;
  81. mb();
  82. /*
  83. * For now, we assume that IOC_FIR is already set to forward some
  84. * error conditions to the System Error handler. If that is not true
  85. * then it will have to be fixed up here.
  86. */
  87. }