non-fatal.c 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. /*
  2. * Non Fatal Machine Check Exception Reporting
  3. *
  4. * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
  5. *
  6. * This file contains routines to check for non-fatal MCEs every 15s
  7. *
  8. */
  9. #include <linux/init.h>
  10. #include <linux/types.h>
  11. #include <linux/kernel.h>
  12. #include <linux/jiffies.h>
  13. #include <linux/workqueue.h>
  14. #include <linux/interrupt.h>
  15. #include <linux/smp.h>
  16. #include <linux/module.h>
  17. #include <asm/processor.h>
  18. #include <asm/system.h>
  19. #include <asm/msr.h>
  20. #include "mce.h"
  /*
   * Index of the first machine-check bank that mce_checkregs() polls.
   * Written once by init_nonfatal_mce_checker() before polling starts.
   */
  static int firstbank;

  /*
   * Polling interval in jiffies (15 seconds).
   *
   * The expansion is parenthesized so that the macro behaves as a single
   * value inside larger expressions (e.g. "x / MCE_RATE" divides by the
   * whole interval instead of dividing by 15 and then multiplying by HZ).
   */
  #define MCE_RATE (15 * HZ)
  23. static void mce_checkregs(void *info)
  24. {
  25. u32 low, high;
  26. int i;
  27. for (i = firstbank; i < nr_mce_banks; i++) {
  28. rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
  29. if (high & (1<<31)) {
  30. printk(KERN_INFO "MCE: The hardware reports a non "
  31. "fatal, correctable incident occurred on "
  32. "CPU %d.\n",
  33. smp_processor_id());
  34. printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
  35. /*
  36. * Scrub the error so we don't pick it up in MCE_RATE
  37. * seconds time.
  38. */
  39. wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
  40. /* Serialize */
  41. wmb();
  42. add_taint(TAINT_MACHINE_CHECK);
  43. }
  44. }
  45. }
  46. static void mce_work_fn(struct work_struct *work);
  47. static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
  48. static void mce_work_fn(struct work_struct *work)
  49. {
  50. on_each_cpu(mce_checkregs, NULL, 1);
  51. schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
  52. }
  53. static int __init init_nonfatal_mce_checker(void)
  54. {
  55. struct cpuinfo_x86 *c = &boot_cpu_data;
  56. /* Check for MCE support */
  57. if (!cpu_has(c, X86_FEATURE_MCE))
  58. return -ENODEV;
  59. /* Check for PPro style MCA */
  60. if (!cpu_has(c, X86_FEATURE_MCA))
  61. return -ENODEV;
  62. /* Some Athlons misbehave when we frob bank 0 */
  63. if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
  64. boot_cpu_data.x86 == 6)
  65. firstbank = 1;
  66. else
  67. firstbank = 0;
  68. /*
  69. * Check for non-fatal errors every MCE_RATE s
  70. */
  71. schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
  72. printk(KERN_INFO "Machine check exception polling timer started.\n");
  73. return 0;
  74. }
  75. module_init(init_nonfatal_mce_checker);
  76. MODULE_LICENSE("GPL");