hw_nmi.c 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. /*
  2. * HW NMI watchdog support
  3. *
  4. * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
  5. *
  6. * Arch specific calls to support NMI watchdog
  7. *
  8. * Bits copied from original nmi.c file
  9. *
  10. */
  11. #include <asm/apic.h>
  12. #include <linux/smp.h>
  13. #include <linux/cpumask.h>
  14. #include <linux/sched.h>
  15. #include <linux/percpu.h>
  16. #include <linux/cpumask.h>
  17. #include <linux/kernel_stat.h>
  18. #include <asm/mce.h>
  19. #include <linux/nmi.h>
  20. #include <linux/module.h>
  21. /* For reliability, we're prepared to waste bits here. */
  22. static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
  23. static DEFINE_PER_CPU(unsigned, last_irq_sum);
  24. /*
  25. * Take the local apic timer and PIT/HPET into account. We don't
  26. * know which one is active, when we have highres/dyntick on
  27. */
  28. static inline unsigned int get_timer_irqs(int cpu)
  29. {
  30. unsigned int irqs = per_cpu(irq_stat, cpu).irq0_irqs;
  31. #if defined(CONFIG_X86_LOCAL_APIC)
  32. irqs += per_cpu(irq_stat, cpu).apic_timer_irqs;
  33. #endif
  34. return irqs;
  35. }
  /*
   * Tell whether a machine check is being handled right now.
   *
   * Returns 1 while the mce_entry counter is positive and 0 otherwise.
   * Without CONFIG_X86_MCE the answer is always 0.
   */
  static inline int mce_in_progress(void)
  {
  #ifdef CONFIG_X86_MCE
      return atomic_read(&mce_entry) > 0;
  #else
      return 0;
  #endif
  }
  43. int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
  44. {
  45. unsigned int sum;
  46. int cpu = smp_processor_id();
  47. /* FIXME: cheap hack for this check, probably should get its own
  48. * die_notifier handler
  49. */
  50. if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
  51. static DEFINE_SPINLOCK(lock); /* Serialise the printks */
  52. spin_lock(&lock);
  53. printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
  54. show_regs(regs);
  55. dump_stack();
  56. spin_unlock(&lock);
  57. cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
  58. }
  59. /* if we are doing an mce, just assume the cpu is not stuck */
  60. /* Could check oops_in_progress here too, but it's safer not to */
  61. if (mce_in_progress())
  62. return 0;
  63. /* We determine if the cpu is stuck by checking whether any
  64. * interrupts have happened since we last checked. Of course
  65. * an nmi storm could create false positives, but the higher
  66. * level logic should account for that
  67. */
  68. sum = get_timer_irqs(cpu);
  69. if (__get_cpu_var(last_irq_sum) == sum) {
  70. return 1;
  71. } else {
  72. __get_cpu_var(last_irq_sum) = sum;
  73. return 0;
  74. }
  75. }
  76. u64 hw_nmi_get_sample_period(void)
  77. {
  78. return cpu_khz * 1000;
  79. }
  #ifdef ARCH_HAS_NMI_WATCHDOG
  /*
   * Ask every online CPU to print a backtrace.
   *
   * Copies cpu_online_mask into backtrace_mask, sends an NMI IPI to all
   * CPUs, then polls in 1 ms steps until the mask has been emptied or
   * 10 * 1000 delay steps have been spent.
   */
  void arch_trigger_all_cpu_backtrace(void)
  {
      int waited_ms = 0;

      cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);

      printk(KERN_INFO "sending NMI to all CPUs:\n");
      apic->send_IPI_all(NMI_VECTOR);

      /* Wait for up to 10 seconds for all CPUs to do the backtrace */
      while (waited_ms < 10 * 1000 &&
             !cpumask_empty(to_cpumask(backtrace_mask))) {
          mdelay(1);
          waited_ms++;
      }
  }
  #endif
  /* STUB calls to mimic old nmi_watchdog behaviour */
  #ifdef CONFIG_X86_LOCAL_APIC
  /* Exported watchdog mode variable, initialised to NMI_NONE. */
  unsigned int nmi_watchdog = NMI_NONE;
  EXPORT_SYMBOL(nmi_watchdog);

  /* Stub: does nothing. */
  void acpi_nmi_enable(void)
  {
  }

  /* Stub: does nothing. */
  void acpi_nmi_disable(void)
  {
  }
  #endif
  102. atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
  103. EXPORT_SYMBOL(nmi_active);
  104. int unknown_nmi_panic;
  105. void cpu_nmi_set_wd_enabled(void) { return; }
  106. void stop_apic_nmi_watchdog(void *unused) { return; }
  107. void setup_apic_nmi_watchdog(void *unused) { return; }
  108. int __init check_nmi_watchdog(void) { return 0; }