nmi.c
/*
 *  linux/arch/i386/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
 *  Pavel Machek and
 *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */
#include <linux/config.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/intel_arch_perfmon.h>

#include "mach_traps.h"
unsigned int nmi_watchdog = NMI_NONE;
extern int unknown_nmi_panic;
static unsigned int nmi_hz = HZ;
static unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
static unsigned int nmi_p4_cccr_val;
extern void show_registers(struct pt_regs *regs);
/*
 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
 * - it may be reserved by some other driver, or not
 * - when not reserved by some other driver, it may be used for
 *   the NMI watchdog, or not
 *
 * This is maintained separately from nmi_active because the NMI
 * watchdog may also be driven from the I/O APIC timer.
 */
static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
static unsigned int lapic_nmi_owner;
#define LAPIC_NMI_WATCHDOG	(1<<0)
#define LAPIC_NMI_RESERVED	(1<<1)

/* nmi_active:
 * +1: the lapic NMI watchdog is active, but can be disabled
 *  0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 * -1: the lapic NMI watchdog is disabled, but can be enabled
 */
int nmi_active;
#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

#define MSR_P4_MISC_ENABLE	0x1A0
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL	(1<<12)
#define MSR_P4_PERFCTR0		0x300
#define MSR_P4_CCCR0		0x360
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
#define MSR_P4_IQ_COUNTER0	0x30C
#define P4_NMI_CRU_ESCR0	(P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
#define P4_NMI_IQ_CCCR0	\
	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|	\
	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
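/*
 * The complemented max-threshold trick referenced above: with COMPARE and
 * COMPLEMENT set and THRESHOLD at its maximum (15), the threshold test is
 * always true, so IQ_COUNTER0 effectively increments on every clock cycle
 * regardless of which event CRU_ESCR0 selects.  The counter is preloaded
 * (see write_watchdog_counter below) so that it overflows nmi_hz times per
 * second, and each overflow raises the PMI that LVTPC delivers as an NMI.
 */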
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
	volatile int *endflag = data;
	local_irq_enable_in_hardirq();
	/* Intentionally don't use cpu_relax here. This is
	   to make sure that the performance counter really ticks,
	   even if there is a simulator or similar that catches the
	   pause instruction. On a real HT machine this is fine because
	   all other CPUs are busy with "useless" delay loops and don't
	   care if they get somewhat fewer cycles. */
	while (*endflag == 0)
		barrier();
}
#endif
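/*
 * Boot-time sanity check: snapshot every CPU's NMI count, let the watchdog
 * run for roughly ten of its periods, and expect each CPU that has checked
 * in to have taken more than 5 NMIs.  A CPU that has not is reported as
 * stuck and the lapic watchdog is turned off.
 */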
static int __init check_nmi_watchdog(void)
{
	volatile int endflag = 0;
	unsigned int *prev_nmi_count;
	int cpu;

	if (nmi_watchdog == NMI_NONE)
		return 0;

	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!prev_nmi_count)
		return -1;

	printk(KERN_INFO "Testing NMI watchdog ... ");

	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
	local_irq_enable();
	mdelay((10*1000)/nmi_hz); // wait 10 ticks

	for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
		/* Check cpu_callin_map here because that is set
		   after the timer is started. */
		if (!cpu_isset(cpu, cpu_callin_map))
			continue;
#endif
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			endflag = 1;
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
				cpu,
				prev_nmi_count[cpu],
				nmi_count(cpu));
			nmi_active = 0;
			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
			kfree(prev_nmi_count);
			return -1;
		}
	}
	endflag = 1;
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		nmi_hz = 1;

	kfree(prev_nmi_count);
	return 0;
}
/* This needs to happen later in boot so counters are working */
late_initcall(check_nmi_watchdog);
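/*
 * Parse the "nmi_watchdog=" boot option.  The local APIC watchdog is only
 * accepted on CPUs the perfctr code below knows how to program (Intel
 * family 6/15 and AMD family 6/15); the IO-APIC watchdog is accepted
 * unconditionally.
 */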
static int __init setup_nmi_watchdog(char *str)
{
	int nmi;

	get_option(&str, &nmi);

	if (nmi >= NMI_INVALID)
		return 0;
	if (nmi == NMI_NONE)
		nmi_watchdog = nmi;
	/*
	 * If any other x86 CPU has a local APIC, then
	 * please test the NMI stuff there and send me the
	 * missing bits. Right now Intel P6/P4 and AMD K7 only.
	 */
	if ((nmi == NMI_LOCAL_APIC) &&
			(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
			(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
		nmi_watchdog = nmi;
	if ((nmi == NMI_LOCAL_APIC) &&
			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
			(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
		nmi_watchdog = nmi;
	/*
	 * We can enable the IO-APIC watchdog
	 * unconditionally.
	 */
	if (nmi == NMI_IO_APIC) {
		nmi_active = 1;
		nmi_watchdog = nmi;
	}
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);
static void disable_intel_arch_watchdog(void);
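/*
 * Stop a running lapic watchdog by clearing the event-select (or, on P4,
 * the CCCR/ESCR) MSR that generates the PMI, then mark it as disabled but
 * re-enablable (nmi_active = -1).
 */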
static void disable_lapic_nmi_watchdog(void)
{
	if (nmi_active <= 0)
		return;
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		wrmsr(MSR_K7_EVNTSEL0, 0, 0);
		break;
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
			disable_intel_arch_watchdog();
			break;
		}
		switch (boot_cpu_data.x86) {
		case 6:
			if (boot_cpu_data.x86_model > 0xd)
				break;

			wrmsr(MSR_P6_EVNTSEL0, 0, 0);
			break;
		case 15:
			if (boot_cpu_data.x86_model > 0x4)
				break;

			wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
			wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
			break;
		}
		break;
	}
	nmi_active = -1;
	/* tell do_nmi() and others that we're not active any more */
	nmi_watchdog = 0;
}
static void enable_lapic_nmi_watchdog(void)
{
	if (nmi_active < 0) {
		nmi_watchdog = NMI_LOCAL_APIC;
		setup_apic_nmi_watchdog();
	}
}
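/*
 * Reservation interface for other drivers that want the lapic NMI
 * hardware: reserving it shuts the lapic watchdog down if it was running,
 * and releasing it starts the watchdog again if it was the owner before
 * the reservation.
 */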
int reserve_lapic_nmi(void)
{
	unsigned int old_owner;

	spin_lock(&lapic_nmi_owner_lock);
	old_owner = lapic_nmi_owner;
	lapic_nmi_owner |= LAPIC_NMI_RESERVED;
	spin_unlock(&lapic_nmi_owner_lock);
	if (old_owner & LAPIC_NMI_RESERVED)
		return -EBUSY;
	if (old_owner & LAPIC_NMI_WATCHDOG)
		disable_lapic_nmi_watchdog();
	return 0;
}

void release_lapic_nmi(void)
{
	unsigned int new_owner;

	spin_lock(&lapic_nmi_owner_lock);
	new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
	lapic_nmi_owner = new_owner;
	spin_unlock(&lapic_nmi_owner_lock);
	if (new_owner & LAPIC_NMI_WATCHDOG)
		enable_lapic_nmi_watchdog();
}
void disable_timer_nmi_watchdog(void)
{
	if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0))
		return;

	unset_nmi_callback();
	nmi_active = -1;
	nmi_watchdog = NMI_NONE;
}

void enable_timer_nmi_watchdog(void)
{
	if (nmi_active < 0) {
		nmi_watchdog = NMI_IO_APIC;
		touch_nmi_watchdog();
		nmi_active = 1;
	}
}
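/*
 * Power management hooks: remember whether the lapic watchdog was active
 * before suspend and restore that state on resume.
 */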
#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	nmi_pm_active = nmi_active;
	disable_lapic_nmi_watchdog();
	return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
	if (nmi_pm_active > 0)
		enable_lapic_nmi_watchdog();
	return 0;
}

static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.id	= 0,
	.cls	= &nmi_sysclass,
};

static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */
/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

static void clear_msr_range(unsigned int base, unsigned int n)
{
	unsigned int i;

	for(i = 0; i < n; ++i)
		wrmsr(base+i, 0, 0);
}
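/*
 * Preload the perfctr with -(cpu_khz * 1000 / nmi_hz) so that, counting
 * one event per unhalted clock, it overflows (and raises its PMI)
 * nmi_hz times a second.
 */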
static void write_watchdog_counter(const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if(descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(nmi_perfctr_msr, 0 - count);
}
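/*
 * The K7, P6 and architectural-perfmon setup routines below all follow the
 * same pattern: clear the relevant counter/event-select MSRs, program an
 * "unhalted CPU cycles" event counting in both OS and USR mode with the
 * interrupt bit set, preload the counter, point LVTPC at NMI delivery, and
 * only then set the enable bit.
 */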
static void setup_k7_watchdog(void)
{
	unsigned int evntsel;

	nmi_perfctr_msr = MSR_K7_PERFCTR0;

	clear_msr_range(MSR_K7_EVNTSEL0, 4);
	clear_msr_range(MSR_K7_PERFCTR0, 4);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
	write_watchdog_counter("K7_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
}
static void setup_p6_watchdog(void)
{
	unsigned int evntsel;

	nmi_perfctr_msr = MSR_P6_PERFCTR0;

	clear_msr_range(MSR_P6_EVNTSEL0, 2);
	clear_msr_range(MSR_P6_PERFCTR0, 2);

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
	write_watchdog_counter("P6_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
}
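/*
 * P4 setup uses IQ_COUNTER0 driven by CRU_ESCR0 through IQ_CCCR0, per the
 * "behave like a clock" recipe above.  When two hyperthreaded siblings
 * share the counter hardware, the overflow PMI is additionally routed to
 * the second logical CPU (P4_CCCR_OVF_PMI1) so that both siblings receive
 * watchdog NMIs.
 */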
static int setup_p4_watchdog(void)
{
	unsigned int misc_enable, dummy;

	rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

	nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
	nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
#ifdef CONFIG_SMP
	if (smp_num_siblings == 2)
		nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
#endif

	if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
		clear_msr_range(0x3F1, 2);
	/* MSR 0x3F0 seems to have a default value of 0xFC00, but current
	   docs don't fully define it, so leave it alone for now. */
	if (boot_cpu_data.x86_model >= 0x3) {
		/* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
		clear_msr_range(0x3A0, 26);
		clear_msr_range(0x3BC, 3);
	} else {
		clear_msr_range(0x3A0, 31);
	}
	clear_msr_range(0x3C0, 6);
	clear_msr_range(0x3C8, 6);
	clear_msr_range(0x3E0, 2);
	clear_msr_range(MSR_P4_CCCR0, 18);
	clear_msr_range(MSR_P4_PERFCTR0, 18);

	wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
	wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
	write_watchdog_counter("P4_IQ_COUNTER0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
	return 1;
}
static void disable_intel_arch_watchdog(void)
{
	unsigned ebx;

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	ebx = cpuid_ebx(10);
	if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
}
static int setup_intel_arch_watchdog(void)
{
	unsigned int evntsel;
	unsigned ebx;

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	ebx = cpuid_ebx(10);
	if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return 0;

	nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;

	clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
	clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
	write_watchdog_counter("INTEL_ARCH_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
	return 1;
}
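/*
 * Program the per-vendor perfctr watchdog on this CPU and, on success,
 * mark the lapic NMI hardware as owned by the watchdog and active.
 * CPUs the code does not know how to program are silently skipped.
 */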
void setup_apic_nmi_watchdog (void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
			return;
		setup_k7_watchdog();
		break;
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
			if (!setup_intel_arch_watchdog())
				return;
			break;
		}
		switch (boot_cpu_data.x86) {
		case 6:
			if (boot_cpu_data.x86_model > 0xd)
				return;

			setup_p6_watchdog();
			break;
		case 15:
			if (boot_cpu_data.x86_model > 0x4)
				return;

			if (!setup_p4_watchdog())
				return;
			break;
		default:
			return;
		}
		break;
	default:
		return;
	}
	lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
	nmi_active = 1;
}
/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */

static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];

void touch_nmi_watchdog (void)
{
	int i;

	/*
	 * Just reset the alert counters, (other CPUs might be
	 * spinning on locks we hold):
	 */
	for_each_possible_cpu(i)
		alert_counter[i] = 0;

	/*
	 * Tickle the softlockup detector too:
	 */
	touch_softlockup_watchdog();
}
extern void die_nmi(struct pt_regs *, const char *msg);
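/*
 * Called from the NMI handler on every watchdog NMI.  Compares the local
 * APIC timer interrupt count against the previous snapshot; if it has not
 * moved for 5 seconds' worth of NMIs, the CPU is declared locked up via
 * die_nmi().  Afterwards the perfctr is re-armed (and, on P4 and
 * P6/arch-perfmon, the masked LVTPC entry is re-opened).
 */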
void nmi_watchdog_tick (struct pt_regs * regs)
{

	/*
	 * Since current_thread_info()-> is always on the stack, and we
	 * always switch the stack NMI-atomically, it's safe to use
	 * smp_processor_id().
	 */
	unsigned int sum;
	int cpu = smp_processor_id();

	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;

	if (last_irq_sums[cpu] == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		alert_counter[cpu]++;
		if (alert_counter[cpu] == 5*nmi_hz)
			/*
			 * die_nmi will return ONLY if NOTIFY_STOP happens..
			 */
			die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
	} else {
		last_irq_sums[cpu] = sum;
		alert_counter[cpu] = 0;
	}
	if (nmi_perfctr_msr) {
		if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
			/*
			 * P4 quirks:
			 * - An overflown perfctr will assert its interrupt
			 *   until the OVF flag in its CCCR is cleared.
			 * - LVTPC is masked on interrupt and must be
			 *   unmasked by the LVTPC handler.
			 */
			wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
			apic_write(APIC_LVTPC, APIC_DM_NMI);
		}
		else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
			 nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
			/* Only P6-based Pentium M needs to re-unmask
			 * the apic vector, but it doesn't hurt
			 * other P6 variants. */
			apic_write(APIC_LVTPC, APIC_DM_NMI);
		}
		write_watchdog_counter(NULL);
	}
}
#ifdef CONFIG_SYSCTL
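/*
 * Installed as the NMI callback when unknown_nmi_panic is enabled: an NMI
 * whose reason byte has neither the memory-parity bit (bit 7) nor the I/O
 * channel check bit (bit 6) set has no identifiable source and is reported
 * via die_nmi().
 */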
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	if (!(reason & 0xc0)) {
		sprintf(buf, "NMI received for unknown reason %02x\n", reason);
		die_nmi(regs, buf);
	}
	return 0;
}

/*
 * proc handler for /proc/sys/kernel/unknown_nmi_panic
 */
int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	int old_state;

	old_state = unknown_nmi_panic;
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (!!old_state == !!unknown_nmi_panic)
		return 0;
	if (unknown_nmi_panic) {
		if (reserve_lapic_nmi() < 0) {
			unknown_nmi_panic = 0;
			return -EBUSY;
		} else {
			set_nmi_callback(unknown_nmi_panic_callback);
		}
	} else {
		release_lapic_nmi();
		unset_nmi_callback();
	}
	return 0;
}

#endif
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(reserve_lapic_nmi);
EXPORT_SYMBOL(release_lapic_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);