/*
 * linux/arch/i386/nmi.c
 *
 * NMI watchdog support on APIC systems
 *
 * Started by Ingo Molnar <mingo@redhat.com>
 *
 * Fixes:
 *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
 *  Pavel Machek and
 *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */

#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
#include <linux/dmi.h>
#include <linux/kprobes.h>
#include <linux/cpumask.h>
#include <linux/kernel_stat.h>

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/kdebug.h>
#include <asm/intel_arch_perfmon.h>

#include "mach_traps.h"

int unknown_nmi_panic;
int nmi_watchdog_enabled;

/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
 * evntsel_nmi_owner tracks the ownership of the event selection MSRs
 * - different performance counters/event selection MSRs may be reserved by
 *   different subsystems; this reservation system just tries to coordinate
 *   things a little
 */
static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);

static cpumask_t backtrace_mask = CPU_MASK_NONE;

/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66

/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);	/* oprofile uses this */

unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;

struct nmi_watchdog_ctlblk {
	int enabled;
	u64 check_bit;
	unsigned int cccr_msr;
	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);

extern void show_registers(struct pt_regs *regs);
extern int unknown_nmi_panic;

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_PERFCTR0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_PERFCTR0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_PERFCTR0);
		case 15:
			return (msr - MSR_P4_BPU_PERFCTR0);
		}
	}
	return 0;
}

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_EVNTSEL0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_EVNTSEL0);
		case 15:
			return (msr - MSR_P4_BSU_ESCR0);
		}
	}
	return 0;
}

/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}

/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}

int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
		return 1;
	return 0;
}

void release_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
}

int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
		return 1;
	return 0;
}

void release_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
}
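
/*
 * Illustrative usage of the reservation API above (a sketch only, not a
 * real in-tree caller): a perfctr client such as oprofile reserves both
 * the counter and its event-select MSR before programming them, and
 * releases them in reverse order when done.
 *
 *	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
 *		return -EBUSY;
 *	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
 *		release_perfctr_nmi(MSR_K7_PERFCTR0);
 *		return -EBUSY;
 *	}
 *	... program the MSRs, run, then ...
 *	release_evntsel_nmi(MSR_K7_EVNTSEL0);
 *	release_perfctr_nmi(MSR_K7_PERFCTR0);
 */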
static __cpuinit inline int nmi_known_cpu(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)
			|| (boot_cpu_data.x86 == 16));
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return 1;
		else
			return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
	}
	return 0;
}
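
/*
 * For reference (not exhaustive): on AMD, family 6 is K7, family 15 is K8
 * and family 16 the family-10h parts; on Intel, family 6 covers the
 * P6/Pentium M/Core line and family 15 the Pentium 4 (NetBurst) line.
 * Anything nmi_known_cpu() does not recognise falls back to the IO-APIC
 * watchdog when NMI_DEFAULT is resolved later on.
 */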
static int endflag __initdata = 0;

#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
	local_irq_enable_in_hardirq();
	/* Intentionally don't use cpu_relax here. This is
	   to make sure that the performance counter really ticks,
	   even if there is a simulator or similar that catches the
	   pause instruction. On a real HT machine this is fine because
	   all other CPUs are busy with "useless" delay loops and don't
	   care if they get somewhat fewer cycles. */
	while (endflag == 0)
		mb();
}
#endif

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
	u64 counter_val;
	unsigned int retval = hz;

	/*
	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
	 * are writable, with higher bits sign extending from bit 31.
	 * So we can only program the counter with 31 bit values, and
	 * bit 31 must be set so that bits 32 and above sign extend to 1.
	 * Find the appropriate nmi_hz.
	 */
	counter_val = (u64)cpu_khz * 1000;
	do_div(counter_val, retval);
	if (counter_val > 0x7fffffffULL) {
		u64 count = (u64)cpu_khz * 1000;
		do_div(count, 0x7fffffffUL);
		retval = count + 1;
	}
	return retval;
}
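
/*
 * Worked example (numbers chosen for illustration only): on a 3 GHz CPU,
 * cpu_khz * 1000 = 3,000,000,000.  With nmi_hz = 1 that exceeds the
 * 0x7fffffff (2,147,483,647) limit of a 31-bit programmable count, so the
 * function returns 3,000,000,000 / 0x7fffffff + 1 = 2 and the watchdog
 * fires 2 NMIs per second instead.  On a 2 GHz CPU the value still fits
 * and nmi_hz stays at 1.
 */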
static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!prev_nmi_count)
		return -1;

	printk(KERN_INFO "Testing NMI watchdog ... ");

	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
	local_irq_enable();
	mdelay((10*1000)/nmi_hz); /* wait 10 ticks */

	for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
		/* Check cpu_callin_map here because that is set
		   after the timer is started. */
		if (!cpu_isset(cpu, cpu_callin_map))
			continue;
#endif
		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
			continue;
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
				cpu,
				prev_nmi_count[cpu],
				nmi_count(cpu));
			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
			atomic_dec(&nmi_active);
		}
	}
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		return -1;
	}
	endflag = 1;
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

		nmi_hz = 1;
		if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
		    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
			nmi_hz = adjust_for_32bit_ctr(nmi_hz);
		}
	}

	kfree(prev_nmi_count);
	return 0;
}
/* This needs to happen later in boot so counters are working */
late_initcall(check_nmi_watchdog);

static int __init setup_nmi_watchdog(char *str)
{
	int nmi;

	get_option(&str, &nmi);

	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
		return 0;

	nmi_watchdog = nmi;
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);
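
/*
 * Boot-time usage (illustrative): passing "nmi_watchdog=N" on the kernel
 * command line selects the watchdog mode before the NMI_DEFAULT logic in
 * the proc handler runs; the accepted values are the NMI_* constants from
 * <asm/nmi.h>, and anything outside the NMI_NONE..NMI_INVALID range is
 * rejected above.  In the usual numbering that means e.g. nmi_watchdog=1
 * for the IO-APIC flavour and nmi_watchdog=2 for the local APIC one
 * (check the header for the exact constants).
 */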
static void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

static void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware */
	if (nmi_known_cpu() <= 0)
		return;

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	touch_nmi_watchdog();
}

void disable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	disable_irq(0);
	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) == 0) {
		touch_nmi_watchdog();
		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
		enable_irq(0);
	}
}

static void __acpi_nmi_disable(void *__unused)
{
	apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}

/*
 * Disable timer based NMIs on all CPUs:
 */
void acpi_nmi_disable(void)
{
	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
		on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
}

static void __acpi_nmi_enable(void *__unused)
{
	apic_write_around(APIC_LVT0, APIC_DM_NMI);
}

/*
 * Enable timer based NMIs on all CPUs:
 */
void acpi_nmi_enable(void)
{
	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
		on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
}

#ifdef CONFIG_PM

static int nmi_pm_active;	/* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* only CPU0 goes here, other CPUs should be offline */
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
	/* only CPU0 goes here, other CPUs should be offline */
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
	return 0;
}

static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.id	= 0,
	.cls	= &nmi_sysclass,
};

static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	/* should really be a BUG_ON but b/c this is an
	 * init call, it just doesn't work.  -dcz
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if (atomic_read(&nmi_active) < 0)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */

/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */
static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(perfctr_msr, 0 - count);
}

static void write_watchdog_counter32(unsigned int perfctr_msr,
			const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsr(perfctr_msr, (u32)(-count), 0);
}
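
/*
 * Why the counter is written with a negative value (worked example,
 * numbers for illustration only): the perfctr raises its PMI - routed to
 * NMI via LVTPC in the setup routines below - when it overflows.  Writing
 * -(cpu_khz * 1000 / nmi_hz) therefore makes it overflow after roughly
 * that many unhalted cycles, i.e. about nmi_hz times per second on a busy
 * CPU.  On a 2 GHz CPU with nmi_hz = 1 that is a preload of -2,000,000,000.
 */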
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

static int setup_k7_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_K7_PERFCTR0;
	evntsel_msr = MSR_K7_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */
	wd->check_bit = 1ULL << 63;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
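
/*
 * The same basic pattern repeats for every watchdog flavour below:
 * reserve the counter and event-select MSRs, program the event (cycles
 * while the CPU is not halted), preload the counter with a negative value
 * via write_watchdog_counter(), point LVTPC at NMI, then set the enable
 * bit.  The P4 variant adds a CCCR register in the middle, but follows
 * the same shape; otherwise only the MSR names and the enable/overflow
 * bit positions differ per CPU family.
 */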
static void stop_k7_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}

#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

static int setup_p6_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_P6_PERFCTR0;
	evntsel_msr = MSR_P6_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */
	wd->check_bit = 1ULL << 39;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}

static void stop_p6_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}

/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
#define P4_CCCR_OVF		(1<<31)

/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
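
/* In other words (sketch of the intent): COMPARE + COMPLEMENT with
   THRESHOLD(15) makes the CCCR count every cycle in which the selected
   ESCR event count is <= 15, which in practice is every cycle, so
   IQ_PERFCTR0 simply counts clock ticks and overflows once per
   watchdog period. */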
static int setup_p4_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/* performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		 | P4_CCCR_COMPLEMENT
		 | P4_CCCR_COMPARE
		 | P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	wd->check_bit = 1ULL << 39;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}

static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}

#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static int setup_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		goto fail;

	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */
	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}

static void stop_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return;

	wrmsr(wd->evntsel_msr, 0, 0);
	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}

void setup_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 1)
		return;

	/* cheap hack to support suspend/resume */
	/* if cpu0 is not active neither should the other cpus */
	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
			    boot_cpu_data.x86 != 16)
				return;
			if (!setup_k7_watchdog())
				return;
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				if (!setup_intel_arch_watchdog())
					return;
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					return;

				if (!setup_p6_watchdog())
					return;
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					return;

				if (!setup_p4_watchdog())
					return;
				break;
			default:
				return;
			}
			break;
		default:
			return;
		}
	}
	wd->enabled = 1;
	atomic_inc(&nmi_active);
}

void stop_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 0)
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			stop_k7_watchdog();
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				stop_intel_arch_watchdog();
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					break;
				stop_p6_watchdog();
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					break;
				stop_p4_watchdog();
				break;
			}
			break;
		default:
			return;
		}
	}
	wd->enabled = 0;
	atomic_dec(&nmi_active);
}

/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */

static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];

void touch_nmi_watchdog(void)
{
	if (nmi_watchdog > 0) {
		unsigned cpu;

		/*
		 * Just reset the alert counters, (other CPUs might be
		 * spinning on locks we hold):
		 */
		for_each_present_cpu(cpu)
			alert_counter[cpu] = 0;
	}

	/*
	 * Tickle the softlockup detector too:
	 */
	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);
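
/*
 * Typical caller (illustrative): code that legitimately keeps a CPU busy
 * with interrupts off for a long time - a slow console write, a long
 * polling loop in a driver - calls touch_nmi_watchdog() inside the loop
 * so the alert counter in nmi_watchdog_tick() never reaches its
 * 5 * nmi_hz (roughly five second) limit.
 */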
extern void die_nmi(struct pt_regs *, const char *msg);

__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{
	/*
	 * Since current_thread_info()-> is always on the stack, and we
	 * always switch the stack NMI-atomically, it's safe to use
	 * smp_processor_id().
	 */
	unsigned int sum;
	int touched = 0;
	int cpu = smp_processor_id();
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 dummy;
	int rc = 0;

	/* check for other users first */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
			== NOTIFY_STOP) {
		rc = 1;
		touched = 1;
	}

	if (cpu_isset(cpu, backtrace_mask)) {
		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */

		spin_lock(&lock);
		printk("NMI backtrace for cpu %d\n", cpu);
		dump_stack();
		spin_unlock(&lock);
		cpu_clear(cpu, backtrace_mask);
	}

	/*
	 * Take the local apic timer and PIT/HPET into account. We don't
	 * know which one is active, when we have highres/dyntick on
	 */
	sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0);

	/* if none of the timers is firing, this cpu isn't doing much */
	if (!touched && last_irq_sums[cpu] == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		alert_counter[cpu]++;
		if (alert_counter[cpu] == 5*nmi_hz)
			/*
			 * die_nmi will return ONLY if NOTIFY_STOP happens..
			 */
			die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
	} else {
		last_irq_sums[cpu] = sum;
		alert_counter[cpu] = 0;
	}
	/* see if the nmi watchdog went off */
	if (wd->enabled) {
		if (nmi_watchdog == NMI_LOCAL_APIC) {
			rdmsrl(wd->perfctr_msr, dummy);
			if (dummy & wd->check_bit) {
				/* this wasn't a watchdog timer interrupt */
				goto done;
			}

			/* only Intel P4 uses the cccr msr */
			if (wd->cccr_msr != 0) {
				/*
				 * P4 quirks:
				 * - An overflown perfctr will assert its interrupt
				 *   until the OVF flag in its CCCR is cleared.
				 * - LVTPC is masked on interrupt and must be
				 *   unmasked by the LVTPC handler.
				 */
				rdmsrl(wd->cccr_msr, dummy);
				dummy &= ~P4_CCCR_OVF;
				wrmsrl(wd->cccr_msr, dummy);
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* start the cycle over again */
				write_watchdog_counter(wd->perfctr_msr, NULL);
			}
			else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
				 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
				/* P6 based Pentium M needs to re-unmask
				 * the apic vector but it doesn't hurt
				 * other P6 variants.
				 * ArchPerfmon/Core Duo also needs this */
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* P6/ARCH_PERFMON has 32 bit counter write */
				write_watchdog_counter32(wd->perfctr_msr, NULL);
			} else {
				/* start the cycle over again */
				write_watchdog_counter(wd->perfctr_msr, NULL);
			}
			rc = 1;
		} else if (nmi_watchdog == NMI_IO_APIC) {
			/* don't know how to accurately check for this.
			 * just assume it was a watchdog timer interrupt
			 * This matches the old behaviour.
			 */
			rc = 1;
		}
	}
done:
	return rc;
}

int do_nmi_callback(struct pt_regs * regs, int cpu)
{
#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
#endif
	return 0;
}

#ifdef CONFIG_SYSCTL

static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
	die_nmi(regs, buf);
	return 0;
}

/*
 * proc handler for /proc/sys/kernel/nmi
 */
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	int old_state;

	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
	old_state = nmi_watchdog_enabled;
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (!!old_state == !!nmi_watchdog_enabled)
		return 0;

	if (atomic_read(&nmi_active) < 0) {
		printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
		return -EIO;
	}

	if (nmi_watchdog == NMI_DEFAULT) {
		if (nmi_known_cpu() > 0)
			nmi_watchdog = NMI_LOCAL_APIC;
		else
			nmi_watchdog = NMI_IO_APIC;
	}

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		if (nmi_watchdog_enabled)
			enable_lapic_nmi_watchdog();
		else
			disable_lapic_nmi_watchdog();
	} else {
		printk(KERN_WARNING
			"NMI watchdog doesn't know what hardware to touch\n");
		return -EIO;
	}
	return 0;
}
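
/*
 * Runtime usage (illustrative, assuming the usual sysctl wiring of this
 * handler under /proc/sys/kernel/nmi):
 *
 *	# echo 0 > /proc/sys/kernel/nmi		- disable the lapic watchdog
 *	# echo 1 > /proc/sys/kernel/nmi		- re-enable it
 *
 * Writes only take effect when the value actually changes, and fail with
 * -EIO if the watchdog was permanently disabled or the selected mode is
 * not one this handler knows how to drive.
 */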
#endif

void __trigger_all_cpu_backtrace(void)
{
	int i;

	backtrace_mask = cpu_online_map;
	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
	for (i = 0; i < 10 * 1000; i++) {
		if (cpus_empty(backtrace_mask))
			break;
		mdelay(1);
	}
}
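
/*
 * Note: this only arms backtrace_mask; the actual stack dumps are printed
 * from nmi_watchdog_tick() above as each CPU takes its next watchdog NMI
 * and finds its bit set.  (Typically reached via trigger_all_cpu_backtrace(),
 * e.g. from a sysrq handler - illustrative, the wiring lives outside this
 * file.)
 */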
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);