/*
 *  linux/arch/i386/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
 *  Pavel Machek and
 *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */

#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
#include <linux/dmi.h>
#include <linux/kprobes.h>
#include <linux/cpumask.h>
#include <linux/kernel_stat.h>

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/kdebug.h>
#include <asm/intel_arch_perfmon.h>

#include "mach_traps.h"

int unknown_nmi_panic;
int nmi_watchdog_enabled;

/* perfctr_nmi_owner tracks the ownership of the perfctr registers;
 * evntsel_nmi_owner tracks the ownership of the event selection registers.
 * Different performance counters / event selection registers may be
 * reserved by different subsystems; this reservation system just tries
 * to coordinate things a little.
 */

/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66
#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)

static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);

static cpumask_t backtrace_mask = CPU_MASK_NONE;

/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);	/* oprofile uses this */

unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;

struct nmi_watchdog_ctlblk {
	int enabled;
	u64 check_bit;
	unsigned int cccr_msr;
	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);

extern void show_registers(struct pt_regs *regs);
extern int unknown_nmi_panic;

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_PERFCTR0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_PERFCTR0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_PERFCTR0);
		case 15:
			return (msr - MSR_P4_BPU_PERFCTR0);
		}
	}
	return 0;
}

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_EVNTSEL0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_EVNTSEL0);
		case 15:
			return (msr - MSR_P4_BSU_ESCR0);
		}
	}
	return 0;
}

/* checks whether a bit is available (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	int cpu;
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
	for_each_possible_cpu (cpu) {
		if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
			return 0;
	}
	return 1;
}

/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;
	int cpu;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	for_each_possible_cpu (cpu) {
		if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
			return 0;
	}
	return 1;
}

static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
{
	unsigned int counter;
	if (cpu < 0)
		cpu = smp_processor_id();

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
		return 1;
	return 0;
}

static void __release_perfctr_nmi(int cpu, unsigned int msr)
{
	unsigned int counter;
	if (cpu < 0)
		cpu = smp_processor_id();

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]);
}

int reserve_perfctr_nmi(unsigned int msr)
{
	int cpu, i;
	for_each_possible_cpu (cpu) {
		if (!__reserve_perfctr_nmi(cpu, msr)) {
			for_each_possible_cpu (i) {
				if (i >= cpu)
					break;
				__release_perfctr_nmi(i, msr);
			}
			return 0;
		}
	}
	return 1;
}

void release_perfctr_nmi(unsigned int msr)
{
	int cpu;
	for_each_possible_cpu (cpu) {
		__release_perfctr_nmi(cpu, msr);
	}
}

int __reserve_evntsel_nmi(int cpu, unsigned int msr)
{
	unsigned int counter;
	if (cpu < 0)
		cpu = smp_processor_id();

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
		return 1;
	return 0;
}

static void __release_evntsel_nmi(int cpu, unsigned int msr)
{
	unsigned int counter;
	if (cpu < 0)
		cpu = smp_processor_id();

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
}

int reserve_evntsel_nmi(unsigned int msr)
{
	int cpu, i;
	for_each_possible_cpu (cpu) {
		if (!__reserve_evntsel_nmi(cpu, msr)) {
			for_each_possible_cpu (i) {
				if (i >= cpu)
					break;
				__release_evntsel_nmi(i, msr);
			}
			return 0;
		}
	}
	return 1;
}

void release_evntsel_nmi(unsigned int msr)
{
	int cpu;
	for_each_possible_cpu (cpu) {
		__release_evntsel_nmi(cpu, msr);
	}
}

static __cpuinit inline int nmi_known_cpu(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)
			|| (boot_cpu_data.x86 == 16));
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return 1;
		else
			return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
	}
	return 0;
}

static int endflag __initdata = 0;

#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
	local_irq_enable_in_hardirq();
	/* Intentionally don't use cpu_relax here. This is
	   to make sure that the performance counter really ticks,
	   even if there is a simulator or similar that catches the
	   pause instruction. On a real HT machine this is fine because
	   all other CPUs are busy with "useless" delay loops and don't
	   care if they get somewhat less cycles. */
	while (endflag == 0)
		mb();
}
#endif
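
/*
 * Worked example of the adjustment below (illustrative numbers, not from
 * this file): with nmi_hz == 1 a hypothetical 4 GHz CPU would need a
 * counter period of 4,000,000,000 cycles, which does not fit in the 31
 * writable value bits (max 0x7fffffff, roughly 2.1e9).  In that case
 * nmi_hz is raised to 4e9 / 0x7fffffff + 1 == 2, so each period is about
 * 2e9 cycles and the programmed value stays representable.
 */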

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
	u64 counter_val;
	unsigned int retval = hz;

	/*
	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
	 * are writable, with higher bits sign extending from bit 31.
	 * So we can only program the counter with 31-bit values, and
	 * bit 31 must be set so that bits 32.. are sign extended to 1.
	 * Find the appropriate nmi_hz.
	 */
	counter_val = (u64)cpu_khz * 1000;
	do_div(counter_val, retval);
	if (counter_val > 0x7fffffffULL) {
		u64 count = (u64)cpu_khz * 1000;
		do_div(count, 0x7fffffffUL);
		retval = count + 1;
	}
	return retval;
}

static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!prev_nmi_count)
		return -1;

	printk(KERN_INFO "Testing NMI watchdog ... ");

	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
	local_irq_enable();
	mdelay((20*1000)/nmi_hz); // wait 20 ticks

	for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
		/* Check cpu_callin_map here because that is set
		   after the timer is started. */
		if (!cpu_isset(cpu, cpu_callin_map))
			continue;
#endif
		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
			continue;
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
				cpu,
				prev_nmi_count[cpu],
				nmi_count(cpu));
			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
			atomic_dec(&nmi_active);
		}
	}
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		return -1;
	}
	endflag = 1;
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

		nmi_hz = 1;
		if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
		    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) {
			nmi_hz = adjust_for_32bit_ctr(nmi_hz);
		}
	}

	kfree(prev_nmi_count);
	return 0;
}
/* This needs to happen later in boot so counters are working */
late_initcall(check_nmi_watchdog);

static int __init setup_nmi_watchdog(char *str)
{
	int nmi;

	get_option(&str, &nmi);

	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
		return 0;

	nmi_watchdog = nmi;
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);

static void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

static void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware */
	if (nmi_known_cpu() <= 0)
		return;

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	touch_nmi_watchdog();
}

void disable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	disable_irq(0);
	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) == 0) {
		touch_nmi_watchdog();
		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
		enable_irq(0);
	}
}

static void __acpi_nmi_disable(void *__unused)
{
	apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}

/*
 * Disable timer based NMIs on all CPUs:
 */
void acpi_nmi_disable(void)
{
	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
		on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
}

static void __acpi_nmi_enable(void *__unused)
{
	apic_write_around(APIC_LVT0, APIC_DM_NMI);
}

/*
 * Enable timer based NMIs on all CPUs:
 */
void acpi_nmi_enable(void)
{
	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
		on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
}

#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* only CPU0 goes here, other CPUs should be offline */
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
	/* only CPU0 goes here, other CPUs should be offline */
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
	return 0;
}

static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.id	= 0,
	.cls	= &nmi_sysclass,
};

static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	/* should really be a BUG_ON but b/c this is an
	 * init call, it just doesn't work.  -dcz
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if (atomic_read(&nmi_active) < 0)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */

/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */
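
/*
 * The watchdog counter is programmed with the negated number of CPU
 * cycles per watchdog period (cpu_khz * 1000 / nmi_hz).  The counter
 * counts up and raises the perfmon NMI when it overflows past zero, so
 * an NMI arrives roughly nmi_hz times per second on a busy CPU.
 */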

static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(perfctr_msr, 0 - count);
}

static void write_watchdog_counter32(unsigned int perfctr_msr,
			const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsr(perfctr_msr, (u32)(-count), 0);
}

/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

static int setup_k7_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_K7_PERFCTR0;
	evntsel_msr = MSR_K7_EVNTSEL0;
	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
		goto fail;

	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  //unused
	wd->check_bit = 1ULL << 63;
	return 1;
fail1:
	__release_perfctr_nmi(-1, perfctr_msr);
fail:
	return 0;
}

static void stop_k7_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	__release_evntsel_nmi(-1, wd->evntsel_msr);
	__release_perfctr_nmi(-1, wd->perfctr_msr);
}

#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

static int setup_p6_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_P6_PERFCTR0;
	evntsel_msr = MSR_P6_EVNTSEL0;
	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
		goto fail;

	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  //unused
	wd->check_bit = 1ULL << 39;
	return 1;
fail1:
	__release_perfctr_nmi(-1, perfctr_msr);
fail:
	return 0;
}

static void stop_p6_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	__release_evntsel_nmi(-1, wd->evntsel_msr);
	__release_perfctr_nmi(-1, wd->perfctr_msr);
}

/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
#define P4_CCCR_OVF		(1<<31)

/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
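
/*
 * How the "complemented max threshold" trick works (summary of the
 * manual section cited above): with COMPARE and COMPLEMENT set and the
 * threshold at its maximum of 15, the CCCR increments IQ_COUNTER0 on
 * every cycle in which the ESCR reports 15 or fewer events, i.e. on
 * effectively every cycle, turning the counter into a clock tick source.
 */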

static int setup_p4_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/* performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}

	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
		goto fail;

	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
		goto fail1;

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		| P4_CCCR_COMPLEMENT
		| P4_CCCR_COMPARE
		| P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	wd->check_bit = 1ULL << 39;
	return 1;
fail1:
	__release_perfctr_nmi(-1, perfctr_msr);
fail:
	return 0;
}

static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);

	__release_evntsel_nmi(-1, wd->evntsel_msr);
	__release_perfctr_nmi(-1, wd->perfctr_msr);
}

#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static int setup_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		goto fail;

	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR1;
	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL1;

	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
		goto fail;

	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  //unused
	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
	return 1;
fail1:
	__release_perfctr_nmi(-1, perfctr_msr);
fail:
	return 0;
}

static void stop_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return;

	wrmsr(wd->evntsel_msr, 0, 0);

	__release_evntsel_nmi(-1, wd->evntsel_msr);
	__release_perfctr_nmi(-1, wd->perfctr_msr);
}

void setup_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 1)
		return;

	/* cheap hack to support suspend/resume */
	/* if cpu0 is not active neither should the other cpus */
	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
			    boot_cpu_data.x86 != 16)
				return;
			if (!setup_k7_watchdog())
				return;
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				if (!setup_intel_arch_watchdog())
					return;
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					return;

				if (!setup_p6_watchdog())
					return;
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					return;

				if (!setup_p4_watchdog())
					return;
				break;
			default:
				return;
			}
			break;
		default:
			return;
		}
	}
	wd->enabled = 1;
	atomic_inc(&nmi_active);
}

void stop_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 0)
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			stop_k7_watchdog();
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				stop_intel_arch_watchdog();
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					break;
				stop_p6_watchdog();
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					break;
				stop_p4_watchdog();
				break;
			}
			break;
		default:
			return;
		}
	}
	wd->enabled = 0;
	atomic_dec(&nmi_active);
}

/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */

static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];
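
/*
 * Code that legitimately keeps a CPU busy with interrupts disabled for
 * several seconds (a long polling loop in an early-boot or panic path,
 * for example) is expected to call touch_nmi_watchdog() periodically so
 * that the stale IRQ count is not reported as a lockup; that is why the
 * function below is exported.
 */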

void touch_nmi_watchdog(void)
{
	if (nmi_watchdog > 0) {
		unsigned cpu;

		/*
		 * Just reset the alert counters, (other CPUs might be
		 * spinning on locks we hold):
		 */
		for_each_present_cpu (cpu)
			alert_counter[cpu] = 0;
	}

	/*
	 * Tickle the softlockup detector too:
	 */
	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

extern void die_nmi(struct pt_regs *, const char *msg);

__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{
	/*
	 * Since current_thread_info()-> is always on the stack, and we
	 * always switch the stack NMI-atomically, it's safe to use
	 * smp_processor_id().
	 */
	unsigned int sum;
	int touched = 0;
	int cpu = smp_processor_id();
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 dummy;
	int rc = 0;

	/* check for other users first */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
			== NOTIFY_STOP) {
		rc = 1;
		touched = 1;
	}

	if (cpu_isset(cpu, backtrace_mask)) {
		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */

		spin_lock(&lock);
		printk("NMI backtrace for cpu %d\n", cpu);
		dump_stack();
		spin_unlock(&lock);
		cpu_clear(cpu, backtrace_mask);
	}

	/*
	 * Take the local apic timer and PIT/HPET into account. We don't
	 * know which one is active, when we have highres/dyntick on
	 */
	sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0);

	/* if none of the timers is firing, this cpu isn't doing much */
	if (!touched && last_irq_sums[cpu] == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		alert_counter[cpu]++;
		if (alert_counter[cpu] == 5*nmi_hz)
			/*
			 * die_nmi will return ONLY if NOTIFY_STOP happens..
			 */
			die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
	} else {
		last_irq_sums[cpu] = sum;
		alert_counter[cpu] = 0;
	}
	/* see if the nmi watchdog went off */
	if (wd->enabled) {
		if (nmi_watchdog == NMI_LOCAL_APIC) {
			rdmsrl(wd->perfctr_msr, dummy);
			if (dummy & wd->check_bit) {
				/* this wasn't a watchdog timer interrupt */
				goto done;
			}

			/* only Intel P4 uses the cccr msr */
			if (wd->cccr_msr != 0) {
				/*
				 * P4 quirks:
				 * - An overflown perfctr will assert its interrupt
				 *   until the OVF flag in its CCCR is cleared.
				 * - LVTPC is masked on interrupt and must be
				 *   unmasked by the LVTPC handler.
				 */
				rdmsrl(wd->cccr_msr, dummy);
				dummy &= ~P4_CCCR_OVF;
				wrmsrl(wd->cccr_msr, dummy);
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* start the cycle over again */
				write_watchdog_counter(wd->perfctr_msr, NULL);
			}
			else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
				 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) {
				/* P6 based Pentium M need to re-unmask
				 * the apic vector but it doesn't hurt
				 * other P6 variants.
				 * ArchPerfmon/Core Duo also needs this */
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* P6/ARCH_PERFMON has 32 bit counter write */
				write_watchdog_counter32(wd->perfctr_msr, NULL);
			} else {
				/* start the cycle over again */
				write_watchdog_counter(wd->perfctr_msr, NULL);
			}
			rc = 1;
		} else if (nmi_watchdog == NMI_IO_APIC) {
			/* don't know how to accurately check for this.
			 * just assume it was a watchdog timer interrupt
			 * This matches the old behaviour.
			 */
			rc = 1;
		}
	}
done:
	return rc;
}

int do_nmi_callback(struct pt_regs *regs, int cpu)
{
#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
#endif
	return 0;
}

#ifdef CONFIG_SYSCTL

static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
	die_nmi(regs, buf);
	return 0;
}

/*
 * proc handler for /proc/sys/kernel/nmi
 */
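
/*
 * Writing 1 enables and writing 0 disables the lapic NMI watchdog at
 * run time, e.g. (illustrative shell usage, not part of this file):
 *
 *	echo 0 > /proc/sys/kernel/nmi	# disable
 *	echo 1 > /proc/sys/kernel/nmi	# re-enable
 *
 * Reads report whether the watchdog is currently active.
 */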

int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	int old_state;

	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
	old_state = nmi_watchdog_enabled;
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (!!old_state == !!nmi_watchdog_enabled)
		return 0;

	if (atomic_read(&nmi_active) < 0) {
		printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
		return -EIO;
	}

	if (nmi_watchdog == NMI_DEFAULT) {
		if (nmi_known_cpu() > 0)
			nmi_watchdog = NMI_LOCAL_APIC;
		else
			nmi_watchdog = NMI_IO_APIC;
	}

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		if (nmi_watchdog_enabled)
			enable_lapic_nmi_watchdog();
		else
			disable_lapic_nmi_watchdog();
	} else {
		printk(KERN_WARNING
			"NMI watchdog doesn't know what hardware to touch\n");
		return -EIO;
	}
	return 0;
}

#endif

void __trigger_all_cpu_backtrace(void)
{
	int i;

	backtrace_mask = cpu_online_map;
	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
	for (i = 0; i < 10 * 1000; i++) {
		if (cpus_empty(backtrace_mask))
			break;
		mdelay(1);
	}
}

EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);