nmi.c

/*
 *  linux/arch/x86_64/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson : Power Management for local APIC NMI watchdog.
 *  Pavel Machek and
 *  Mikael Pettersson : PM converted to driver model. Disable/enable API.
 */
#include <linux/nmi.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/kprobes.h>
#include <linux/cpumask.h>

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/proto.h>
#include <asm/kdebug.h>
#include <asm/mce.h>
#include <asm/intel_arch_perfmon.h>
int unknown_nmi_panic;
int nmi_watchdog_enabled;
int panic_on_unrecovered_nmi;
/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
 * evntsel_nmi_owner tracks the ownership of the event selection
 * - different performance counters/event selections may be reserved for
 *   different subsystems; this reservation system just tries to coordinate
 *   things a little
 */
static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);

static cpumask_t backtrace_mask = CPU_MASK_NONE;
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66
/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
int panic_on_timeout;

unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;
struct nmi_watchdog_ctlblk {
        int enabled;
        u64 check_bit;
        unsigned int cccr_msr;
        unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
        unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
        /* returns the bit offset of the performance counter register */
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return (msr - MSR_K7_PERFCTR0);
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return (msr - MSR_ARCH_PERFMON_PERFCTR0);
                else
                        return (msr - MSR_P4_BPU_PERFCTR0);
        }
        return 0;
}

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
        /* returns the bit offset of the event selection register */
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return (msr - MSR_K7_EVNTSEL0);
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
                else
                        return (msr - MSR_P4_BSU_ESCR0);
        }
        return 0;
}
/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);
        return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}
/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);
        return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}
int reserve_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
                return 1;
        return 0;
}

void release_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
}

int reserve_evntsel_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_evntsel_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)))
                return 1;
        return 0;
}

void release_evntsel_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_evntsel_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner));
}
static __cpuinit inline int nmi_known_cpu(void)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return boot_cpu_data.x86 == 15;
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return 1;
                else
                        return (boot_cpu_data.x86 == 15);
        }
        return 0;
}

/* Run after command line and cpu_init init, but before all other checks */
void nmi_watchdog_default(void)
{
        if (nmi_watchdog != NMI_DEFAULT)
                return;
        if (nmi_known_cpu())
                nmi_watchdog = NMI_LOCAL_APIC;
        else
                nmi_watchdog = NMI_IO_APIC;
}
static int endflag __initdata = 0;

#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
        local_irq_enable_in_hardirq();
        /* Intentionally don't use cpu_relax here. This is
           to make sure that the performance counter really ticks,
           even if there is a simulator or similar that catches the
           pause instruction. On a real HT machine this is fine because
           all other CPUs are busy with "useless" delay loops and don't
           care if they get somewhat fewer cycles. */
        while (endflag == 0)
                mb();
}
#endif
static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
        unsigned int retval = hz;

        /*
         * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
         * are writable, with higher bits sign extending from bit 31.
         * So we can only program the counter with 31 bit values, and
         * bit 31 must be set so that bits 32 and above sign-extend to 1.
         * Adjust nmi_hz upward if needed so the period fits in 31 bits.
         */
        if ((((u64)cpu_khz * 1000) / retval) > 0x7fffffffULL) {
                retval = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
        }
        return retval;
}
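
/* Descriptive note (added): boot-time sanity check. Sample each CPU's NMI
 * count, busy the other CPUs, wait roughly ten watchdog periods, and disable
 * the watchdog on any CPU whose count did not advance. If no CPU is left
 * active, nmi_active is set to -1 (permanently unavailable).
 */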
int __init check_nmi_watchdog (void)
{
        int *counts;
        int cpu;

        if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
                return 0;

        if (!atomic_read(&nmi_active))
                return 0;

        counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
        if (!counts)
                return -1;

        printk(KERN_INFO "testing NMI watchdog ... ");

#ifdef CONFIG_SMP
        if (nmi_watchdog == NMI_LOCAL_APIC)
                smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
#endif

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                counts[cpu] = cpu_pda(cpu)->__nmi_count;
        local_irq_enable();
        mdelay((10*1000)/nmi_hz); // wait 10 ticks

        for_each_online_cpu(cpu) {
                if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
                        continue;
                if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
                        printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
                               cpu,
                               counts[cpu],
                               cpu_pda(cpu)->__nmi_count);
                        per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
                        atomic_dec(&nmi_active);
                }
        }
        if (!atomic_read(&nmi_active)) {
                kfree(counts);
                atomic_set(&nmi_active, -1);
                endflag = 1;
                return -1;
        }
        endflag = 1;
        printk("OK.\n");

        /* now that we know it works we can reduce NMI frequency to
           something more reasonable; makes a difference in some configs */
        if (nmi_watchdog == NMI_LOCAL_APIC) {
                struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

                nmi_hz = 1;
                if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0)
                        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        }

        kfree(counts);
        return 0;
}
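
/* Descriptive note (added): parse the "nmi_watchdog=" boot option. An
 * optional leading "panic," makes a watchdog timeout panic the machine;
 * the trailing number selects the mode (one of the NMI_* constants from
 * asm/nmi.h, e.g. NMI_IO_APIC or NMI_LOCAL_APIC).
 */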
int __init setup_nmi_watchdog(char *str)
{
        int nmi;

        if (!strncmp(str, "panic", 5)) {
                panic_on_timeout = 1;
                str = strchr(str, ',');
                if (!str)
                        return 1;
                ++str;
        }

        get_option(&str, &nmi);

        if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
                return 0;

        nmi_watchdog = nmi;
        return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);
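
/* Descriptive note (added): the enable/disable helpers below run the per-CPU
 * stop/setup handlers on every CPU via on_each_cpu(); nmi_active tracks how
 * many CPUs currently have an armed watchdog.
 */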
static void disable_lapic_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

        if (atomic_read(&nmi_active) <= 0)
                return;

        on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

        BUG_ON(atomic_read(&nmi_active) != 0);
}

static void enable_lapic_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

        /* are we already enabled */
        if (atomic_read(&nmi_active) != 0)
                return;

        /* are we lapic aware */
        if (nmi_known_cpu() <= 0)
                return;

        on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
        touch_nmi_watchdog();
}

void disable_timer_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_IO_APIC);

        if (atomic_read(&nmi_active) <= 0)
                return;

        disable_irq(0);
        on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

        BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_timer_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_IO_APIC);

        if (atomic_read(&nmi_active) == 0) {
                touch_nmi_watchdog();
                on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
                enable_irq(0);
        }
}
static void __acpi_nmi_disable(void *__unused)
{
        apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}

/*
 * Disable timer based NMIs on all CPUs:
 */
void acpi_nmi_disable(void)
{
        if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
                on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
}

static void __acpi_nmi_enable(void *__unused)
{
        apic_write(APIC_LVT0, APIC_DM_NMI);
}

/*
 * Enable timer based NMIs on all CPUs:
 */
void acpi_nmi_enable(void)
{
        if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
                on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
}
#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
        /* only CPU0 goes here, other CPUs should be offline */
        nmi_pm_active = atomic_read(&nmi_active);
        stop_apic_nmi_watchdog(NULL);
        BUG_ON(atomic_read(&nmi_active) != 0);
        return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
        /* only CPU0 goes here, other CPUs should be offline */
        if (nmi_pm_active > 0) {
                setup_apic_nmi_watchdog(NULL);
                touch_nmi_watchdog();
        }
        return 0;
}

static struct sysdev_class nmi_sysclass = {
        set_kset_name("lapic_nmi"),
        .resume  = lapic_nmi_resume,
        .suspend = lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
        .id  = 0,
        .cls = &nmi_sysclass,
};
static int __init init_lapic_nmi_sysfs(void)
{
        int error;

        /* should really be a BUG_ON but because this is an
         * init call, it just doesn't work. -dcz
         */
        if (nmi_watchdog != NMI_LOCAL_APIC)
                return 0;

        if (atomic_read(&nmi_active) < 0)
                return 0;

        error = sysdev_class_register(&nmi_sysclass);
        if (!error)
                error = sysdev_register(&device_lapic_nmi);
        return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif  /* CONFIG_PM */
/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define K7_EVNTSEL_ENABLE       (1 << 22)
#define K7_EVNTSEL_INT          (1 << 20)
#define K7_EVNTSEL_OS           (1 << 17)
#define K7_EVNTSEL_USR          (1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING    0x76
#define K7_NMI_EVENT            K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
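
/* Descriptive note (added): program K7/K8 PERFCTR0 to count the
 * "cycles processor is running" event and load it with
 * -(cpu_khz * 1000 / nmi_hz), so it overflows and raises an NMI through
 * the LVTPC entry roughly nmi_hz times per second.
 */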
static int setup_k7_watchdog(void)
{
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        perfctr_msr = MSR_K7_PERFCTR0;
        evntsel_msr = MSR_K7_EVNTSEL0;
        if (!reserve_perfctr_nmi(perfctr_msr))
                goto fail;

        if (!reserve_evntsel_nmi(evntsel_msr))
                goto fail1;

        /* Simulator may not support it */
        if (checking_wrmsrl(evntsel_msr, 0UL))
                goto fail2;
        wrmsrl(perfctr_msr, 0UL);

        evntsel = K7_EVNTSEL_INT
                | K7_EVNTSEL_OS
                | K7_EVNTSEL_USR
                | K7_NMI_EVENT;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= K7_EVNTSEL_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
        wd->check_bit = 1ULL << 63;
        return 1;
fail2:
        release_evntsel_nmi(evntsel_msr);
fail1:
        release_perfctr_nmi(perfctr_msr);
fail:
        return 0;
}
static void stop_k7_watchdog(void)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        wrmsr(wd->evntsel_msr, 0, 0);

        release_evntsel_nmi(wd->evntsel_msr);
        release_perfctr_nmi(wd->perfctr_msr);
}

/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL   (1<<7)
#define P4_ESCR_EVENT_SELECT(N)         ((N)<<25)
#define P4_ESCR_OS                      (1<<3)
#define P4_ESCR_USR                     (1<<2)
#define P4_CCCR_OVF_PMI0                (1<<26)
#define P4_CCCR_OVF_PMI1                (1<<27)
#define P4_CCCR_THRESHOLD(N)            ((N)<<20)
#define P4_CCCR_COMPLEMENT              (1<<19)
#define P4_CCCR_COMPARE                 (1<<18)
#define P4_CCCR_REQUIRED                (3<<16)
#define P4_CCCR_ESCR_SELECT(N)          ((N)<<13)
#define P4_CCCR_ENABLE                  (1<<12)
#define P4_CCCR_OVF                     (1<<31)

/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
static int setup_p4_watchdog(void)
{
        unsigned int perfctr_msr, evntsel_msr, cccr_msr;
        unsigned int evntsel, cccr_val;
        unsigned int misc_enable, dummy;
        unsigned int ht_num;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
        if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
                return 0;

#ifdef CONFIG_SMP
        /* detect which hyperthread we are on */
        if (smp_num_siblings == 2) {
                unsigned int ebx, apicid;

                ebx = cpuid_ebx(1);
                apicid = (ebx >> 24) & 0xff;
                ht_num = apicid & 1;
        } else
#endif
                ht_num = 0;

        /* performance counters are shared resources
         * assign each hyperthread its own set
         * (re-use the ESCR0 register, seems safe
         *  and keeps the cccr_val the same)
         */
        if (!ht_num) {
                /* logical cpu 0 */
                perfctr_msr = MSR_P4_IQ_PERFCTR0;
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR0;
                cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
        } else {
                /* logical cpu 1 */
                perfctr_msr = MSR_P4_IQ_PERFCTR1;
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR1;
                cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
        }

        if (!reserve_perfctr_nmi(perfctr_msr))
                goto fail;

        if (!reserve_evntsel_nmi(evntsel_msr))
                goto fail1;

        evntsel = P4_ESCR_EVENT_SELECT(0x3F)
                | P4_ESCR_OS
                | P4_ESCR_USR;

        cccr_val |= P4_CCCR_THRESHOLD(15)
                 | P4_CCCR_COMPLEMENT
                 | P4_CCCR_COMPARE
                 | P4_CCCR_REQUIRED;

        wrmsr(evntsel_msr, evntsel, 0);
        wrmsr(cccr_msr, cccr_val, 0);
        wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        cccr_val |= P4_CCCR_ENABLE;
        wrmsr(cccr_msr, cccr_val, 0);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = cccr_msr;
        wd->check_bit = 1ULL << 39;
        return 1;
fail1:
        release_perfctr_nmi(perfctr_msr);
fail:
        return 0;
}

static void stop_p4_watchdog(void)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        wrmsr(wd->cccr_msr, 0, 0);
        wrmsr(wd->evntsel_msr, 0, 0);

        release_evntsel_nmi(wd->evntsel_msr);
        release_perfctr_nmi(wd->perfctr_msr);
}

#define ARCH_PERFMON_NMI_EVENT_SEL      ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK    ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
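
/* Descriptive note (added): Intel architectural perfmon variant - count
 * Unhalted Core Cycles on general-purpose counter 0, with availability
 * enumerated via CPUID leaf 0xA. Only the low 32 bits of the counter are
 * writable, hence the adjust_for_32bit_ctr() call below.
 */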
static int setup_intel_arch_watchdog(void)
{
        unsigned int ebx;
        union cpuid10_eax eax;
        unsigned int unused;
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        /*
         * Check whether the Architectural PerfMon supports
         * Unhalted Core Cycles Event or not.
         * NOTE: Corresponding bit = 0 in ebx indicates event present.
         */
        cpuid(10, &(eax.full), &ebx, &unused, &unused);
        if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
                goto fail;

        perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
        evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;

        if (!reserve_perfctr_nmi(perfctr_msr))
                goto fail;

        if (!reserve_evntsel_nmi(evntsel_msr))
                goto fail1;

        wrmsrl(perfctr_msr, 0UL);

        evntsel = ARCH_PERFMON_EVENTSEL_INT
                | ARCH_PERFMON_EVENTSEL_OS
                | ARCH_PERFMON_EVENTSEL_USR
                | ARCH_PERFMON_NMI_EVENT_SEL
                | ARCH_PERFMON_NMI_EVENT_UMASK;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);

        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        wrmsr(perfctr_msr, (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
        wd->check_bit = 1ULL << (eax.split.bit_width - 1);
        return 1;
fail1:
        release_perfctr_nmi(perfctr_msr);
fail:
        return 0;
}
static void stop_intel_arch_watchdog(void)
{
        unsigned int ebx;
        union cpuid10_eax eax;
        unsigned int unused;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        /*
         * Check whether the Architectural PerfMon supports
         * Unhalted Core Cycles Event or not.
         * NOTE: Corresponding bit = 0 in ebx indicates event present.
         */
        cpuid(10, &(eax.full), &ebx, &unused, &unused);
        if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
                return;

        wrmsr(wd->evntsel_msr, 0, 0);

        release_evntsel_nmi(wd->evntsel_msr);
        release_perfctr_nmi(wd->perfctr_msr);
}
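
/* Descriptive note (added): per-CPU worker, normally invoked on every CPU
 * via on_each_cpu(). Picks the vendor-specific setup routine and bumps
 * nmi_active on success so the generic code knows this CPU's watchdog is
 * armed.
 */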
void setup_apic_nmi_watchdog(void *unused)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        /* only support LOCAL and IO APICs for now */
        if ((nmi_watchdog != NMI_LOCAL_APIC) &&
            (nmi_watchdog != NMI_IO_APIC))
                return;

        if (wd->enabled == 1)
                return;

        /* cheap hack to support suspend/resume */
        /* if cpu0 is not active neither should the other cpus */
        if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
                return;

        if (nmi_watchdog == NMI_LOCAL_APIC) {
                switch (boot_cpu_data.x86_vendor) {
                case X86_VENDOR_AMD:
                        if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
                                return;
                        if (!setup_k7_watchdog())
                                return;
                        break;
                case X86_VENDOR_INTEL:
                        if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
                                if (!setup_intel_arch_watchdog())
                                        return;
                                break;
                        }
                        if (!setup_p4_watchdog())
                                return;
                        break;
                default:
                        return;
                }
        }
        wd->enabled = 1;
        atomic_inc(&nmi_active);
}
void stop_apic_nmi_watchdog(void *unused)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        /* only support LOCAL and IO APICs for now */
        if ((nmi_watchdog != NMI_LOCAL_APIC) &&
            (nmi_watchdog != NMI_IO_APIC))
                return;

        if (wd->enabled == 0)
                return;

        if (nmi_watchdog == NMI_LOCAL_APIC) {
                switch (boot_cpu_data.x86_vendor) {
                case X86_VENDOR_AMD:
                        if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
                                return;
                        stop_k7_watchdog();
                        break;
                case X86_VENDOR_INTEL:
                        if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
                                stop_intel_arch_watchdog();
                                break;
                        }
                        stop_p4_watchdog();
                        break;
                default:
                        return;
                }
        }
        wd->enabled = 0;
        atomic_dec(&nmi_active);
}
/*
 * The best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * As these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 */
static DEFINE_PER_CPU(unsigned, last_irq_sum);
static DEFINE_PER_CPU(local_t, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

void touch_nmi_watchdog (void)
{
        if (nmi_watchdog > 0) {
                unsigned cpu;

                /*
                 * Tell other CPUs to reset their alert counters. We cannot
                 * do it ourselves because the alert count increase is not
                 * atomic.
                 */
                for_each_present_cpu (cpu)
                        per_cpu(nmi_touch, cpu) = 1;
        }

        touch_softlockup_watchdog();
}
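
/* Descriptive note (added): runs on every watchdog NMI. Returns 1 if the
 * NMI was recognised (a watchdog tick or another DIE_NMI user), 0 so the
 * caller can treat it as an unknown NMI. It compares the local APIC timer
 * interrupt count against the previous tick; if it has not moved for about
 * five seconds worth of ticks, the CPU is declared locked up via die_nmi().
 */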
int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{
        int sum;
        int touched = 0;
        int cpu = smp_processor_id();
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
        u64 dummy;
        int rc = 0;

        /* check for other users first */
        if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
                        == NOTIFY_STOP) {
                rc = 1;
                touched = 1;
        }

        sum = read_pda(apic_timer_irqs);
        if (__get_cpu_var(nmi_touch)) {
                __get_cpu_var(nmi_touch) = 0;
                touched = 1;
        }

        if (cpu_isset(cpu, backtrace_mask)) {
                static DEFINE_SPINLOCK(lock);   /* Serialise the printks */

                spin_lock(&lock);
                printk("NMI backtrace for cpu %d\n", cpu);
                dump_stack();
                spin_unlock(&lock);
                cpu_clear(cpu, backtrace_mask);
        }
#ifdef CONFIG_X86_MCE
        /* Could check oops_in_progress here too, but it's safer
           not to */
        if (atomic_read(&mce_entry) > 0)
                touched = 1;
#endif
        /* if the apic timer isn't firing, this cpu isn't doing much */
        if (!touched && __get_cpu_var(last_irq_sum) == sum) {
                /*
                 * Ayiee, looks like this CPU is stuck ...
                 * wait a few IRQs (5 seconds) before doing the oops ...
                 */
                local_inc(&__get_cpu_var(alert_counter));
                if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz)
                        die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs,
                                panic_on_timeout);
        } else {
                __get_cpu_var(last_irq_sum) = sum;
                local_set(&__get_cpu_var(alert_counter), 0);
        }
        /* see if the nmi watchdog went off */
        if (wd->enabled) {
                if (nmi_watchdog == NMI_LOCAL_APIC) {
                        rdmsrl(wd->perfctr_msr, dummy);
                        if (dummy & wd->check_bit) {
                                /* this wasn't a watchdog timer interrupt */
                                goto done;
                        }

                        /* only Intel uses the cccr msr */
                        if (wd->cccr_msr != 0) {
                                /*
                                 * P4 quirks:
                                 * - An overflown perfctr will assert its interrupt
                                 *   until the OVF flag in its CCCR is cleared.
                                 * - LVTPC is masked on interrupt and must be
                                 *   unmasked by the LVTPC handler.
                                 */
                                rdmsrl(wd->cccr_msr, dummy);
                                dummy &= ~P4_CCCR_OVF;
                                wrmsrl(wd->cccr_msr, dummy);
                                apic_write(APIC_LVTPC, APIC_DM_NMI);
                                /* start the cycle over again */
                                wrmsrl(wd->perfctr_msr,
                                       -((u64)cpu_khz * 1000 / nmi_hz));
                        } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
                                /*
                                 * ArchPerfmon/Core Duo needs to re-unmask
                                 * the apic vector
                                 */
                                apic_write(APIC_LVTPC, APIC_DM_NMI);
                                /* ARCH_PERFMON has 32 bit counter writes */
                                wrmsr(wd->perfctr_msr,
                                      (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
                        } else {
                                /* start the cycle over again */
                                wrmsrl(wd->perfctr_msr,
                                       -((u64)cpu_khz * 1000 / nmi_hz));
                        }
                        rc = 1;
                } else if (nmi_watchdog == NMI_IO_APIC) {
                        /* don't know how to accurately check for this.
                         * just assume it was a watchdog timer interrupt
                         * This matches the old behaviour.
                         */
                        rc = 1;
                } else
                        printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
        }
done:
        return rc;
}
asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
{
        nmi_enter();
        add_pda(__nmi_count, 1);
        default_do_nmi(regs);
        nmi_exit();
}
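
/* Descriptive note (added): last-chance hook for NMIs nobody claimed,
 * called from the unknown-NMI path in the traps code. When the
 * unknown_nmi_panic sysctl is set, it panics via die_nmi() instead of just
 * printing a message.
 */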
int do_nmi_callback(struct pt_regs * regs, int cpu)
{
#ifdef CONFIG_SYSCTL
        if (unknown_nmi_panic)
                return unknown_nmi_panic_callback(regs, cpu);
#endif
        return 0;
}
#ifdef CONFIG_SYSCTL

static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
        unsigned char reason = get_nmi_reason();
        char buf[64];

        sprintf(buf, "NMI received for unknown reason %02x\n", reason);
        die_nmi(buf, regs, 1);  /* Always panic here */
        return 0;
}
/*
 * proc handler for /proc/sys/kernel/nmi
 */
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
                     void __user *buffer, size_t *length, loff_t *ppos)
{
        int old_state;

        nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
        old_state = nmi_watchdog_enabled;
        proc_dointvec(table, write, file, buffer, length, ppos);
        if (!!old_state == !!nmi_watchdog_enabled)
                return 0;

        if (atomic_read(&nmi_active) < 0) {
                printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
                return -EIO;
        }

        /* if nmi_watchdog is not set yet, then set it */
        nmi_watchdog_default();

        if (nmi_watchdog == NMI_LOCAL_APIC) {
                if (nmi_watchdog_enabled)
                        enable_lapic_nmi_watchdog();
                else
                        disable_lapic_nmi_watchdog();
        } else {
                printk(KERN_WARNING
                       "NMI watchdog doesn't know what hardware to touch\n");
                return -EIO;
        }
        return 0;
}

#endif
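
/* Descriptive note (added): flag every online CPU in backtrace_mask; each
 * CPU notices the flag in its next watchdog NMI, dumps its stack (see
 * nmi_watchdog_tick above), and clears its bit. Wait up to 10 seconds for
 * that to happen on all CPUs.
 */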
void __trigger_all_cpu_backtrace(void)
{
        int i;

        backtrace_mask = cpu_online_map;
        /* Wait for up to 10 seconds for all CPUs to do the backtrace */
        for (i = 0; i < 10 * 1000; i++) {
                if (cpus_empty(backtrace_mask))
                        break;
                mdelay(1);
        }
}
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);
EXPORT_SYMBOL(touch_nmi_watchdog);