/*
 * linux/arch/x86_64/nmi.c
 *
 * NMI watchdog support on APIC systems
 *
 * Started by Ingo Molnar <mingo@redhat.com>
 *
 * Fixes:
 * Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 * Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 * Pavel Machek and
 * Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */

#include <linux/nmi.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/kprobes.h>
#include <linux/cpumask.h>

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/proto.h>
#include <asm/kdebug.h>
#include <asm/mce.h>
#include <asm/intel_arch_perfmon.h>

int unknown_nmi_panic;
int nmi_watchdog_enabled;
int panic_on_unrecovered_nmi;

/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
 * evntsel_nmi_owner tracks the ownership of the event selection
 * - different performance counters/event selections may be reserved for
 *   different subsystems; this reservation system just tries to coordinate
 *   things a little
 */

/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66
#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)

static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);

static cpumask_t backtrace_mask = CPU_MASK_NONE;

/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
int panic_on_timeout;

unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;

struct nmi_watchdog_ctlblk {
	int enabled;
	u64 check_bit;
	unsigned int cccr_msr;
	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_PERFCTR0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_PERFCTR0);
		else
			return (msr - MSR_P4_BPU_PERFCTR0);
	}
	return 0;
}

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_EVNTSEL0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
		else
			return (msr - MSR_P4_BSU_ESCR0);
	}
	return 0;
}
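
/*
 * For example, on AMD K7/K8 MSR_K7_PERFCTR3 maps to reservation bit 3
 * (MSR_K7_PERFCTR3 - MSR_K7_PERFCTR0).  On P4 the ESCR MSRs are sparse
 * relative to MSR_P4_BSU_ESCR0, which is why the owner bitmaps above are
 * sized for NMI_MAX_COUNTER_BITS (66) bits rather than one bit per counter.
 */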

/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	int cpu;

	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
	for_each_possible_cpu (cpu) {
		if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
			return 0;
	}
	return 1;
}

/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;
	int cpu;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	for_each_possible_cpu (cpu) {
		if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
			return 0;
	}
	return 1;
}

static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
{
	unsigned int counter;

	if (cpu < 0)
		cpu = smp_processor_id();

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
		return 1;
	return 0;
}

static void __release_perfctr_nmi(int cpu, unsigned int msr)
{
	unsigned int counter;

	if (cpu < 0)
		cpu = smp_processor_id();

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu));
}

int reserve_perfctr_nmi(unsigned int msr)
{
	int cpu, i;

	for_each_possible_cpu (cpu) {
		if (!__reserve_perfctr_nmi(cpu, msr)) {
			for_each_possible_cpu (i) {
				if (i >= cpu)
					break;
				__release_perfctr_nmi(i, msr);
			}
			return 0;
		}
	}
	return 1;
}

void release_perfctr_nmi(unsigned int msr)
{
	int cpu;

	for_each_possible_cpu (cpu)
		__release_perfctr_nmi(cpu, msr);
}

int __reserve_evntsel_nmi(int cpu, unsigned int msr)
{
	unsigned int counter;

	if (cpu < 0)
		cpu = smp_processor_id();

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
		return 1;
	return 0;
}

static void __release_evntsel_nmi(int cpu, unsigned int msr)
{
	unsigned int counter;

	if (cpu < 0)
		cpu = smp_processor_id();

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
}

int reserve_evntsel_nmi(unsigned int msr)
{
	int cpu, i;

	for_each_possible_cpu (cpu) {
		if (!__reserve_evntsel_nmi(cpu, msr)) {
			for_each_possible_cpu (i) {
				if (i >= cpu)
					break;
				__release_evntsel_nmi(i, msr);
			}
			return 0;
		}
	}
	return 1;
}

void release_evntsel_nmi(unsigned int msr)
{
	int cpu;

	for_each_possible_cpu (cpu) {
		__release_evntsel_nmi(cpu, msr);
	}
}
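
/*
 * A typical external user (e.g. oprofile) is expected to claim a counter
 * roughly like this before programming it, and to release it afterwards
 * (a sketch only; real callers pick the MSRs that match their CPU model):
 *
 *	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
 *		return -EBUSY;
 *	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
 *		release_perfctr_nmi(MSR_K7_PERFCTR0);
 *		return -EBUSY;
 *	}
 *	... program the counter ...
 *	release_evntsel_nmi(MSR_K7_EVNTSEL0);
 *	release_perfctr_nmi(MSR_K7_PERFCTR0);
 */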

static __cpuinit inline int nmi_known_cpu(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return boot_cpu_data.x86 == 15 || boot_cpu_data.x86 == 16;
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return 1;
		else
			return (boot_cpu_data.x86 == 15);
	}
	return 0;
}

/* Run after command line and cpu_init init, but before all other checks */
void nmi_watchdog_default(void)
{
	if (nmi_watchdog != NMI_DEFAULT)
		return;
	nmi_watchdog = NMI_NONE;
}

static int endflag __initdata = 0;

#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
	local_irq_enable_in_hardirq();
	/* Intentionally don't use cpu_relax here. This is
	   to make sure that the performance counter really ticks,
	   even if there is a simulator or similar that catches the
	   pause instruction. On a real HT machine this is fine because
	   all other CPUs are busy with "useless" delay loops and don't
	   care if they get somewhat less cycles. */
	while (endflag == 0)
		mb();
}
#endif

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
	unsigned int retval = hz;

	/*
	 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
	 * are writable, with higher bits sign extending from bit 31.
	 * So we can only program the counter with 31 bit values; bit 31
	 * of the reload value must stay set so that bits 32..63 sign
	 * extend to 1.  Find the appropriate nmi_hz.
	 */
	if ((((u64)cpu_khz * 1000) / retval) > 0x7fffffffULL) {
		retval = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
	}
	return retval;
}
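
/*
 * Rough example: with nmi_hz == 1 on a 3 GHz CPU the reload value would be
 * about 3,000,000,000 cycles, which does not fit in 31 bits, so nmi_hz is
 * bumped to 3e9 / 0x7fffffff + 1 = 2 and the counter is reloaded twice a
 * second instead.  (The numbers are illustrative only.)
 */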

int __init check_nmi_watchdog (void)
{
	int *counts;
	int cpu;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!counts)
		return -1;

	printk(KERN_INFO "testing NMI watchdog ... ");

#ifdef CONFIG_SMP
	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
#endif

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		counts[cpu] = cpu_pda(cpu)->__nmi_count;
	local_irq_enable();
	mdelay((20*1000)/nmi_hz); // wait 20 ticks

	for_each_online_cpu(cpu) {
		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
			continue;
		if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
			       cpu,
			       counts[cpu],
			       cpu_pda(cpu)->__nmi_count);
			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
			atomic_dec(&nmi_active);
		}
	}
	if (!atomic_read(&nmi_active)) {
		kfree(counts);
		atomic_set(&nmi_active, -1);
		endflag = 1;
		return -1;
	}
	endflag = 1;
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

		nmi_hz = 1;
		if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0)
			nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	}

	kfree(counts);
	return 0;
}

int __init setup_nmi_watchdog(char *str)
{
	int nmi;

	if (!strncmp(str,"panic",5)) {
		panic_on_timeout = 1;
		str = strchr(str, ',');
		if (!str)
			return 1;
		++str;
	}

	get_option(&str, &nmi);

	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
		return 0;

	nmi_watchdog = nmi;
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);
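
/*
 * Boot parameter examples (the numeric values come from the NMI_* constants
 * in <asm/nmi.h>, typically 1 = NMI_IO_APIC and 2 = NMI_LOCAL_APIC):
 *
 *	nmi_watchdog=2		use the local APIC / perfctr watchdog
 *	nmi_watchdog=panic,1	use the IO-APIC watchdog, panic on lockup
 */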

static void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

static void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware */
	if (nmi_known_cpu() <= 0)
		return;

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	touch_nmi_watchdog();
}

void disable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	disable_irq(0);
	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) == 0) {
		touch_nmi_watchdog();
		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
		enable_irq(0);
	}
}

static void __acpi_nmi_disable(void *__unused)
{
	apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}

/*
 * Disable timer based NMIs on all CPUs:
 */
void acpi_nmi_disable(void)
{
	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
		on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
}

static void __acpi_nmi_enable(void *__unused)
{
	apic_write(APIC_LVT0, APIC_DM_NMI);
}

/*
 * Enable timer based NMIs on all CPUs:
 */
void acpi_nmi_enable(void)
{
	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
		on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
}

#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* only CPU0 goes here, other CPUs should be offline */
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
	/* only CPU0 goes here, other CPUs should be offline */
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
	return 0;
}

static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.id	= 0,
	.cls	= &nmi_sysclass,
};

static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	/* should really be a BUG_ON but b/c this is an
	 * init call, it just doesn't work.  -dcz
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if (atomic_read(&nmi_active) < 0)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */

/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
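
/*
 * The watchdog period below comes from programming the counter with
 * -(cpu_khz * 1000 / nmi_hz): counting unhalted CPU cycles up from that
 * negative value makes the counter overflow, and thus raise the PMI
 * (delivered as an NMI via LVTPC), roughly nmi_hz times per second of
 * non-idle time.
 */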
static int setup_k7_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_K7_PERFCTR0;
	evntsel_msr = MSR_K7_EVNTSEL0;
	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
		goto fail;

	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
		goto fail1;

	/* Simulator may not support it */
	if (checking_wrmsrl(evntsel_msr, 0UL))
		goto fail2;
	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  //unused
	wd->check_bit = 1ULL<<63;
	return 1;
fail2:
	__release_evntsel_nmi(-1, evntsel_msr);
fail1:
	__release_perfctr_nmi(-1, perfctr_msr);
fail:
	return 0;
}

static void stop_k7_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	__release_evntsel_nmi(-1, wd->evntsel_msr);
	__release_perfctr_nmi(-1, wd->perfctr_msr);
}

/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
#define P4_CCCR_OVF		(1<<31)

/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
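/* In other words (a rough summary; see the SDM section referenced above):
   with COMPARE and COMPLEMENT set and a threshold of 15, the comparison
   "event count <= 15" holds on practically every cycle, so IQ_COUNTER0
   increments once per clock and can be preloaded with a negative value to
   get a periodic overflow NMI, just like the K7/ARCH_PERFMON paths. */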
static int setup_p4_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/* performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}

	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
		goto fail;

	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
		goto fail1;

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		 | P4_CCCR_COMPLEMENT
		 | P4_CCCR_COMPARE
		 | P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	wd->check_bit = 1ULL<<39;
	return 1;
fail1:
	__release_perfctr_nmi(-1, perfctr_msr);
fail:
	return 0;
}

static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);

	__release_evntsel_nmi(-1, wd->evntsel_msr);
	__release_perfctr_nmi(-1, wd->perfctr_msr);
}

#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static int setup_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		goto fail;

	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;

	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
		goto fail;

	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	wrmsr(perfctr_msr, (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  //unused
	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
	return 1;
fail1:
	__release_perfctr_nmi(-1, perfctr_msr);
fail:
	return 0;
}

static void stop_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return;

	wrmsr(wd->evntsel_msr, 0, 0);

	__release_evntsel_nmi(-1, wd->evntsel_msr);
	__release_perfctr_nmi(-1, wd->perfctr_msr);
}

void setup_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 1)
		return;

	/* cheap hack to support suspend/resume */
	/* if cpu0 is not active neither should the other cpus */
	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
				return;
			if (!setup_k7_watchdog())
				return;
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				if (!setup_intel_arch_watchdog())
					return;
				break;
			}
			if (!setup_p4_watchdog())
				return;
			break;
		default:
			return;
		}
	}
	wd->enabled = 1;
	atomic_inc(&nmi_active);
}

void stop_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 0)
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
				return;
			stop_k7_watchdog();
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				stop_intel_arch_watchdog();
				break;
			}
			stop_p4_watchdog();
			break;
		default:
			return;
		}
	}
	wd->enabled = 0;
	atomic_dec(&nmi_active);
}

/*
 * The best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * As these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 */

static DEFINE_PER_CPU(unsigned, last_irq_sum);
static DEFINE_PER_CPU(local_t, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

void touch_nmi_watchdog (void)
{
	if (nmi_watchdog > 0) {
		unsigned cpu;

		/*
		 * Tell other CPUs to reset their alert counters. We cannot
		 * do it ourselves because the alert count increase is not
		 * atomic.
		 */
		for_each_present_cpu (cpu)
			per_cpu(nmi_touch, cpu) = 1;
	}

	touch_softlockup_watchdog();
}
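
/*
 * Code that legitimately keeps interrupts off or spins for a long time
 * (long console writes, debugger loops, etc.) is expected to call
 * touch_nmi_watchdog() periodically so that the loop is not mistaken
 * for a lockup.
 */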
int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{
	int sum;
	int touched = 0;
	int cpu = smp_processor_id();
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 dummy;
	int rc = 0;

	/* check for other users first */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
			== NOTIFY_STOP) {
		rc = 1;
		touched = 1;
	}

	sum = read_pda(apic_timer_irqs);
	if (__get_cpu_var(nmi_touch)) {
		__get_cpu_var(nmi_touch) = 0;
		touched = 1;
	}

	if (cpu_isset(cpu, backtrace_mask)) {
		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */

		spin_lock(&lock);
		printk("NMI backtrace for cpu %d\n", cpu);
		dump_stack();
		spin_unlock(&lock);
		cpu_clear(cpu, backtrace_mask);
	}

#ifdef CONFIG_X86_MCE
	/* Could check oops_in_progress here too, but it's safer
	   not to */
	if (atomic_read(&mce_entry) > 0)
		touched = 1;
#endif
	/* if the apic timer isn't firing, this cpu isn't doing much */
	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		local_inc(&__get_cpu_var(alert_counter));
		if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz)
			die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs,
				panic_on_timeout);
	} else {
		__get_cpu_var(last_irq_sum) = sum;
		local_set(&__get_cpu_var(alert_counter), 0);
	}

	/* see if the nmi watchdog went off */
	if (wd->enabled) {
		if (nmi_watchdog == NMI_LOCAL_APIC) {
			rdmsrl(wd->perfctr_msr, dummy);
			if (dummy & wd->check_bit) {
				/* this wasn't a watchdog timer interrupt */
				goto done;
			}

			/* only Intel uses the cccr msr */
			if (wd->cccr_msr != 0) {
				/*
				 * P4 quirks:
				 * - An overflown perfctr will assert its interrupt
				 *   until the OVF flag in its CCCR is cleared.
				 * - LVTPC is masked on interrupt and must be
				 *   unmasked by the LVTPC handler.
				 */
				rdmsrl(wd->cccr_msr, dummy);
				dummy &= ~P4_CCCR_OVF;
				wrmsrl(wd->cccr_msr, dummy);
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* start the cycle over again */
				wrmsrl(wd->perfctr_msr,
				       -((u64)cpu_khz * 1000 / nmi_hz));
			} else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
				/*
				 * ArchPerfmon/Core Duo needs to re-unmask
				 * the apic vector
				 */
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* ARCH_PERFMON has 32 bit counter writes */
				wrmsr(wd->perfctr_msr,
				      (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
			} else {
				/* start the cycle over again */
				wrmsrl(wd->perfctr_msr,
				       -((u64)cpu_khz * 1000 / nmi_hz));
			}
			rc = 1;
		} else if (nmi_watchdog == NMI_IO_APIC) {
			/* don't know how to accurately check for this.
			 * just assume it was a watchdog timer interrupt.
			 * This matches the old behaviour.
			 */
			rc = 1;
		} else
			printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
	}
done:
	return rc;
}

asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
{
	nmi_enter();
	add_pda(__nmi_count,1);
	default_do_nmi(regs);
	nmi_exit();
}

int do_nmi_callback(struct pt_regs * regs, int cpu)
{
#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
#endif
	return 0;
}

#ifdef CONFIG_SYSCTL

static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
	die_nmi(buf, regs, 1);	/* Always panic here */
	return 0;
}

/*
 * proc handler for /proc/sys/kernel/nmi
 */
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	int old_state;

	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
	old_state = nmi_watchdog_enabled;
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (!!old_state == !!nmi_watchdog_enabled)
		return 0;

	if (atomic_read(&nmi_active) < 0) {
		printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
		return -EIO;
	}

	/* if nmi_watchdog is not set yet, then set it */
	nmi_watchdog_default();

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		if (nmi_watchdog_enabled)
			enable_lapic_nmi_watchdog();
		else
			disable_lapic_nmi_watchdog();
	} else {
		printk(KERN_WARNING
			"NMI watchdog doesn't know what hardware to touch\n");
		return -EIO;
	}
	return 0;
}

#endif
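
/*
 * Rough operation of the all-CPU backtrace: set backtrace_mask to every
 * online CPU; each CPU's next nmi_watchdog_tick() notices its bit, prints
 * a stack trace under a spinlock and clears the bit; we then poll for up
 * to ~10 seconds for the mask to drain.
 */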
void __trigger_all_cpu_backtrace(void)
{
	int i;

	backtrace_mask = cpu_online_map;
	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
	for (i = 0; i < 10 * 1000; i++) {
		if (cpus_empty(backtrace_mask))
			break;
		mdelay(1);
	}
}

EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);
EXPORT_SYMBOL(touch_nmi_watchdog);