perfctr-watchdog.c
/*
 * local apic based NMI watchdog for various CPUs.
 *
 * This file also handles reservation of performance counters for coordination
 * with other users (like oprofile).
 *
 * Note that these events normally don't tick when the CPU idles. This means
 * the frequency varies with CPU load.
 *
 * Original code for K7/P6 written by Keith Owens
 *
 */

#include <linux/percpu.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <linux/kprobes.h>

#include <asm/genapic.h>
#include <asm/intel_arch_perfmon.h>

struct nmi_watchdog_ctlblk {
        unsigned int cccr_msr;
        unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
        unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};

/* Interface defining a CPU specific perfctr watchdog */
struct wd_ops {
        int (*reserve)(void);
        void (*unreserve)(void);
        int (*setup)(unsigned nmi_hz);
        void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
        void (*stop)(void);
        unsigned perfctr;
        unsigned evntsel;
        u64 checkbit;
};

static const struct wd_ops *wd_ops;

/*
 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0.
 *
 * It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66

/*
 * perfctr_nmi_owner tracks the ownership of the perfctr registers;
 * evntsel_nmi_owner tracks the ownership of the event selection registers.
 * Different performance counters / event selection registers may be
 * reserved by different subsystems; this reservation system just tries
 * to coordinate things a little.
 */
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);

static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
        /* returns the bit offset of the performance counter register */
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return (msr - MSR_K7_PERFCTR0);
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return (msr - MSR_ARCH_PERFMON_PERFCTR0);

                switch (boot_cpu_data.x86) {
                case 6:
                        return (msr - MSR_P6_PERFCTR0);
                case 15:
                        return (msr - MSR_P4_BPU_PERFCTR0);
                }
        }
        return 0;
}

/*
 * converts an msr to an appropriate reservation bit
 * returns the bit offset of the event selection register
 */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
        /* returns the bit offset of the event selection register */
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return (msr - MSR_K7_EVNTSEL0);
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return (msr - MSR_ARCH_PERFMON_EVENTSEL0);

                switch (boot_cpu_data.x86) {
                case 6:
                        return (msr - MSR_P6_EVNTSEL0);
                case 15:
                        return (msr - MSR_P4_BSU_ESCR0);
                }
        }
        return 0;
}
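
/*
 * Example of the mapping above: on AMD, MSR_K7_PERFCTR2 maps to bit 2 of
 * perfctr_nmi_owner and MSR_K7_EVNTSEL2 maps to bit 2 of evntsel_nmi_owner.
 */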

/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        return (!test_bit(counter, perfctr_nmi_owner));
}

/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        return (!test_bit(counter, perfctr_nmi_owner));
}
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);

int reserve_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter > NMI_MAX_COUNTER_BITS)
                return 1;

        if (!test_and_set_bit(counter, perfctr_nmi_owner))
                return 1;
        return 0;
}
EXPORT_SYMBOL(reserve_perfctr_nmi);

void release_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter > NMI_MAX_COUNTER_BITS)
                return;

        clear_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(release_perfctr_nmi);

int reserve_evntsel_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_evntsel_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter > NMI_MAX_COUNTER_BITS)
                return 1;

        if (!test_and_set_bit(counter, evntsel_nmi_owner))
                return 1;
        return 0;
}
EXPORT_SYMBOL(reserve_evntsel_nmi);

void release_evntsel_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_evntsel_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter > NMI_MAX_COUNTER_BITS)
                return;

        clear_bit(counter, evntsel_nmi_owner);
}
EXPORT_SYMBOL(release_evntsel_nmi);
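
/*
 * Sketch of how another perfctr user (e.g. oprofile) is expected to use the
 * reservation interface above; the reserve routines return 1 on success:
 *
 *      if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
 *              return -EBUSY;
 *      if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
 *              release_perfctr_nmi(MSR_K7_PERFCTR0);
 *              return -EBUSY;
 *      }
 *      ... program and use the counter ...
 *      release_evntsel_nmi(MSR_K7_EVNTSEL0);
 *      release_perfctr_nmi(MSR_K7_PERFCTR0);
 */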

void disable_lapic_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

        if (atomic_read(&nmi_active) <= 0)
                return;

        on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);

        if (wd_ops)
                wd_ops->unreserve();

        BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_lapic_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

        /* are we already enabled */
        if (atomic_read(&nmi_active) != 0)
                return;

        /* are we lapic aware */
        if (!wd_ops)
                return;
        if (!wd_ops->reserve()) {
                printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
                return;
        }

        on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
        touch_nmi_watchdog();
}

/*
 * Activate the NMI watchdog via the local APIC.
 */

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
        u64 counter_val;
        unsigned int retval = hz;

        /*
         * On Intel CPUs with P6/ARCH_PERFMON only the low 32 bits of the
         * counter are writable; the higher bits are sign-extended from
         * bit 31.  So we can only program the counter with 31-bit values,
         * and bit 31 must be set so that bits 32..63 end up as 1.
         * Adjust nmi_hz upward until the count fits.
         */
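        /*
         * Example: on a 3 GHz CPU (cpu_khz == 3000000) a requested rate of
         * nmi_hz == 1 needs a period of 3*10^9 cycles, which does not fit
         * in 31 bits, so the rate is bumped to
         * 3000000000 / 0x7fffffff + 1 == 2 NMIs per second.
         */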
        counter_val = (u64)cpu_khz * 1000;
        do_div(counter_val, retval);
        if (counter_val > 0x7fffffffULL) {
                u64 count = (u64)cpu_khz * 1000;

                do_div(count, 0x7fffffffUL);
                retval = count + 1;
        }
        return retval;
}
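
/*
 * The counter is programmed with the negated period so that it counts up
 * and overflows (raising the NMI) after cpu_khz * 1000 / nmi_hz cycles,
 * i.e. roughly nmi_hz times per second while the CPU is busy.
 */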
static void write_watchdog_counter(unsigned int perfctr_msr,
                                   const char *descr, unsigned nmi_hz)
{
        u64 count = (u64)cpu_khz * 1000;

        do_div(count, nmi_hz);
        if (descr)
                pr_debug("setting %s to -0x%08Lx\n", descr, count);
        wrmsrl(perfctr_msr, 0 - count);
}

static void write_watchdog_counter32(unsigned int perfctr_msr,
                                     const char *descr, unsigned nmi_hz)
{
        u64 count = (u64)cpu_khz * 1000;

        do_div(count, nmi_hz);
        if (descr)
                pr_debug("setting %s to -0x%08Lx\n", descr, count);
        wrmsr(perfctr_msr, (u32)(-count), 0);
}

/*
 * AMD K7/K8/Family10h/Family11h support.
 * AMD keeps this interface nicely stable so there is not much variety
 */
#define K7_EVNTSEL_ENABLE (1 << 22)
#define K7_EVNTSEL_INT (1 << 20)
#define K7_EVNTSEL_OS (1 << 17)
#define K7_EVNTSEL_USR (1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

static int setup_k7_watchdog(unsigned nmi_hz)
{
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        perfctr_msr = wd_ops->perfctr;
        evntsel_msr = wd_ops->evntsel;

        wrmsrl(perfctr_msr, 0UL);

        evntsel = K7_EVNTSEL_INT
                | K7_EVNTSEL_OS
                | K7_EVNTSEL_USR
                | K7_NMI_EVENT;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);

        /* initialize the wd struct before enabling */
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */

        /* ok, everything is initialized, announce that we're set */
        cpu_nmi_set_wd_enabled();

        apic_write(APIC_LVTPC, APIC_DM_NMI);

        evntsel |= K7_EVNTSEL_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        return 1;
}

static void single_msr_stop_watchdog(void)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        wrmsr(wd->evntsel_msr, 0, 0);
}

static int single_msr_reserve(void)
{
        if (!reserve_perfctr_nmi(wd_ops->perfctr))
                return 0;

        if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
                release_perfctr_nmi(wd_ops->perfctr);
                return 0;
        }
        return 1;
}

static void single_msr_unreserve(void)
{
        release_evntsel_nmi(wd_ops->evntsel);
        release_perfctr_nmi(wd_ops->perfctr);
}

static void __kprobes
single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
        /* start the cycle over again */
        write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}
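
/*
 * AMD K7/K8/Fam10h counters are 48 bits wide, so bit 47 is used as the
 * "has the counter wrapped" check bit (see lapic_wd_event()).
 */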
static const struct wd_ops k7_wd_ops = {
        .reserve = single_msr_reserve,
        .unreserve = single_msr_unreserve,
        .setup = setup_k7_watchdog,
        .rearm = single_msr_rearm,
        .stop = single_msr_stop_watchdog,
        .perfctr = MSR_K7_PERFCTR0,
        .evntsel = MSR_K7_EVNTSEL0,
        .checkbit = 1ULL << 47,
};

/*
 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
 */
#define P6_EVNTSEL0_ENABLE (1 << 22)
#define P6_EVNTSEL_INT (1 << 20)
#define P6_EVNTSEL_OS (1 << 17)
#define P6_EVNTSEL_USR (1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED

static int setup_p6_watchdog(unsigned nmi_hz)
{
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        perfctr_msr = wd_ops->perfctr;
        evntsel_msr = wd_ops->evntsel;

        /* KVM doesn't implement this MSR */
        if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
                return 0;

        evntsel = P6_EVNTSEL_INT
                | P6_EVNTSEL_OS
                | P6_EVNTSEL_USR
                | P6_NMI_EVENT;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);

        /* initialize the wd struct before enabling */
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */

        /* ok, everything is initialized, announce that we're set */
        cpu_nmi_set_wd_enabled();

        apic_write(APIC_LVTPC, APIC_DM_NMI);

        evntsel |= P6_EVNTSEL0_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        return 1;
}

static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
        /*
         * P6-based Pentium M needs to re-unmask the apic vector, but it
         * doesn't hurt other P6 variants either.
         * ArchPerfmon/Core Duo also needs this.
         */
        apic_write(APIC_LVTPC, APIC_DM_NMI);

        /* P6/ARCH_PERFMON has 32 bit counter writes */
        write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
}
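
/*
 * P6-family counters are 40 bits wide, so bit 39 serves as the overflow
 * check bit here.
 */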
static const struct wd_ops p6_wd_ops = {
        .reserve = single_msr_reserve,
        .unreserve = single_msr_unreserve,
        .setup = setup_p6_watchdog,
        .rearm = p6_rearm,
        .stop = single_msr_stop_watchdog,
        .perfctr = MSR_P6_PERFCTR0,
        .evntsel = MSR_P6_EVNTSEL0,
        .checkbit = 1ULL << 39,
};

/*
 * Intel P4 performance counters.
 * By far the most complicated of all.
 */
#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
#define P4_ESCR_OS (1 << 3)
#define P4_ESCR_USR (1 << 2)
#define P4_CCCR_OVF_PMI0 (1 << 26)
#define P4_CCCR_OVF_PMI1 (1 << 27)
#define P4_CCCR_THRESHOLD(N) ((N) << 20)
#define P4_CCCR_COMPLEMENT (1 << 19)
#define P4_CCCR_COMPARE (1 << 18)
#define P4_CCCR_REQUIRED (3 << 16)
#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
#define P4_CCCR_ENABLE (1 << 12)
#define P4_CCCR_OVF (1 << 31)

#define P4_CONTROLS 18
static unsigned int p4_controls[18] = {
        MSR_P4_BPU_CCCR0,
        MSR_P4_BPU_CCCR1,
        MSR_P4_BPU_CCCR2,
        MSR_P4_BPU_CCCR3,
        MSR_P4_MS_CCCR0,
        MSR_P4_MS_CCCR1,
        MSR_P4_MS_CCCR2,
        MSR_P4_MS_CCCR3,
        MSR_P4_FLAME_CCCR0,
        MSR_P4_FLAME_CCCR1,
        MSR_P4_FLAME_CCCR2,
        MSR_P4_FLAME_CCCR3,
        MSR_P4_IQ_CCCR0,
        MSR_P4_IQ_CCCR1,
        MSR_P4_IQ_CCCR2,
        MSR_P4_IQ_CCCR3,
        MSR_P4_IQ_CCCR4,
        MSR_P4_IQ_CCCR5,
};

/*
 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
 * CRU_ESCR0 (with any non-null event selector) through a complemented
 * max threshold. [IA32-Vol3, Section 14.9.9]
 */
static int setup_p4_watchdog(unsigned nmi_hz)
{
        unsigned int perfctr_msr, evntsel_msr, cccr_msr;
        unsigned int evntsel, cccr_val;
        unsigned int misc_enable, dummy;
        unsigned int ht_num;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
        if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
                return 0;

#ifdef CONFIG_SMP
        /* detect which hyperthread we are on */
        if (smp_num_siblings == 2) {
                unsigned int ebx, apicid;

                ebx = cpuid_ebx(1);
                apicid = (ebx >> 24) & 0xff;
                ht_num = apicid & 1;
        } else
#endif
                ht_num = 0;

        /*
         * performance counters are shared resources;
         * assign each hyperthread its own set
         * (re-use the ESCR0 register, seems safe
         *  and keeps the cccr_val the same)
         */
        if (!ht_num) {
                /* logical cpu 0 */
                perfctr_msr = MSR_P4_IQ_PERFCTR0;
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR0;
                cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);

                /*
                 * If we're on the kdump kernel or other situation, we may
                 * still have other performance counter registers set to
                 * interrupt and they'll keep interrupting forever because
                 * of the P4_CCCR_OVF quirk. So we need to ACK all the
                 * pending interrupts and disable all the registers here,
                 * before reenabling the NMI delivery. Refer to p4_rearm()
                 * about the P4_CCCR_OVF quirk.
                 */
                if (reset_devices) {
                        unsigned int low, high;
                        int i;

                        for (i = 0; i < P4_CONTROLS; i++) {
                                rdmsr(p4_controls[i], low, high);
                                low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
                                wrmsr(p4_controls[i], low, high);
                        }
                }
        } else {
                /* logical cpu 1 */
                perfctr_msr = MSR_P4_IQ_PERFCTR1;
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR1;

                /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
                if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
                        cccr_val = P4_CCCR_OVF_PMI0;
                else
                        cccr_val = P4_CCCR_OVF_PMI1;
                cccr_val |= P4_CCCR_ESCR_SELECT(4);
        }

        evntsel = P4_ESCR_EVENT_SELECT(0x3F)
                | P4_ESCR_OS
                | P4_ESCR_USR;

        cccr_val |= P4_CCCR_THRESHOLD(15)
                 | P4_CCCR_COMPLEMENT
                 | P4_CCCR_COMPARE
                 | P4_CCCR_REQUIRED;

        wrmsr(evntsel_msr, evntsel, 0);
        wrmsr(cccr_msr, cccr_val, 0);
        write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = cccr_msr;

        /* ok, everything is initialized, announce that we're set */
        cpu_nmi_set_wd_enabled();

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        cccr_val |= P4_CCCR_ENABLE;
        wrmsr(cccr_msr, cccr_val, 0);

        return 1;
}

static void stop_p4_watchdog(void)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        wrmsr(wd->cccr_msr, 0, 0);
        wrmsr(wd->evntsel_msr, 0, 0);
}

static int p4_reserve(void)
{
        if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
                return 0;
#ifdef CONFIG_SMP
        if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
                goto fail1;
#endif
        if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
                goto fail2;
        /* RED-PEN why is ESCR1 not reserved here? */
        return 1;
fail2:
#ifdef CONFIG_SMP
        if (smp_num_siblings > 1)
                release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
fail1:
#endif
        release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
        return 0;
}

static void p4_unreserve(void)
{
#ifdef CONFIG_SMP
        if (smp_num_siblings > 1)
                release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
#endif
        release_evntsel_nmi(MSR_P4_CRU_ESCR0);
        release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
}

static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
        unsigned dummy;

        /*
         * P4 quirks:
         * - An overflown perfctr will assert its interrupt
         *   until the OVF flag in its CCCR is cleared.
         * - LVTPC is masked on interrupt and must be
         *   unmasked by the LVTPC handler.
         */
        rdmsrl(wd->cccr_msr, dummy);
        dummy &= ~P4_CCCR_OVF;
        wrmsrl(wd->cccr_msr, dummy);
        apic_write(APIC_LVTPC, APIC_DM_NMI);

        /* start the cycle over again */
        write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p4_wd_ops = {
        .reserve = p4_reserve,
        .unreserve = p4_unreserve,
        .setup = setup_p4_watchdog,
        .rearm = p4_rearm,
        .stop = stop_p4_watchdog,
        /* RED-PEN this is wrong for the other sibling */
        .perfctr = MSR_P4_BPU_PERFCTR0,
        .evntsel = MSR_P4_BSU_ESCR0,
        .checkbit = 1ULL << 39,
};

/*
 * Watchdog using the Intel architected PerfMon.
 * Used for Core2 and hopefully all future Intel CPUs.
 */
#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static struct wd_ops intel_arch_wd_ops;

static int setup_intel_arch_watchdog(unsigned nmi_hz)
{
        unsigned int ebx;
        union cpuid10_eax eax;
        unsigned int unused;
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        /*
         * Check whether the Architectural PerfMon supports
         * Unhalted Core Cycles Event or not.
         * NOTE: Corresponding bit = 0 in ebx indicates event present.
         */
        cpuid(10, &(eax.full), &ebx, &unused, &unused);
        if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
                return 0;

        perfctr_msr = wd_ops->perfctr;
        evntsel_msr = wd_ops->evntsel;

        wrmsrl(perfctr_msr, 0UL);

        evntsel = ARCH_PERFMON_EVENTSEL_INT
                | ARCH_PERFMON_EVENTSEL_OS
                | ARCH_PERFMON_EVENTSEL_USR
                | ARCH_PERFMON_NMI_EVENT_SEL
                | ARCH_PERFMON_NMI_EVENT_UMASK;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */

        /* ok, everything is initialized, announce that we're set */
        cpu_nmi_set_wd_enabled();

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);
        intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
        return 1;
}
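
/*
 * Counter/event select 1 are used by default, leaving counter 0 free for
 * other perfctr users such as oprofile.  probe_nmi_watchdog() switches back
 * to counter 0 on Core Duo (Yonah), where erratum AE49 leaves counter 1
 * without a working enable bit.
 */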
static struct wd_ops intel_arch_wd_ops __read_mostly = {
        .reserve = single_msr_reserve,
        .unreserve = single_msr_unreserve,
        .setup = setup_intel_arch_watchdog,
        .rearm = p6_rearm,
        .stop = single_msr_stop_watchdog,
        .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
        .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
};

static void probe_nmi_watchdog(void)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
                    boot_cpu_data.x86 != 16)
                        return;
                wd_ops = &k7_wd_ops;
                break;
        case X86_VENDOR_INTEL:
                /*
                 * Work around Core Duo (Yonah) erratum AE49 where perfctr1
                 * doesn't have a working enable bit.
                 */
                if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
                        intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
                        intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
                }
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
                        wd_ops = &intel_arch_wd_ops;
                        break;
                }
                switch (boot_cpu_data.x86) {
                case 6:
                        if (boot_cpu_data.x86_model > 13)
                                return;
                        wd_ops = &p6_wd_ops;
                        break;
                case 15:
                        wd_ops = &p4_wd_ops;
                        break;
                default:
                        return;
                }
                break;
        }
}

/* Interface to nmi.c */

int lapic_watchdog_init(unsigned nmi_hz)
{
        if (!wd_ops) {
                probe_nmi_watchdog();
                if (!wd_ops) {
                        printk(KERN_INFO "NMI watchdog: CPU not supported\n");
                        return -1;
                }

                if (!wd_ops->reserve()) {
                        printk(KERN_ERR
                               "NMI watchdog: cannot reserve perfctrs\n");
                        return -1;
                }
        }

        if (!(wd_ops->setup(nmi_hz))) {
                printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
                       raw_smp_processor_id());
                return -1;
        }

        return 0;
}

void lapic_watchdog_stop(void)
{
        if (wd_ops)
                wd_ops->stop();
}

unsigned lapic_adjust_nmi_hz(unsigned hz)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
            wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
                hz = adjust_for_32bit_ctr(hz);
        return hz;
}
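
/*
 * Called from the NMI handler on every watchdog NMI.  The perfctr is
 * programmed with a negative value, so its top bit (wd_ops->checkbit)
 * stays set until the counter wraps.  Return 1 if the counter has wrapped
 * (the NMI belongs to the watchdog and the counter is rearmed), 0 if it is
 * still counting and the NMI came from somewhere else.
 */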
int __kprobes lapic_wd_event(unsigned nmi_hz)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
        u64 ctr;

        rdmsrl(wd->perfctr_msr, ctr);
        if (ctr & wd_ops->checkbit) /* perfctr still running? */
                return 0;

        wd_ops->rearm(wd, nmi_hz);
        return 1;
}

int lapic_watchdog_ok(void)
{
        return wd_ops != NULL;
}